mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge remote-tracking branch 'origin/master' into pr-local-plan
This commit is contained in:
commit
f39dbac9a0
8
.github/ISSUE_TEMPLATE/10_question.md
vendored
8
.github/ISSUE_TEMPLATE/10_question.md
vendored
@ -10,3 +10,11 @@ assignees: ''
|
||||
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
|
||||
|
||||
> If you still prefer GitHub issues, remove all this text and ask your question here.
|
||||
|
||||
**Company or project name**
|
||||
|
||||
Put your company name or project description here
|
||||
|
||||
**Question**
|
||||
|
||||
Your question
|
||||
|
4
.github/ISSUE_TEMPLATE/20_feature-request.md
vendored
4
.github/ISSUE_TEMPLATE/20_feature-request.md
vendored
@ -9,6 +9,10 @@ assignees: ''
|
||||
|
||||
> (you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
|
||||
> Put your company name or project description here
|
||||
|
||||
**Use case**
|
||||
|
||||
> A clear and concise description of what the intended usage scenario is.
|
||||
|
@ -9,6 +9,10 @@ assignees: ''
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the unexpected behaviour**
|
||||
A clear and concise description of what does not work as it is supposed to.
|
||||
|
||||
|
@ -9,6 +9,10 @@ assignees: ''
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the unexpected behaviour**
|
||||
A clear and concise description of what does not work as it is supposed to.
|
||||
|
||||
|
3
.github/ISSUE_TEMPLATE/45_usability-issue.md
vendored
3
.github/ISSUE_TEMPLATE/45_usability-issue.md
vendored
@ -9,6 +9,9 @@ assignees: ''
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the issue**
|
||||
A clear and concise description of what does not work as it is supposed to.
|
||||
|
||||
|
4
.github/ISSUE_TEMPLATE/50_build-issue.md
vendored
4
.github/ISSUE_TEMPLATE/50_build-issue.md
vendored
@ -9,6 +9,10 @@ assignees: ''
|
||||
|
||||
> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.com/docs/en/development/build/
|
||||
|
||||
**Company or project name**
|
||||
|
||||
> Put your company name or project description here
|
||||
|
||||
**Operating system**
|
||||
|
||||
> OS kind or distribution, specific version/release, non-standard kernel if any. If you are trying to build inside virtual machine, please mention it too.
|
||||
|
@ -8,6 +8,9 @@ labels: comp-documentation
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the issue**
|
||||
A clear and concise description of what's wrong in documentation.
|
||||
|
||||
|
@ -9,6 +9,9 @@ assignees: ''
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the situation**
|
||||
What exactly works slower than expected?
|
||||
|
||||
|
@ -9,6 +9,9 @@ assignees: ''
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the issue**
|
||||
A clear and concise description of what does not work as it is supposed to.
|
||||
|
||||
|
4
.github/ISSUE_TEMPLATE/85_bug-report.md
vendored
4
.github/ISSUE_TEMPLATE/85_bug-report.md
vendored
@ -11,6 +11,10 @@ assignees: ''
|
||||
|
||||
> You have to provide the following information whenever possible.
|
||||
|
||||
**Company or project name**
|
||||
|
||||
> Put your company name or project description here
|
||||
|
||||
**Describe what's wrong**
|
||||
|
||||
> A clear and concise description of what does not work as it is supposed to.
|
||||
|
@ -7,6 +7,10 @@ assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Company or project name**
|
||||
|
||||
Put your company name or project description here
|
||||
|
||||
**I have tried the following solutions**: https://clickhouse.com/docs/en/faq/troubleshooting/#troubleshooting-installation-errors
|
||||
|
||||
**Installation type**
|
||||
|
2
.github/workflows/reusable_test.yml
vendored
2
.github/workflows/reusable_test.yml
vendored
@ -58,7 +58,7 @@ jobs:
|
||||
env:
|
||||
GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].num_batches > 1 && format('-{0}',matrix.batch) || '' }}
|
||||
strategy:
|
||||
fail-fast: false # we always wait for entire matrix
|
||||
fail-fast: false # we always wait for the entire matrix
|
||||
matrix:
|
||||
batch: ${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].batches }}
|
||||
steps:
|
||||
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -21,6 +21,9 @@
|
||||
*.stderr
|
||||
*.stdout
|
||||
|
||||
# llvm-xray logs
|
||||
xray-log.*
|
||||
|
||||
/docs/build
|
||||
/docs/publish
|
||||
/docs/edit
|
||||
|
29
.gitmessage
29
.gitmessage
@ -1,29 +0,0 @@
|
||||
|
||||
|
||||
### CI modifiers (add a leading space to apply) ###
|
||||
|
||||
## To avoid a merge commit in CI:
|
||||
#no_merge_commit
|
||||
|
||||
## To discard CI cache:
|
||||
#no_ci_cache
|
||||
|
||||
## To not test (only style check):
|
||||
#do_not_test
|
||||
|
||||
## To run specified set of tests in CI:
|
||||
#ci_set_<SET_NAME>
|
||||
#ci_set_reduced
|
||||
#ci_set_arm
|
||||
#ci_set_integration
|
||||
#ci_set_old_analyzer
|
||||
|
||||
## To run specified job in CI:
|
||||
#job_<JOB NAME>
|
||||
#job_stateless_tests_release
|
||||
#job_package_debug
|
||||
#job_integration_tests_asan
|
||||
|
||||
## To run only specified batches for multi-batch job(s)
|
||||
#batch_2
|
||||
#batch_1_2_3
|
@ -11,8 +11,8 @@
|
||||
### <a id="245"></a> ClickHouse release 24.5, 2024-05-30
|
||||
|
||||
#### Backward Incompatible Change
|
||||
* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make to drop such indexes before upgrade and re-create them after upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` deprecated (because it is error-prone). Proper window functions should be used instead. To enable them back, set `allow_deprecated_functions = 1` or set `compatibility = '24.4'` or lower. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make sure to drop such indexes before upgrade and re-create them after upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` is deprecated (because it is error-prone). Proper window functions should be used instead. To re-enable them, set `allow_deprecated_error_prone_window_functions = 1` or set `compatibility = '24.4'` or lower. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Queries from `system.columns` will work faster if there is a large number of columns, but many databases or tables are not granted for `SHOW TABLES`. Note that in previous versions, if you grant `SHOW COLUMNS` to individual columns without granting `SHOW TABLES` to the corresponding tables, the `system.columns` table will show these columns, but in a new version, it will skip the table entirely. Remove trace log messages "Access granted" and "Access denied" that slowed down queries. [#63439](https://github.com/ClickHouse/ClickHouse/pull/63439) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### New Feature
|
||||
|
@ -122,6 +122,8 @@ add_library(global-libs INTERFACE)
|
||||
|
||||
include (cmake/sanitize.cmake)
|
||||
|
||||
include (cmake/instrument.cmake)
|
||||
|
||||
option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON)
|
||||
|
||||
set (CMAKE_COLOR_MAKEFILE ${ENABLE_COLORED_BUILD}) # works only for the makefile generator
|
||||
@ -208,8 +210,6 @@ option(OMIT_HEAVY_DEBUG_SYMBOLS
|
||||
"Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)"
|
||||
${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT})
|
||||
|
||||
option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
|
||||
|
||||
option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF)
|
||||
if (NOT BUILD_STANDALONE_KEEPER)
|
||||
option(CREATE_KEEPER_SYMLINK "Create symlink for clickhouse-keeper to main server binary" ON)
|
||||
|
34
SECURITY.md
34
SECURITY.md
@ -2,20 +2,22 @@
|
||||
the file is autogenerated by utils/security-generator/generate_security.py
|
||||
-->
|
||||
|
||||
# Security Policy
|
||||
# ClickHouse Security Vulnerability Response Policy
|
||||
|
||||
## Security Announcements
|
||||
Security fixes will be announced by posting them in the [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/).
|
||||
## Security Change Log and Support
|
||||
|
||||
## Scope and Supported Versions
|
||||
Details regarding security fixes are publicly reported in our [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/). A summary of known security vulnerabilities is shown at the bottom of this page.
|
||||
|
||||
The following versions of ClickHouse server are currently being supported with security updates:
|
||||
Vulnerability notifications pre-release or during embargo periods are available to open source users and support customers registered for vulnerability alerts. Refer to our [Embargo Policy](#embargo-policy) below.
|
||||
|
||||
The following versions of ClickHouse server are currently supported with security updates:
|
||||
|
||||
| Version | Supported |
|
||||
|:-|:-|
|
||||
| 24.5 | ✔️ |
|
||||
| 24.4 | ✔️ |
|
||||
| 24.3 | ✔️ |
|
||||
| 24.2 | ✔️ |
|
||||
| 24.2 | ❌ |
|
||||
| 24.1 | ❌ |
|
||||
| 23.* | ❌ |
|
||||
| 23.8 | ✔️ |
|
||||
@ -37,7 +39,7 @@ The following versions of ClickHouse server are currently being supported with s
|
||||
|
||||
We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers.
|
||||
|
||||
To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement.
|
||||
To report a potential vulnerability in ClickHouse, please submit the details through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement.
|
||||
|
||||
### When Should I Report a Vulnerability?
|
||||
|
||||
@ -59,3 +61,21 @@ As the security issue moves from triage, to identified fix, to release planning
|
||||
|
||||
A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days.
|
||||
|
||||
## Embargo Policy
|
||||
|
||||
Open source users and support customers may subscribe to receive alerts during the embargo period by visiting [https://trust.clickhouse.com/?product=clickhouseoss](https://trust.clickhouse.com/?product=clickhouseoss), requesting access and subscribing for alerts. Subscribers agree not to make these notifications public, issue communications, share this information with others, or issue public patches before the disclosure date. Accidental disclosures must be reported immediately to trust@clickhouse.com. Failure to follow this policy or repeated leaks may result in removal from the subscriber list.
|
||||
|
||||
Participation criteria:
|
||||
1. Be a current open source user or support customer with a valid corporate email domain (no @gmail.com, @azure.com, etc.).
|
||||
1. Sign up to the ClickHouse OSS Trust Center at [https://trust.clickhouse.com](https://trust.clickhouse.com).
|
||||
1. Accept the ClickHouse Security Vulnerability Response Policy as outlined above.
|
||||
1. Subscribe to ClickHouse OSS Trust Center alerts.
|
||||
|
||||
Removal criteria:
|
||||
1. Members may be removed for failure to follow this policy or repeated leaks.
|
||||
1. Members may be removed for bounced messages (mail delivery failure).
|
||||
1. Members may unsubscribe at any time.
|
||||
|
||||
Notification process:
|
||||
ClickHouse will post notifications within our OSS Trust Center and notify subscribers. Subscribers must log in to the Trust Center to download the notification. The notification will include the timeframe for public disclosure.
|
||||
|
||||
|
@ -34,15 +34,6 @@ set (SRCS
|
||||
throwError.cpp
|
||||
)
|
||||
|
||||
if (USE_DEBUG_HELPERS)
|
||||
get_target_property(MAGIC_ENUM_INCLUDE_DIR ch_contrib::magic_enum INTERFACE_INCLUDE_DIRECTORIES)
|
||||
# CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc.
|
||||
# Prefixing "SHELL:" will force it to use the original text.
|
||||
set (INCLUDE_DEBUG_HELPERS "SHELL:-I\"${MAGIC_ENUM_INCLUDE_DIR}\" -include \"${ClickHouse_SOURCE_DIR}/base/base/iostream_debug_helpers.h\"")
|
||||
# Use generator expression as we don't want to pollute CMAKE_CXX_FLAGS, which will interfere with CMake check system.
|
||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${INCLUDE_DEBUG_HELPERS}>)
|
||||
endif ()
|
||||
|
||||
add_library (common ${SRCS})
|
||||
|
||||
if (WITH_COVERAGE)
|
||||
|
@ -1,187 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "demangle.h"
|
||||
#include "getThreadId.h"
|
||||
#include <type_traits>
|
||||
#include <tuple>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <magic_enum.hpp>
|
||||
|
||||
/** Usage:
|
||||
*
|
||||
* DUMP(variable...)
|
||||
*/
|
||||
|
||||
|
||||
template <typename Out, typename T>
|
||||
Out & dumpValue(Out &, T &&);
|
||||
|
||||
|
||||
/// Catch-all case.
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == -1)
|
||||
Out & dumpImpl(Out & out, T &&) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return out << "{...}";
|
||||
}
|
||||
|
||||
/// An object, that could be output with operator <<.
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 0)
|
||||
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::declval<Out &>() << std::declval<T>())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return out << x;
|
||||
}
|
||||
|
||||
/// A pointer-like object.
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 1
|
||||
/// Protect from the case when operator * does effectively nothing (function pointer).
|
||||
&& !std::is_same_v<std::decay_t<T>, std::decay_t<decltype(*std::declval<T>())>>)
|
||||
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(*std::declval<T>())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
if (!x)
|
||||
return out << "nullptr";
|
||||
return dumpValue(out, *x);
|
||||
}
|
||||
|
||||
/// Container.
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 2)
|
||||
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::begin(std::declval<T>()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
bool first = true;
|
||||
out << "{";
|
||||
for (const auto & elem : x)
|
||||
{
|
||||
if (first)
|
||||
first = false;
|
||||
else
|
||||
out << ", ";
|
||||
dumpValue(out, elem);
|
||||
}
|
||||
return out << "}";
|
||||
}
|
||||
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 3 && std::is_enum_v<std::decay_t<T>>)
|
||||
Out & dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return out << magic_enum::enum_name(x);
|
||||
}
|
||||
|
||||
/// string and const char * - output not as container or pointer.
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 3 && (std::is_same_v<std::decay_t<T>, std::string> || std::is_same_v<std::decay_t<T>, const char *>))
|
||||
Out & dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return out << std::quoted(x);
|
||||
}
|
||||
|
||||
/// UInt8 - output as number, not char.
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 3 && std::is_same_v<std::decay_t<T>, unsigned char>)
|
||||
Out & dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return out << int(x);
|
||||
}
|
||||
|
||||
|
||||
/// Tuple, pair
|
||||
template <size_t N, typename Out, typename T>
|
||||
Out & dumpTupleImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
if constexpr (N == 0)
|
||||
out << "{";
|
||||
else
|
||||
out << ", ";
|
||||
|
||||
dumpValue(out, std::get<N>(x));
|
||||
|
||||
if constexpr (N + 1 == std::tuple_size_v<std::decay_t<T>>)
|
||||
out << "}";
|
||||
else
|
||||
dumpTupleImpl<N + 1>(out, x);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 4)
|
||||
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::get<0>(std::declval<T>()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return dumpTupleImpl<0>(out, x);
|
||||
}
|
||||
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
Out & dumpDispatchPriorities(Out & out, T && x, std::decay_t<decltype(dumpImpl<priority>(std::declval<Out &>(), std::declval<T>()))> *) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return dumpImpl<priority>(out, x);
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
struct LowPriority { LowPriority(void *) {} };
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
Out & dumpDispatchPriorities(Out & out, T && x, LowPriority) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return dumpDispatchPriorities<priority - 1>(out, x, nullptr);
|
||||
}
|
||||
|
||||
|
||||
template <typename Out, typename T>
|
||||
Out & dumpValue(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return dumpDispatchPriorities<5>(out, x, nullptr);
|
||||
}
|
||||
|
||||
|
||||
template <typename Out, typename T>
|
||||
Out & dump(Out & out, const char * name, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
// Dumping string literal, printing name and demangled type is irrelevant.
|
||||
if constexpr (std::is_same_v<const char *, std::decay_t<std::remove_reference_t<T>>>)
|
||||
{
|
||||
const auto name_len = strlen(name);
|
||||
const auto value_len = strlen(x);
|
||||
// `name` is the same as quoted `x`
|
||||
if (name_len > 2 && value_len > 0 && name[0] == '"' && name[name_len - 1] == '"'
|
||||
&& strncmp(name + 1, x, std::min(value_len, name_len) - 1) == 0)
|
||||
return out << x;
|
||||
}
|
||||
|
||||
out << demangle(typeid(x).name()) << " " << name << " = ";
|
||||
return dumpValue(out, x) << "; ";
|
||||
}
|
||||
|
||||
#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
|
||||
|
||||
#define DUMPVAR(VAR) ::dump(std::cerr, #VAR, (VAR));
|
||||
#define DUMPHEAD std::cerr << __FILE__ << ':' << __LINE__ << " [ " << getThreadId() << " ] ";
|
||||
#define DUMPTAIL std::cerr << '\n';
|
||||
|
||||
#define DUMP1(V1) do { DUMPHEAD DUMPVAR(V1) DUMPTAIL } while(0)
|
||||
#define DUMP2(V1, V2) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPTAIL } while(0)
|
||||
#define DUMP3(V1, V2, V3) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPTAIL } while(0)
|
||||
#define DUMP4(V1, V2, V3, V4) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPTAIL } while(0)
|
||||
#define DUMP5(V1, V2, V3, V4, V5) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPTAIL } while(0)
|
||||
#define DUMP6(V1, V2, V3, V4, V5, V6) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPTAIL } while(0)
|
||||
#define DUMP7(V1, V2, V3, V4, V5, V6, V7) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPTAIL } while(0)
|
||||
#define DUMP8(V1, V2, V3, V4, V5, V6, V7, V8) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPTAIL } while(0)
|
||||
#define DUMP9(V1, V2, V3, V4, V5, V6, V7, V8, V9) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPVAR(V9) DUMPTAIL } while(0)
|
||||
|
||||
/// https://groups.google.com/forum/#!searchin/kona-dev/variadic$20macro%7Csort:date/kona-dev/XMA-lDOqtlI/GCzdfZsD41sJ
|
||||
|
||||
#define VA_NUM_ARGS_IMPL(x1, x2, x3, x4, x5, x6, x7, x8, x9, N, ...) N
|
||||
#define VA_NUM_ARGS(...) VA_NUM_ARGS_IMPL(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1)
|
||||
|
||||
#define MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS) PREFIX ## NUM_ARGS
|
||||
#define MAKE_VAR_MACRO_IMPL(PREFIX, NUM_ARGS) MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS)
|
||||
#define MAKE_VAR_MACRO(PREFIX, ...) MAKE_VAR_MACRO_IMPL(PREFIX, VA_NUM_ARGS(__VA_ARGS__))
|
||||
|
||||
#define DUMP(...) MAKE_VAR_MACRO(DUMP, __VA_ARGS__)(__VA_ARGS__)
|
@ -1,2 +0,0 @@
|
||||
clickhouse_add_executable (dump_variable dump_variable.cpp)
|
||||
target_link_libraries (dump_variable PRIVATE clickhouse_common_io)
|
@ -1,70 +0,0 @@
|
||||
#include <base/iostream_debug_helpers.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
#include <array>
|
||||
#include <utility>
|
||||
|
||||
|
||||
struct S1;
|
||||
struct S2 {};
|
||||
|
||||
struct S3
|
||||
{
|
||||
std::set<const char *> m1;
|
||||
};
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const S3 & what)
|
||||
{
|
||||
stream << "S3 {m1=";
|
||||
dumpValue(stream, what.m1) << "}";
|
||||
return stream;
|
||||
}
|
||||
|
||||
int main(int, char **)
|
||||
{
|
||||
int x = 1;
|
||||
|
||||
DUMP(x);
|
||||
DUMP(x, 1, &x);
|
||||
|
||||
DUMP(std::make_unique<int>(1));
|
||||
DUMP(std::make_shared<int>(1));
|
||||
|
||||
std::vector<int> vec{1, 2, 3};
|
||||
DUMP(vec);
|
||||
|
||||
auto pair = std::make_pair(1, 2);
|
||||
DUMP(pair);
|
||||
|
||||
auto tuple = std::make_tuple(1, 2, 3);
|
||||
DUMP(tuple);
|
||||
|
||||
std::map<int, std::string> map{{1, "hello"}, {2, "world"}};
|
||||
DUMP(map);
|
||||
|
||||
std::initializer_list<const char *> list{"hello", "world"};
|
||||
DUMP(list);
|
||||
|
||||
std::array<const char *, 2> arr{{"hello", "world"}};
|
||||
DUMP(arr);
|
||||
|
||||
//DUMP([]{});
|
||||
|
||||
S1 * s = nullptr;
|
||||
DUMP(s);
|
||||
|
||||
DUMP(S2());
|
||||
|
||||
std::set<const char *> variants = {"hello", "world"};
|
||||
DUMP(variants);
|
||||
|
||||
S3 s3 {{"hello", "world"}};
|
||||
DUMP(s3);
|
||||
|
||||
return 0;
|
||||
}
|
20
cmake/instrument.cmake
Normal file
20
cmake/instrument.cmake
Normal file
@ -0,0 +1,20 @@
|
||||
# https://llvm.org/docs/XRay.html
|
||||
|
||||
option (ENABLE_XRAY "Enable LLVM XRay" OFF)
|
||||
|
||||
if (NOT ENABLE_XRAY)
|
||||
message (STATUS "Not using LLVM XRay")
|
||||
return()
|
||||
endif()
|
||||
|
||||
if (NOT (ARCH_AMD64 AND (OS_LINUX OR OS_FREEBSD)))
|
||||
message (STATUS "Not using LLVM XRay, only amd64 Linux or FreeBSD are supported")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# The target clang must support xray, otherwise it should error on invalid option
|
||||
set (XRAY_FLAGS "-fxray-instrument -DUSE_XRAY")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${XRAY_FLAGS}")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${XRAY_FLAGS}")
|
||||
|
||||
message (STATUS "Using LLVM XRay")
|
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.4.1.2088"
|
||||
ARG VERSION="24.5.1.1763"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.4.1.2088"
|
||||
ARG VERSION="24.5.1.1763"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="24.4.1.2088"
|
||||
ARG VERSION="24.5.1.1763"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
#docker-official-library:off
|
||||
|
@ -15,7 +15,6 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
file \
|
||||
libxml2-utils \
|
||||
moreutils \
|
||||
python3-fuzzywuzzy \
|
||||
python3-pip \
|
||||
yamllint \
|
||||
locales \
|
||||
@ -23,8 +22,18 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
# python-magic is the same version as in Ubuntu 22.04
|
||||
RUN pip3 install black==23.12.0 boto3 codespell==2.2.1 mypy==1.8.0 PyGithub unidiff pylint==3.1.0 \
|
||||
python-magic==0.4.24 requests types-requests \
|
||||
RUN pip3 install \
|
||||
PyGithub \
|
||||
black==23.12.0 \
|
||||
boto3 \
|
||||
codespell==2.2.1 \
|
||||
mypy==1.8.0 \
|
||||
pylint==3.1.0 \
|
||||
python-magic==0.4.24 \
|
||||
requests \
|
||||
thefuzz \
|
||||
types-requests \
|
||||
unidiff \
|
||||
&& rm -rf /root/.cache/pip
|
||||
|
||||
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
|
||||
|
@ -65,46 +65,22 @@ function save_settings_clean()
|
||||
script -q -c "clickhouse-local -q \"select * from system.settings into outfile '$out'\"" --log-out /dev/null
|
||||
}
|
||||
|
||||
# We save the (numeric) version of the old server to compare setting changes between the 2
|
||||
# We do this since we are testing against the latest release, not taking into account release candidates, so we might
|
||||
# be testing current master (24.6) against the latest stable release (24.4)
|
||||
function save_major_version()
|
||||
{
|
||||
local out=$1 && shift
|
||||
clickhouse-local -q "SELECT a[1]::UInt64 * 100 + a[2]::UInt64 as v FROM (Select splitByChar('.', version()) as a) into outfile '$out'"
|
||||
}
|
||||
|
||||
save_settings_clean 'old_settings.native'
|
||||
save_major_version 'old_version.native'
|
||||
|
||||
# Initial run without S3 to create system.*_log on local file system to make it
|
||||
# available for dump via clickhouse-local
|
||||
configure
|
||||
|
||||
function remove_keeper_config()
|
||||
{
|
||||
sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
}
|
||||
|
||||
# async_replication setting doesn't exist on some older versions
|
||||
remove_keeper_config "async_replication" "1"
|
||||
|
||||
# create_if_not_exists feature flag doesn't exist on some older versions
|
||||
remove_keeper_config "create_if_not_exists" "[01]"
|
||||
|
||||
#todo: remove these after 24.3 released.
|
||||
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
|
||||
|
||||
#todo: remove these after 24.3 released.
|
||||
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
|
||||
|
||||
# latest_logs_cache_size_threshold setting doesn't exist on some older versions
|
||||
remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
|
||||
|
||||
# commit_logs_cache_size_threshold setting doesn't exist on some older versions
|
||||
remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+"
|
||||
|
||||
# it contains some new settings, but we can safely remove it
|
||||
rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
|
||||
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
|
||||
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
|
||||
rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml
|
||||
rm /etc/clickhouse-server/config.d/handlers.yaml
|
||||
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
|
||||
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
|
||||
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
|
||||
|
||||
start
|
||||
stop
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
|
||||
@ -116,44 +92,11 @@ export USE_S3_STORAGE_FOR_MERGE_TREE=1
|
||||
export ZOOKEEPER_FAULT_INJECTION=0
|
||||
configure
|
||||
|
||||
# force_sync=false doesn't work correctly on some older versions
|
||||
sudo sed -i "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
|
||||
#todo: remove these after 24.3 released.
|
||||
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
|
||||
|
||||
#todo: remove these after 24.3 released.
|
||||
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
|
||||
|
||||
# async_replication setting doesn't exist on some older versions
|
||||
remove_keeper_config "async_replication" "1"
|
||||
|
||||
# create_if_not_exists feature flag doesn't exist on some older versions
|
||||
remove_keeper_config "create_if_not_exists" "[01]"
|
||||
|
||||
# latest_logs_cache_size_threshold setting doesn't exist on some older versions
|
||||
remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
|
||||
|
||||
# commit_logs_cache_size_threshold setting doesn't exist on some older versions
|
||||
remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+"
|
||||
|
||||
# But we still need default disk because some tables loaded only into it
|
||||
sudo sed -i "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
|
||||
# it contains some new settings, but we can safely remove it
|
||||
rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
|
||||
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
|
||||
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
|
||||
rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml
|
||||
rm /etc/clickhouse-server/config.d/handlers.yaml
|
||||
rm /etc/clickhouse-server/config.d/block_number.xml
|
||||
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
|
||||
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
|
||||
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
|
||||
|
||||
start
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
@ -192,6 +135,7 @@ then
|
||||
save_settings_clean 'new_settings.native'
|
||||
clickhouse-local -nmq "
|
||||
CREATE TABLE old_settings AS file('old_settings.native');
|
||||
CREATE TABLE old_version AS file('old_version.native');
|
||||
CREATE TABLE new_settings AS file('new_settings.native');
|
||||
|
||||
SELECT
|
||||
@ -202,8 +146,11 @@ then
|
||||
LEFT JOIN old_settings ON new_settings.name = old_settings.name
|
||||
WHERE (new_settings.value != old_settings.value) AND (name NOT IN (
|
||||
SELECT arrayJoin(tupleElement(changes, 'name'))
|
||||
FROM system.settings_changes
|
||||
WHERE version = extract(version(), '^(?:\\d+\\.\\d+)')
|
||||
FROM
|
||||
(
|
||||
SELECT *, splitByChar('.', version) AS version_array FROM system.settings_changes
|
||||
)
|
||||
WHERE (version_array[1]::UInt64 * 100 + version_array[2]::UInt64) > (SELECT v FROM old_version LIMIT 1)
|
||||
))
|
||||
SETTINGS join_use_nulls = 1
|
||||
INTO OUTFILE 'changed_settings.txt'
|
||||
@ -216,8 +163,11 @@ then
|
||||
FROM old_settings
|
||||
)) AND (name NOT IN (
|
||||
SELECT arrayJoin(tupleElement(changes, 'name'))
|
||||
FROM system.settings_changes
|
||||
WHERE version = extract(version(), '^(?:\\d+\\.\\d+)')
|
||||
FROM
|
||||
(
|
||||
SELECT *, splitByChar('.', version) AS version_array FROM system.settings_changes
|
||||
)
|
||||
WHERE (version_array[1]::UInt64 * 100 + version_array[2]::UInt64) > (SELECT v FROM old_version LIMIT 1)
|
||||
))
|
||||
INTO OUTFILE 'new_settings.txt'
|
||||
FORMAT PrettyCompactNoEscapes;
|
||||
|
366
docs/changelogs/v24.5.1.1763-stable.md
Normal file
366
docs/changelogs/v24.5.1.1763-stable.md
Normal file
@ -0,0 +1,366 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.5.1.1763-stable (647c154a94d) FIXME as compared to v24.4.1.2088-stable (6d4b31322d1)
|
||||
|
||||
#### Backward Incompatible Change
|
||||
* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make sure to drop such indexes before upgrade and re-create them after upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` deprecated (because it is error-prone). Proper window functions should be used instead. To enable them back, set `allow_deprecated_functions=1`. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Queries from `system.columns` will work faster if there is a large number of columns, but many databases or tables are not granted for `SHOW TABLES`. Note that in previous versions, if you grant `SHOW COLUMNS` to individual columns without granting `SHOW TABLES` to the corresponding tables, the `system.columns` table will show these columns, but in a new version, it will skip the table entirely. Remove trace log messages "Access granted" and "Access denied" that slowed down queries. [#63439](https://github.com/ClickHouse/ClickHouse/pull/63439) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### New Feature
|
||||
* Provide support for AzureBlobStorage function in ClickHouse server to use Azure Workload identity to authenticate against Azure blob storage. If `use_workload_identity` parameter is set in config, [workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications) is used for authentication. [#57881](https://github.com/ClickHouse/ClickHouse/pull/57881) ([Vinay Suryadevara](https://github.com/vinay92-ch)).
|
||||
* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST files instead of relying on the rocksdb built-in memtable. This helps to increase importing speed, especially for long-running insert queries to StorageEmbeddedRocksDB tables. Also, introduce `StorageEmbeddedRocksDB` table settings. [#59163](https://github.com/ClickHouse/ClickHouse/pull/59163) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* User can now parse CRLF with TSV format using a setting `input_format_tsv_crlf_end_of_line`. Closes [#56257](https://github.com/ClickHouse/ClickHouse/issues/56257). [#59747](https://github.com/ClickHouse/ClickHouse/pull/59747) ([Shaun Struwig](https://github.com/Blargian)).
|
||||
* Adds the Form Format to read/write a single record in the application/x-www-form-urlencoded format. [#60199](https://github.com/ClickHouse/ClickHouse/pull/60199) ([Shaun Struwig](https://github.com/Blargian)).
|
||||
* Added possibility to compress in CROSS JOIN. [#60459](https://github.com/ClickHouse/ClickHouse/pull/60459) ([p1rattttt](https://github.com/p1rattttt)).
|
||||
* New setting `input_format_force_null_for_omitted_fields` that forces NULL values for omitted fields. [#60887](https://github.com/ClickHouse/ClickHouse/pull/60887) ([Constantine Peresypkin](https://github.com/pkit)).
|
||||
* Support join with inequal conditions which involve columns from both left and right table. e.g. `t1.y < t2.y`. To enable, `SET allow_experimental_join_condition = 1`. [#60920](https://github.com/ClickHouse/ClickHouse/pull/60920) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Earlier our s3 storage and s3 table function didn't support selecting from archive files. I created a solution that allows to iterate over files inside archives in S3. [#62259](https://github.com/ClickHouse/ClickHouse/pull/62259) ([Daniil Ivanik](https://github.com/divanik)).
|
||||
* Support for conditional function `clamp`. [#62377](https://github.com/ClickHouse/ClickHouse/pull/62377) ([skyoct](https://github.com/skyoct)).
|
||||
* Add npy output format. [#62430](https://github.com/ClickHouse/ClickHouse/pull/62430) ([豪肥肥](https://github.com/HowePa)).
|
||||
* Added SQL functions `generateUUIDv7`, `generateUUIDv7ThreadMonotonic`, `generateUUIDv7NonMonotonic` (with different monotonicity/performance trade-offs) to generate version 7 UUIDs aka. timestamp-based UUIDs with random component. Also added a new function `UUIDToNum` to extract bytes from a UUID and a new function `UUIDv7ToDateTime` to extract timestamp component from a UUID version 7. [#62852](https://github.com/ClickHouse/ClickHouse/pull/62852) ([Alexey Petrunyaka](https://github.com/pet74alex)).
|
||||
* Backported in [#64307](https://github.com/ClickHouse/ClickHouse/issues/64307): Implement Dynamic data type that allows to store values of any type inside it without knowing all of them in advance. Dynamic type is available under a setting `allow_experimental_dynamic_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#63058](https://github.com/ClickHouse/ClickHouse/pull/63058) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST files instead of relying on the rocksdb built-in memtable. This helps to increase importing speed, especially for long-running insert queries to StorageEmbeddedRocksDB tables. Also, introduce StorageEmbeddedRocksDB table settings. [#63324](https://github.com/ClickHouse/ClickHouse/pull/63324) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Raw as a synonym for TSVRaw. [#63394](https://github.com/ClickHouse/ClickHouse/pull/63394) ([Unalian](https://github.com/Unalian)).
|
||||
* Added possibility to do cross join in temporary file if size exceeds limits. [#63432](https://github.com/ClickHouse/ClickHouse/pull/63432) ([p1rattttt](https://github.com/p1rattttt)).
|
||||
* On Linux and MacOS, if the program has STDOUT redirected to a file with a compression extension, use the corresponding compression method instead of nothing (making it behave similarly to `INTO OUTFILE` ). [#63662](https://github.com/ClickHouse/ClickHouse/pull/63662) ([v01dXYZ](https://github.com/v01dXYZ)).
|
||||
* Change warning on high number of attached tables to differentiate tables, views and dictionaries. [#64180](https://github.com/ClickHouse/ClickHouse/pull/64180) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
|
||||
#### Performance Improvement
|
||||
* Skip merging of newly created projection blocks during `INSERT`-s. [#59405](https://github.com/ClickHouse/ClickHouse/pull/59405) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Process string functions XXXUTF8 'asciily' if input strings are all ascii chars. Inspired by https://github.com/apache/doris/pull/29799. Overall speed up by 1.07x~1.62x. Notice that peak memory usage had been decreased in some cases. [#61632](https://github.com/ClickHouse/ClickHouse/pull/61632) ([李扬](https://github.com/taiyang-li)).
|
||||
* Improved performance of selection (`{}`) globs in StorageS3. [#62120](https://github.com/ClickHouse/ClickHouse/pull/62120) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* HostResolver has each IP address several times. If a remote host has several IPs and for some reason (firewall rules, for example) access is allowed on some IPs and forbidden on others, then only the first record of the forbidden IPs is marked as failed, and on each try these IPs have a chance to be chosen (and fail again). Even if this is fixed, the DNS cache is dropped every 120 seconds, and the IPs can be chosen again. [#62652](https://github.com/ClickHouse/ClickHouse/pull/62652) ([Anton Ivashkin](https://github.com/ianton-ru)).
|
||||
* Add a new configuration `prefer_merge_sort_block_bytes` to control the memory usage and speed up sorting 2 times when merging when there are many columns. [#62904](https://github.com/ClickHouse/ClickHouse/pull/62904) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* `clickhouse-local` will start faster. In previous versions, it was not deleting temporary directories by mistake. Now it will. This closes [#62941](https://github.com/ClickHouse/ClickHouse/issues/62941). [#63074](https://github.com/ClickHouse/ClickHouse/pull/63074) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Micro-optimizations for the new analyzer. [#63429](https://github.com/ClickHouse/ClickHouse/pull/63429) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63443](https://github.com/ClickHouse/ClickHouse/pull/63443) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63532](https://github.com/ClickHouse/ClickHouse/pull/63532) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Speed up indices of type `set` a little (around 1.5 times) by removing garbage. [#64098](https://github.com/ClickHouse/ClickHouse/pull/64098) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### Improvement
|
||||
* Maps can now have `Float32`, `Float64`, `Array(T)`, `Map(K,V)` and `Tuple(T1, T2, ...)` as keys. Closes [#54537](https://github.com/ClickHouse/ClickHouse/issues/54537). [#59318](https://github.com/ClickHouse/ClickHouse/pull/59318) ([李扬](https://github.com/taiyang-li)).
|
||||
* Multiline strings with border preservation and column width change. [#59940](https://github.com/ClickHouse/ClickHouse/pull/59940) ([Volodyachan](https://github.com/Volodyachan)).
|
||||
* Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix a crash in asynchronous stack unwinding (such as when using the sampling query profiler) while interpreting debug info. This closes [#60460](https://github.com/ClickHouse/ClickHouse/issues/60460). [#60468](https://github.com/ClickHouse/ClickHouse/pull/60468) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Distinct messages for s3 error 'no key' for cases disk and storage. [#61108](https://github.com/ClickHouse/ClickHouse/pull/61108) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Less contention in filesystem cache (part 4). Allow to keep filesystem cache not filled to the limit by doing additional eviction in the background (controlled by `keep_free_space_size(elements)_ratio`). This allows to release pressure from space reservation for queries (on `tryReserve` method). Also this is done in a lock free way as much as possible, e.g. should not block normal cache usage. [#61250](https://github.com/ClickHouse/ClickHouse/pull/61250) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* The progress bar will work for trivial queries with LIMIT from `system.zeros`, `system.zeros_mt` (it already works for `system.numbers` and `system.numbers_mt`), and the `generateRandom` table function. As a bonus, if the total number of records is greater than the `max_rows_to_read` limit, it will throw an exception earlier. This closes [#58183](https://github.com/ClickHouse/ClickHouse/issues/58183). [#61823](https://github.com/ClickHouse/ClickHouse/pull/61823) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* YAML Merge Key support. [#62685](https://github.com/ClickHouse/ClickHouse/pull/62685) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Enhance error message when non-deterministic function is used with Replicated source. [#62896](https://github.com/ClickHouse/ClickHouse/pull/62896) ([Grégoire Pineau](https://github.com/lyrixx)).
|
||||
* Fix interserver secret for Distributed over Distributed from `remote`. [#63013](https://github.com/ClickHouse/ClickHouse/pull/63013) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Allow using `clickhouse-local` and its shortcuts `clickhouse` and `ch` with a query or queries file as a positional argument. Examples: `ch "SELECT 1"`, `ch --param_test Hello "SELECT {test:String}"`, `ch query.sql`. This closes [#62361](https://github.com/ClickHouse/ClickHouse/issues/62361). [#63081](https://github.com/ClickHouse/ClickHouse/pull/63081) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Support configuration substitutions from YAML files. [#63106](https://github.com/ClickHouse/ClickHouse/pull/63106) ([Eduard Karacharov](https://github.com/korowa)).
|
||||
* Add TTL information in system parts_columns table. [#63200](https://github.com/ClickHouse/ClickHouse/pull/63200) ([litlig](https://github.com/litlig)).
|
||||
* Keep previous data in terminal after picking from skim suggestions. [#63261](https://github.com/ClickHouse/ClickHouse/pull/63261) ([FlameFactory](https://github.com/FlameFactory)).
|
||||
* Width of fields is now calculated correctly, ignoring ANSI escape sequences. [#63270](https://github.com/ClickHouse/ClickHouse/pull/63270) ([Shaun Struwig](https://github.com/Blargian)).
|
||||
* Enable plain_rewritable metadata for local and Azure (azure_blob_storage) object storages. [#63365](https://github.com/ClickHouse/ClickHouse/pull/63365) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Support English-style Unicode quotes, e.g. “Hello”, ‘world’. This is questionable in general but helpful when you type your query in a word processor, such as Google Docs. This closes [#58634](https://github.com/ClickHouse/ClickHouse/issues/58634). [#63381](https://github.com/ClickHouse/ClickHouse/pull/63381) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Allowed to create MaterializedMySQL database without connection to MySQL. [#63397](https://github.com/ClickHouse/ClickHouse/pull/63397) ([Kirill](https://github.com/kirillgarbar)).
|
||||
* Remove copying data when writing to filesystem cache. [#63401](https://github.com/ClickHouse/ClickHouse/pull/63401) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Update the usage of error code `NUMBER_OF_ARGUMENTS_DOESNT_MATCH` by more accurate error codes when appropriate. [#63406](https://github.com/ClickHouse/ClickHouse/pull/63406) ([Yohann Jardin](https://github.com/yohannj)).
|
||||
* `os_user` and `client_hostname` are now correctly set up for queries for command line suggestions in clickhouse-client. This closes [#63430](https://github.com/ClickHouse/ClickHouse/issues/63430). [#63433](https://github.com/ClickHouse/ClickHouse/pull/63433) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed tabulation from line numbering, correct handling of length when moving a line if the value has a tab, added tests. [#63493](https://github.com/ClickHouse/ClickHouse/pull/63493) ([Volodyachan](https://github.com/Volodyachan)).
|
||||
* Add the `aggregate_function_group_array_has_limit_size` setting to support discarding data in some scenarios. [#63516](https://github.com/ClickHouse/ClickHouse/pull/63516) ([zhongyuankai](https://github.com/zhongyuankai)).
|
||||
* Automatically mark a replica of Replicated database as lost and start recovery if some DDL task fails more than `max_retries_before_automatic_recovery` (100 by default) times in a row with the same error. Also, fixed a bug that could cause skipping DDL entries when an exception is thrown during an early stage of entry execution. [#63549](https://github.com/ClickHouse/ClickHouse/pull/63549) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Automatically correct `max_block_size=0` to default value. [#63587](https://github.com/ClickHouse/ClickHouse/pull/63587) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Account failed files in `s3queue_tracked_file_ttl_sec` and `s3queue_tracked_files_limit` for `StorageS3Queue`. [#63638](https://github.com/ClickHouse/ClickHouse/pull/63638) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Add a build_id ALIAS column to trace_log to facilitate auto renaming upon detecting binary changes. This is to address [#52086](https://github.com/ClickHouse/ClickHouse/issues/52086). [#63656](https://github.com/ClickHouse/ClickHouse/pull/63656) ([Zimu Li](https://github.com/woodlzm)).
|
||||
* Enable truncate operation for object storage disks. [#63693](https://github.com/ClickHouse/ClickHouse/pull/63693) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
|
||||
* The loading of the keywords list is now dependent on the server revision and will be disabled for the old versions of ClickHouse server. CC @azat. [#63786](https://github.com/ClickHouse/ClickHouse/pull/63786) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Allow trailing commas in the columns list in the INSERT query. For example, `INSERT INTO test (a, b, c, ) VALUES ...`. [#63803](https://github.com/ClickHouse/ClickHouse/pull/63803) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Better exception messages for the `Regexp` format. [#63804](https://github.com/ClickHouse/ClickHouse/pull/63804) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Allow trailing commas in the `Values` format. For example, this query is allowed: `INSERT INTO test (a, b, c) VALUES (4, 5, 6,);`. [#63810](https://github.com/ClickHouse/ClickHouse/pull/63810) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Clickhouse disks have to read server setting to obtain actual metadata format version. [#63831](https://github.com/ClickHouse/ClickHouse/pull/63831) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Disable pretty format restrictions (`output_format_pretty_max_rows`/`output_format_pretty_max_value_width`) when stdout is not TTY. [#63942](https://github.com/ClickHouse/ClickHouse/pull/63942) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Exception handling now works when ClickHouse is used inside AWS Lambda. Author: [Alexey Coolnev](https://github.com/acoolnev). [#64014](https://github.com/ClickHouse/ClickHouse/pull/64014) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Throw `CANNOT_DECOMPRESS` instead of `CORRUPTED_DATA` on invalid compressed data passed via HTTP. [#64036](https://github.com/ClickHouse/ClickHouse/pull/64036) ([vdimir](https://github.com/vdimir)).
|
||||
* A tip for a single large number in Pretty formats now works for Nullable and LowCardinality. This closes [#61993](https://github.com/ClickHouse/ClickHouse/issues/61993). [#64084](https://github.com/ClickHouse/ClickHouse/pull/64084) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Now backups with azure blob storage will use multicopy. [#64116](https://github.com/ClickHouse/ClickHouse/pull/64116) ([alesapin](https://github.com/alesapin)).
|
||||
* Add metrics, logs, and thread names around parts filtering with indices. [#64130](https://github.com/ClickHouse/ClickHouse/pull/64130) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Allow to use native copy for azure even with different containers. [#64154](https://github.com/ClickHouse/ClickHouse/pull/64154) ([alesapin](https://github.com/alesapin)).
|
||||
* Finally enable native copy for azure. [#64182](https://github.com/ClickHouse/ClickHouse/pull/64182) ([alesapin](https://github.com/alesapin)).
|
||||
* Ignore `allow_suspicious_primary_key` on `ATTACH` and verify on `ALTER`. [#64202](https://github.com/ClickHouse/ClickHouse/pull/64202) ([Azat Khuzhin](https://github.com/azat)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* ClickHouse is built with clang-18. A lot of new checks from clang-tidy-18 have been enabled. [#60469](https://github.com/ClickHouse/ClickHouse/pull/60469) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Re-enable broken s390x build in CI. [#63135](https://github.com/ClickHouse/ClickHouse/pull/63135) ([Harry Lee](https://github.com/HarryLeeIBM)).
|
||||
* The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Information about every symbol in every translation unit will be collected in the CI database for every build in the CI. This closes [#63494](https://github.com/ClickHouse/ClickHouse/issues/63494). [#63495](https://github.com/ClickHouse/ClickHouse/pull/63495) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Experimentally support loongarch64 as a new platform for ClickHouse. [#63733](https://github.com/ClickHouse/ClickHouse/pull/63733) ([qiangxuhui](https://github.com/qiangxuhui)).
|
||||
* Update Apache Datasketches library. It resolves [#63858](https://github.com/ClickHouse/ClickHouse/issues/63858). [#63923](https://github.com/ClickHouse/ClickHouse/pull/63923) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Enable GRPC support for aarch64 linux while cross-compiling binary. [#64072](https://github.com/ClickHouse/ClickHouse/pull/64072) ([alesapin](https://github.com/alesapin)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix making backup when multiple shards are used. This PR fixes [#56566](https://github.com/ClickHouse/ClickHouse/issues/56566). [#57684](https://github.com/ClickHouse/ClickHouse/pull/57684) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix passing projections/indexes from CREATE query into inner table of MV. [#59183](https://github.com/ClickHouse/ClickHouse/pull/59183) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix boundRatio incorrect merge. [#60532](https://github.com/ClickHouse/ClickHouse/pull/60532) ([Tao Wang](https://github.com/wangtZJU)).
|
||||
* Fix crash when using some functions with low-cardinality columns. [#61966](https://github.com/ClickHouse/ClickHouse/pull/61966) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix queries with FINAL giving wrong results when the table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Improve the detection of cgroups v2 memory controller in unusual locations. This fixes a warning that the cgroup memory observer was disabled because no cgroups v1 or v2 current memory file could be found. [#62903](https://github.com/ClickHouse/ClickHouse/pull/62903) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix subsequent use of external tables in client. [#62964](https://github.com/ClickHouse/ClickHouse/pull/62964) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix bug which could lead to server to accept connections before server is actually loaded. [#63181](https://github.com/ClickHouse/ClickHouse/pull/63181) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix intersect parts when restart after drop range. [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)).
|
||||
* JOIN filter push down filled join fix. Closes [#63228](https://github.com/ClickHouse/ClickHouse/issues/63228). [#63234](https://github.com/ClickHouse/ClickHouse/pull/63234) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix infinite loop while listing objects in Azure blob storage. [#63257](https://github.com/ClickHouse/ClickHouse/pull/63257) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* CROSS join can be executed with any value `join_algorithm` setting, close [#62431](https://github.com/ClickHouse/ClickHouse/issues/62431). [#63273](https://github.com/ClickHouse/ClickHouse/pull/63273) ([vdimir](https://github.com/vdimir)).
|
||||
* Fixed a potential crash caused by a `no space left` error when temporary data in the cache is used. [#63346](https://github.com/ClickHouse/ClickHouse/pull/63346) ([vdimir](https://github.com/vdimir)).
|
||||
* Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* Fix debug assert when using grouping WITH ROLLUP and LowCardinality types. [#63398](https://github.com/ClickHouse/ClickHouse/pull/63398) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix logical errors in queries with `GROUPING SETS` and `WHERE` and `group_by_use_nulls = true`, close [#60538](https://github.com/ClickHouse/ClickHouse/issues/60538). [#63405](https://github.com/ClickHouse/ClickHouse/pull/63405) ([vdimir](https://github.com/vdimir)).
|
||||
* Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)).
|
||||
* Insert QueryFinish on AsyncInsertFlush with no data. [#63483](https://github.com/ClickHouse/ClickHouse/pull/63483) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix `system.query_log.used_dictionaries` logging. [#63487](https://github.com/ClickHouse/ClickHouse/pull/63487) ([Eduard Karacharov](https://github.com/korowa)).
|
||||
* Avoid segfault in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix rabbitmq heap-use-after-free found by clang-18, which can happen if an error is thrown from RabbitMQ during initialization of exchange and queues. [#63515](https://github.com/ClickHouse/ClickHouse/pull/63515) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix crash on exit with sentry enabled (due to openssl destroyed before sentry). [#63548](https://github.com/ClickHouse/ClickHouse/pull/63548) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix support for Array and Map with Keyed hashing functions and materialized keys. [#63628](https://github.com/ClickHouse/ClickHouse/pull/63628) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
|
||||
* Fixed Parquet filter pushdown not working with Analyzer. [#63642](https://github.com/ClickHouse/ClickHouse/pull/63642) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* It is forbidden to convert MergeTree to replicated if the zookeeper path for this table already exists. [#63670](https://github.com/ClickHouse/ClickHouse/pull/63670) ([Kirill](https://github.com/kirillgarbar)).
|
||||
* Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix rare case with missing data in the result of distributed query. [#63691](https://github.com/ClickHouse/ClickHouse/pull/63691) ([vdimir](https://github.com/vdimir)).
|
||||
* Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Flatten_nested is broken with replicated database. [#63695](https://github.com/ClickHouse/ClickHouse/pull/63695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix `SIZES_OF_COLUMNS_DOESNT_MATCH` error for queries with `arrayJoin` function in `WHERE`. Fixes [#63653](https://github.com/ClickHouse/ClickHouse/issues/63653). [#63722](https://github.com/ClickHouse/ClickHouse/pull/63722) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)).
|
||||
* `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (when `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fix possible crash with SYSTEM UNLOAD PRIMARY KEY. [#63778](https://github.com/ClickHouse/ClickHouse/pull/63778) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix a query with a duplicating cycling alias. Fixes [#63320](https://github.com/ClickHouse/ClickHouse/issues/63320). [#63791](https://github.com/ClickHouse/ClickHouse/pull/63791) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed performance degradation of parsing data formats in INSERT query. This closes [#62918](https://github.com/ClickHouse/ClickHouse/issues/62918). This partially reverts [#42284](https://github.com/ClickHouse/ClickHouse/issues/42284), which breaks the original design and introduces more problems. [#63801](https://github.com/ClickHouse/ClickHouse/pull/63801) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Add 'endpoint_subpath' S3 URI setting to allow plain_rewritable disks to share the same endpoint. [#63806](https://github.com/ClickHouse/ClickHouse/pull/63806) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Fix queries using parallel read buffer (e.g. with max_download_threads > 0) getting stuck when threads cannot be allocated. [#63814](https://github.com/ClickHouse/ClickHouse/pull/63814) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Allow JOIN filter push down to both streams if only single equivalent column is used in query. Closes [#63799](https://github.com/ClickHouse/ClickHouse/issues/63799). [#63819](https://github.com/ClickHouse/ClickHouse/pull/63819) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Remove the data from all disks after DROP with the Lazy database engines. Without these changes, orphaned data will remain on the disks. [#63848](https://github.com/ClickHouse/ClickHouse/pull/63848) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
|
||||
* Fix incorrect select query result when parallel replicas were used to read from a Materialized View. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fixes in `find_super_nodes` and `find_big_family` commands of keeper-client: - do not fail on ZNONODE errors - find super nodes inside super nodes - properly calculate subtree node count. [#63862](https://github.com/ClickHouse/ClickHouse/pull/63862) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Fix an error `Database name is empty` for remote queries with lambdas over the cluster with modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (has been an issue since 2022, that leads to periodic server crashes, especially if you were using distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed `EXPLAIN CURRENT TRANSACTION` query. [#63926](https://github.com/ClickHouse/ClickHouse/pull/63926) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix analyzer - IN function with arbitrarily deep sub-selects in materialized view to use insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Allow `ALTER TABLE .. MODIFY|RESET SETTING` and `ALTER TABLE .. MODIFY COMMENT` for plain_rewritable disk. [#63933](https://github.com/ClickHouse/ClickHouse/pull/63933) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Fix Recursive CTE with distributed queries. Closes [#63790](https://github.com/ClickHouse/ClickHouse/issues/63790). [#63939](https://github.com/ClickHouse/ClickHouse/pull/63939) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix resolve of unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix the `Not found column` error for queries with `skip_unused_shards = 1`, `LIMIT BY`, and the new analyzer. Fixes [#63943](https://github.com/ClickHouse/ClickHouse/issues/63943). [#63983](https://github.com/ClickHouse/ClickHouse/pull/63983) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* (Low-quality third-party Kusto Query Language). Resolve Client Abortion Issue When Using KQL Table Function in Interactive Mode. [#63992](https://github.com/ClickHouse/ClickHouse/pull/63992) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Backported in [#64356](https://github.com/ClickHouse/ClickHouse/issues/64356): Fix a `Cyclic aliases` error for cyclic aliases of different type (expression and function). Fixes [#63205](https://github.com/ClickHouse/ClickHouse/issues/63205). [#63993](https://github.com/ClickHouse/ClickHouse/pull/63993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Do not throw `Storage doesn't support FINAL` error for remote queries over non-MergeTree tables with `final = true` and new analyzer. Fixes [#63960](https://github.com/ClickHouse/ClickHouse/issues/63960). [#64037](https://github.com/ClickHouse/ClickHouse/pull/64037) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Add missing settings to recoverLostReplica. [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix unwind on SIGSEGV on aarch64 (due to small stack for signal). [#64058](https://github.com/ClickHouse/ClickHouse/pull/64058) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#64324](https://github.com/ClickHouse/ClickHouse/issues/64324): This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline. Closes [#63777](https://github.com/ClickHouse/ClickHouse/issues/63777). [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)).
|
||||
* Backported in [#64384](https://github.com/ClickHouse/ClickHouse/issues/64384): Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Fix azure backup writing multipart blocks as 1mb (read buffer size) instead of max_upload_part_size. [#64117](https://github.com/ClickHouse/ClickHouse/pull/64117) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#64541](https://github.com/ClickHouse/ClickHouse/issues/64541): Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#64332](https://github.com/ClickHouse/ClickHouse/issues/64332): The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#64692](https://github.com/ClickHouse/ClickHouse/issues/64692): Fix Query Tree size validation. Closes [#63701](https://github.com/ClickHouse/ClickHouse/issues/63701). [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Backported in [#64411](https://github.com/ClickHouse/ClickHouse/issues/64411): Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. Fixes [#64172](https://github.com/ClickHouse/ClickHouse/issues/64172). [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#64625](https://github.com/ClickHouse/ClickHouse/issues/64625): Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#64682](https://github.com/ClickHouse/ClickHouse/issues/64682): Fix [#64612](https://github.com/ClickHouse/ClickHouse/issues/64612). Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)).
|
||||
|
||||
#### CI Fix or Improvement (changelog entry is not required)
|
||||
|
||||
* Implement cumulative A Sync status. [#61464](https://github.com/ClickHouse/ClickHouse/pull/61464) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add ability to run Azure tests in PR with label. [#63196](https://github.com/ClickHouse/ClickHouse/pull/63196) ([alesapin](https://github.com/alesapin)).
|
||||
* Add azure run with msan. [#63238](https://github.com/ClickHouse/ClickHouse/pull/63238) ([alesapin](https://github.com/alesapin)).
|
||||
* Improve cloud backport script. [#63282](https://github.com/ClickHouse/ClickHouse/pull/63282) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Use `/commit/` to have the URLs in [reports](https://play.clickhouse.com/play?user=play#c2VsZWN0IGRpc3RpbmN0IGNvbW1pdF91cmwgZnJvbSBjaGVja3Mgd2hlcmUgY2hlY2tfc3RhcnRfdGltZSA+PSBub3coKSAtIGludGVydmFsIDEgbW9udGggYW5kIHB1bGxfcmVxdWVzdF9udW1iZXI9NjA1MzI=) like https://github.com/ClickHouse/ClickHouse/commit/44f8bc5308b53797bec8cccc3bd29fab8a00235d and not like https://github.com/ClickHouse/ClickHouse/commits/44f8bc5308b53797bec8cccc3bd29fab8a00235d. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Extra constraints for stress and fuzzer tests. [#63470](https://github.com/ClickHouse/ClickHouse/pull/63470) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix 02362_part_log_merge_algorithm flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
|
||||
* Fix test_odbc_interaction for aarch64 [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix test `test_catboost_evaluate` for aarch64. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)).
|
||||
* Remove HDFS from disks config for one integration test for arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)).
|
||||
* Bump version for old image in test_short_strings_aggregation to make it work on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)).
|
||||
* Disable test `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)).
|
||||
* Include checks like `Stateless tests (asan, distributed cache, meta storage in keeper, s3 storage) [2/3]` in `Mergeable Check` and `A Sync`. [#63945](https://github.com/ClickHouse/ClickHouse/pull/63945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Add `ClickHouseVersion.copy` method. Create a branch release in advance without spinning out the release to increase the stability. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* The mime type is not 100% reliable for Python and shell scripts without shebangs; add a check for file extension. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add retries in git submodule update. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
|
||||
|
||||
* Backported in [#64591](https://github.com/ClickHouse/ClickHouse/issues/64591): Disabled `enable_vertical_final` setting by default. This feature should not be used because it has a bug: [#64543](https://github.com/ClickHouse/ClickHouse/issues/64543). [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
|
||||
#### NO CL ENTRY
|
||||
|
||||
* NO CL ENTRY: 'Revert "Do not remove server constants from GROUP BY key for secondary query."'. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* NO CL ENTRY: 'Revert "Introduce bulk loading to StorageEmbeddedRocksDB"'. [#63316](https://github.com/ClickHouse/ClickHouse/pull/63316) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* NO CL ENTRY: 'Add tags for the test 03000_traverse_shadow_system_data_paths.sql to make it stable'. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* NO CL ENTRY: 'Revert "Revert "Do not remove server constants from GROUP BY key for secondary query.""'. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* NO CL ENTRY: 'Revert "Fix index analysis for `DateTime64`"'. [#63525](https://github.com/ClickHouse/ClickHouse/pull/63525) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* NO CL ENTRY: 'Add `jwcrypto` to integration tests runner'. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)).
|
||||
* NO CL ENTRY: 'Follow-up for the `binary_symbols` table in CI'. [#63802](https://github.com/ClickHouse/ClickHouse/pull/63802) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* NO CL ENTRY: 'chore(ci-workers): remove reusable from tailscale key'. [#63999](https://github.com/ClickHouse/ClickHouse/pull/63999) ([Gabriel Martinez](https://github.com/GMartinez-Sisti)).
|
||||
* NO CL ENTRY: 'Revert "Update gui.md - Add ch-ui to open-source available tools."'. [#64064](https://github.com/ClickHouse/ClickHouse/pull/64064) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* NO CL ENTRY: 'Prevent stack overflow in Fuzzer and Stress test'. [#64082](https://github.com/ClickHouse/ClickHouse/pull/64082) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* NO CL ENTRY: 'Revert "Prevent conversion to Replicated if zookeeper path already exists"'. [#64214](https://github.com/ClickHouse/ClickHouse/pull/64214) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Remove http_max_chunk_size setting (too internal) [#60852](https://github.com/ClickHouse/ClickHouse/pull/60852) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix race in refreshable materialized views causing SELECT to fail sometimes [#60883](https://github.com/ClickHouse/ClickHouse/pull/60883) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Parallel replicas: table check failover [#61935](https://github.com/ClickHouse/ClickHouse/pull/61935) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Avoid crashing on column type mismatch in a few dozen places [#62087](https://github.com/ClickHouse/ClickHouse/pull/62087) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix optimize_if_chain_to_multiif const NULL handling [#62104](https://github.com/ClickHouse/ClickHouse/pull/62104) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Use intrusive lists for `ResourceRequest` instead of deque [#62165](https://github.com/ClickHouse/ClickHouse/pull/62165) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Analyzer: Fix validateAggregates for tables with different aliases [#62346](https://github.com/ClickHouse/ClickHouse/pull/62346) ([vdimir](https://github.com/vdimir)).
|
||||
* Improve code and tests of `DROP` of multiple tables [#62359](https://github.com/ClickHouse/ClickHouse/pull/62359) ([zhongyuankai](https://github.com/zhongyuankai)).
|
||||
* Fix exception message during writing to partitioned s3/hdfs/azure path with globs [#62423](https://github.com/ClickHouse/ClickHouse/pull/62423) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Support UBSan on Clang-19 (master) [#62466](https://github.com/ClickHouse/ClickHouse/pull/62466) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Save the stacktrace of thread waiting on failing AsyncLoader job [#62719](https://github.com/ClickHouse/ClickHouse/pull/62719) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* group_by_use_nulls strikes back [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Analyzer: prefer column name to alias from array join [#62995](https://github.com/ClickHouse/ClickHouse/pull/62995) ([vdimir](https://github.com/vdimir)).
|
||||
* CI: try separate the workflows file for GitHub's Merge Queue [#63123](https://github.com/ClickHouse/ClickHouse/pull/63123) ([Max K.](https://github.com/maxknv)).
|
||||
* Try to fix coverage tests [#63130](https://github.com/ClickHouse/ClickHouse/pull/63130) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix azure backup flaky test [#63158](https://github.com/ClickHouse/ClickHouse/pull/63158) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Merging [#60920](https://github.com/ClickHouse/ClickHouse/issues/60920) [#63159](https://github.com/ClickHouse/ClickHouse/pull/63159) ([vdimir](https://github.com/vdimir)).
|
||||
* QueryAnalysisPass improve QUALIFY validation [#63162](https://github.com/ClickHouse/ClickHouse/pull/63162) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Add numpy tests for different endianness [#63189](https://github.com/ClickHouse/ClickHouse/pull/63189) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Fallback action-runner to autoupdate when it's unable to start [#63195](https://github.com/ClickHouse/ClickHouse/pull/63195) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix possible endless loop while reading from azure [#63197](https://github.com/ClickHouse/ClickHouse/pull/63197) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Add information about materialized view security bug fix into the changelog [#63204](https://github.com/ClickHouse/ClickHouse/pull/63204) ([pufit](https://github.com/pufit)).
|
||||
* Disable one query from 02994_sanity_check_settings [#63208](https://github.com/ClickHouse/ClickHouse/pull/63208) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Enable custom parquet encoder by default, attempt 2 [#63210](https://github.com/ClickHouse/ClickHouse/pull/63210) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Update version after release [#63215](https://github.com/ClickHouse/ClickHouse/pull/63215) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Update version_date.tsv and changelogs after v24.4.1.2088-stable [#63217](https://github.com/ClickHouse/ClickHouse/pull/63217) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Update version_date.tsv and changelogs after v24.3.3.102-lts [#63226](https://github.com/ClickHouse/ClickHouse/pull/63226) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Update version_date.tsv and changelogs after v24.2.3.70-stable [#63227](https://github.com/ClickHouse/ClickHouse/pull/63227) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Return back [#61551](https://github.com/ClickHouse/ClickHouse/issues/61551) (More optimal loading of marks) [#63233](https://github.com/ClickHouse/ClickHouse/pull/63233) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Hide CI options under a spoiler [#63237](https://github.com/ClickHouse/ClickHouse/pull/63237) ([Konstantin Bogdanov](https://github.com/thevar1able)).
|
||||
* Add `FROM` keyword to `TRUNCATE ALL TABLES` [#63241](https://github.com/ClickHouse/ClickHouse/pull/63241) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Minor follow-up to a renaming PR [#63260](https://github.com/ClickHouse/ClickHouse/pull/63260) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* More checks for concurrently deleted files and dirs in system.remote_data_paths [#63274](https://github.com/ClickHouse/ClickHouse/pull/63274) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Fix SettingsChangesHistory.h for allow_experimental_join_condition [#63278](https://github.com/ClickHouse/ClickHouse/pull/63278) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Update version_date.tsv and changelogs after v23.8.14.6-lts [#63285](https://github.com/ClickHouse/ClickHouse/pull/63285) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Fix azure flaky test [#63286](https://github.com/ClickHouse/ClickHouse/pull/63286) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Fix deadlock in `CacheDictionaryUpdateQueue` in case of exception in constructor [#63287](https://github.com/ClickHouse/ClickHouse/pull/63287) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* DiskApp: fix 'list --recursive /' and crash on invalid arguments [#63296](https://github.com/ClickHouse/ClickHouse/pull/63296) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix terminate because of unhandled exception in `MergeTreeDeduplicationLog::shutdown` [#63298](https://github.com/ClickHouse/ClickHouse/pull/63298) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Move s3_plain_rewritable unit test to shell [#63317](https://github.com/ClickHouse/ClickHouse/pull/63317) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Add tests for [#63264](https://github.com/ClickHouse/ClickHouse/issues/63264) [#63321](https://github.com/ClickHouse/ClickHouse/pull/63321) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Try fix segfault in `MergeTreeReadPoolBase::createTask` [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Update README.md [#63326](https://github.com/ClickHouse/ClickHouse/pull/63326) ([Tyler Hannan](https://github.com/tylerhannan)).
|
||||
* Skip inaccessible table dirs in system.remote_data_paths [#63330](https://github.com/ClickHouse/ClickHouse/pull/63330) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Add test for [#56287](https://github.com/ClickHouse/ClickHouse/issues/56287) [#63340](https://github.com/ClickHouse/ClickHouse/pull/63340) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Update README.md [#63350](https://github.com/ClickHouse/ClickHouse/pull/63350) ([Tyler Hannan](https://github.com/tylerhannan)).
|
||||
* Add test for [#48049](https://github.com/ClickHouse/ClickHouse/issues/48049) [#63351](https://github.com/ClickHouse/ClickHouse/pull/63351) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add option `query_id_prefix` to `clickhouse-benchmark` [#63352](https://github.com/ClickHouse/ClickHouse/pull/63352) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Rollback azurite to working version [#63354](https://github.com/ClickHouse/ClickHouse/pull/63354) ([alesapin](https://github.com/alesapin)).
|
||||
* Randomize setting `enable_block_offset_column` in stress tests [#63355](https://github.com/ClickHouse/ClickHouse/pull/63355) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix AST parsing of invalid type names [#63357](https://github.com/ClickHouse/ClickHouse/pull/63357) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix some 00002_log_and_exception_messages_formatting flakiness [#63358](https://github.com/ClickHouse/ClickHouse/pull/63358) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Add a test for [#55655](https://github.com/ClickHouse/ClickHouse/issues/55655) [#63380](https://github.com/ClickHouse/ClickHouse/pull/63380) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix data race in `reportBrokenPart` [#63396](https://github.com/ClickHouse/ClickHouse/pull/63396) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Workaround for `oklch()` inside canvas bug for firefox [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Add test for issue [#47862](https://github.com/ClickHouse/ClickHouse/issues/47862) [#63424](https://github.com/ClickHouse/ClickHouse/pull/63424) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix parsing of `CREATE INDEX` query [#63425](https://github.com/ClickHouse/ClickHouse/pull/63425) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* We are using Shared Catalog in the CI Logs cluster [#63442](https://github.com/ClickHouse/ClickHouse/pull/63442) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix collection of coverage data in the CI Logs cluster [#63453](https://github.com/ClickHouse/ClickHouse/pull/63453) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix flaky test for rocksdb bulk sink [#63457](https://github.com/ClickHouse/ClickHouse/pull/63457) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* io_uring: refactor get reader from context [#63475](https://github.com/ClickHouse/ClickHouse/pull/63475) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* Analyzer setting max_streams_to_max_threads_ratio overflow fix [#63478](https://github.com/ClickHouse/ClickHouse/pull/63478) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Add setting for better rendering of multiline string for pretty format [#63479](https://github.com/ClickHouse/ClickHouse/pull/63479) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Fix logical error when reloading config with customly created web disk broken after [#56367](https://github.com/ClickHouse/ClickHouse/issues/56367) [#63484](https://github.com/ClickHouse/ClickHouse/pull/63484) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Add test for [#49307](https://github.com/ClickHouse/ClickHouse/issues/49307) [#63486](https://github.com/ClickHouse/ClickHouse/pull/63486) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Remove leftovers of GCC support in cmake rules [#63488](https://github.com/ClickHouse/ClickHouse/pull/63488) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix ProfileEventTimeIncrement code [#63489](https://github.com/ClickHouse/ClickHouse/pull/63489) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* MergeTreePrefetchedReadPool: Print parent name when logging projection parts [#63522](https://github.com/ClickHouse/ClickHouse/pull/63522) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Correctly stop `asyncCopy` tasks in all cases [#63523](https://github.com/ClickHouse/ClickHouse/pull/63523) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Almost everything should work on AArch64 (Part of [#58061](https://github.com/ClickHouse/ClickHouse/issues/58061)) [#63527](https://github.com/ClickHouse/ClickHouse/pull/63527) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Update randomization of `old_parts_lifetime` [#63530](https://github.com/ClickHouse/ClickHouse/pull/63530) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Update 02240_system_filesystem_cache_table.sh [#63531](https://github.com/ClickHouse/ClickHouse/pull/63531) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix data race in `DistributedSink` [#63538](https://github.com/ClickHouse/ClickHouse/pull/63538) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix azure tests run on master [#63540](https://github.com/ClickHouse/ClickHouse/pull/63540) ([alesapin](https://github.com/alesapin)).
|
||||
* Find a proper commit for cumulative `A Sync` status [#63543](https://github.com/ClickHouse/ClickHouse/pull/63543) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add `no-s3-storage` tag to local_plain_rewritable ut [#63546](https://github.com/ClickHouse/ClickHouse/pull/63546) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Go back to upstream lz4 submodule [#63574](https://github.com/ClickHouse/ClickHouse/pull/63574) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix logical error in ColumnTuple::tryInsert() [#63583](https://github.com/ClickHouse/ClickHouse/pull/63583) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* harmonize sumMap error messages on ILLEGAL_TYPE_OF_ARGUMENT [#63619](https://github.com/ClickHouse/ClickHouse/pull/63619) ([Yohann Jardin](https://github.com/yohannj)).
|
||||
* Update README.md [#63631](https://github.com/ClickHouse/ClickHouse/pull/63631) ([Tyler Hannan](https://github.com/tylerhannan)).
|
||||
* Ignore global profiler if system.trace_log is not enabled and fix really disable it for keeper standalone build [#63632](https://github.com/ClickHouse/ClickHouse/pull/63632) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixes for 00002_log_and_exception_messages_formatting [#63634](https://github.com/ClickHouse/ClickHouse/pull/63634) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix tests flakiness due to long SYSTEM FLUSH LOGS (explicitly specify old_parts_lifetime) [#63639](https://github.com/ClickHouse/ClickHouse/pull/63639) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Update clickhouse-test help section [#63663](https://github.com/ClickHouse/ClickHouse/pull/63663) ([Ali](https://github.com/xogoodnow)).
|
||||
* Fix bad test `02950_part_log_bytes_uncompressed` [#63672](https://github.com/ClickHouse/ClickHouse/pull/63672) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove leftovers of `optimize_monotonous_functions_in_order_by` [#63674](https://github.com/ClickHouse/ClickHouse/pull/63674) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* tests: attempt to fix 02340_parts_refcnt_mergetree flakiness [#63684](https://github.com/ClickHouse/ClickHouse/pull/63684) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Parallel replicas: simple cleanup [#63685](https://github.com/ClickHouse/ClickHouse/pull/63685) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Cancel S3 reads properly when parallel reads are used [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Explain map insertion order [#63690](https://github.com/ClickHouse/ClickHouse/pull/63690) ([Mark Needham](https://github.com/mneedham)).
|
||||
* selectRangesToRead() simple cleanup [#63692](https://github.com/ClickHouse/ClickHouse/pull/63692) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fix fuzzed analyzer_join_with_constant query [#63702](https://github.com/ClickHouse/ClickHouse/pull/63702) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Add missing explicit instantiations of ColumnUnique [#63718](https://github.com/ClickHouse/ClickHouse/pull/63718) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Better asserts in ColumnString.h [#63719](https://github.com/ClickHouse/ClickHouse/pull/63719) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Don't randomize some settings in 02941_variant_type_* tests to avoid timeouts [#63721](https://github.com/ClickHouse/ClickHouse/pull/63721) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix flaky 03145_non_loaded_projection_backup.sh [#63728](https://github.com/ClickHouse/ClickHouse/pull/63728) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Userspace page cache: don't collect stats if cache is unused [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix insignificant UBSAN error in QueryAnalyzer::replaceNodesWithPositionalArguments() [#63734](https://github.com/ClickHouse/ClickHouse/pull/63734) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix a bug in resolving matcher inside lambda inside ARRAY JOIN [#63744](https://github.com/ClickHouse/ClickHouse/pull/63744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Remove unused CaresPTRResolver::cancel_requests method [#63754](https://github.com/ClickHouse/ClickHouse/pull/63754) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Do not hide disk name [#63756](https://github.com/ClickHouse/ClickHouse/pull/63756) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* CI: remove Cancel and Debug workflows as redundant [#63757](https://github.com/ClickHouse/ClickHouse/pull/63757) ([Max K.](https://github.com/maxknv)).
|
||||
* Security Policy: Add notification process [#63773](https://github.com/ClickHouse/ClickHouse/pull/63773) ([Leticia Webb](https://github.com/leticiawebb)).
|
||||
* Fix typo [#63774](https://github.com/ClickHouse/ClickHouse/pull/63774) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix fuzzer when only explicit faults are used [#63775](https://github.com/ClickHouse/ClickHouse/pull/63775) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Settings typo [#63782](https://github.com/ClickHouse/ClickHouse/pull/63782) ([Rory Crispin](https://github.com/RoryCrispin)).
|
||||
* Changed the previous value of `output_format_pretty_preserve_border_for_multiline_string` setting [#63783](https://github.com/ClickHouse/ClickHouse/pull/63783) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* fix antlr insertStmt for issue 63657 [#63811](https://github.com/ClickHouse/ClickHouse/pull/63811) ([GG Bond](https://github.com/zzyReal666)).
|
||||
* Fix race in `ReplicatedMergeTreeLogEntryData` [#63816](https://github.com/ClickHouse/ClickHouse/pull/63816) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Allow allocation during job destructor in `ThreadPool` [#63829](https://github.com/ClickHouse/ClickHouse/pull/63829) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* io_uring: add basic io_uring clickhouse perf test [#63835](https://github.com/ClickHouse/ClickHouse/pull/63835) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* fix typo [#63838](https://github.com/ClickHouse/ClickHouse/pull/63838) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Remove unnecessary logging statements in MergeJoinTransform.cpp [#63860](https://github.com/ClickHouse/ClickHouse/pull/63860) ([vdimir](https://github.com/vdimir)).
|
||||
* CI: disable ARM integration test cases with libunwind crash [#63867](https://github.com/ClickHouse/ClickHouse/pull/63867) ([Max K.](https://github.com/maxknv)).
|
||||
* Fix some settings values in 02455_one_row_from_csv_memory_usage test to make it less flaky [#63874](https://github.com/ClickHouse/ClickHouse/pull/63874) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Randomise `allow_experimental_parallel_reading_from_replicas` in stress tests [#63899](https://github.com/ClickHouse/ClickHouse/pull/63899) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fix logs test for binary data by converting it to a valid UTF8 string. [#63909](https://github.com/ClickHouse/ClickHouse/pull/63909) ([Alexey Katsman](https://github.com/alexkats)).
|
||||
* More sanity checks for parallel replicas [#63910](https://github.com/ClickHouse/ClickHouse/pull/63910) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Insignificant libunwind build fixes [#63946](https://github.com/ClickHouse/ClickHouse/pull/63946) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Revert multiline pretty changes due to performance problems [#63947](https://github.com/ClickHouse/ClickHouse/pull/63947) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Some usability improvements for c++expr script [#63948](https://github.com/ClickHouse/ClickHouse/pull/63948) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* CI: aarch64: disable arm integration tests with kerberized kafka [#63961](https://github.com/ClickHouse/ClickHouse/pull/63961) ([Max K.](https://github.com/maxknv)).
|
||||
* Slightly better setting `force_optimize_projection_name` [#63997](https://github.com/ClickHouse/ClickHouse/pull/63997) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Better script to collect symbols statistics [#64013](https://github.com/ClickHouse/ClickHouse/pull/64013) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix a typo in Analyzer [#64022](https://github.com/ClickHouse/ClickHouse/pull/64022) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix libbcrypt for FreeBSD build [#64023](https://github.com/ClickHouse/ClickHouse/pull/64023) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix searching for libclang_rt.builtins.*.a on FreeBSD [#64051](https://github.com/ClickHouse/ClickHouse/pull/64051) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix waiting for mutations with retriable errors [#64063](https://github.com/ClickHouse/ClickHouse/pull/64063) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* harmonize h3PointDist* error messages [#64080](https://github.com/ClickHouse/ClickHouse/pull/64080) ([Yohann Jardin](https://github.com/yohannj)).
|
||||
* This log message is better in Trace [#64081](https://github.com/ClickHouse/ClickHouse/pull/64081) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* tests: fix expected error for 03036_reading_s3_archives (fixes CI) [#64089](https://github.com/ClickHouse/ClickHouse/pull/64089) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix sanitizers [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Update llvm/clang to 18.1.6 [#64091](https://github.com/ClickHouse/ClickHouse/pull/64091) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* CI: mergeable check redesign [#64093](https://github.com/ClickHouse/ClickHouse/pull/64093) ([Max K.](https://github.com/maxknv)).
|
||||
* Move `isAllASCII` from UTFHelper to StringUtils [#64108](https://github.com/ClickHouse/ClickHouse/pull/64108) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Clean up .clang-tidy after transition to Clang 18 [#64111](https://github.com/ClickHouse/ClickHouse/pull/64111) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Ignore exception when checking for cgroupsv2 [#64118](https://github.com/ClickHouse/ClickHouse/pull/64118) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix UBSan error in negative positional arguments [#64127](https://github.com/ClickHouse/ClickHouse/pull/64127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Syncing code [#64135](https://github.com/ClickHouse/ClickHouse/pull/64135) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Loosen build resource limits for unusual architectures [#64152](https://github.com/ClickHouse/ClickHouse/pull/64152) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* fix clang tidy [#64179](https://github.com/ClickHouse/ClickHouse/pull/64179) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Fix global query profiler [#64187](https://github.com/ClickHouse/ClickHouse/pull/64187) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* CI: cancel running PR wf after adding to MQ [#64188](https://github.com/ClickHouse/ClickHouse/pull/64188) ([Max K.](https://github.com/maxknv)).
|
||||
* Add debug logging to EmbeddedRocksDBBulkSink [#64203](https://github.com/ClickHouse/ClickHouse/pull/64203) ([vdimir](https://github.com/vdimir)).
|
||||
* Fix special builds (due to excessive resource usage - memory/CPU) [#64204](https://github.com/ClickHouse/ClickHouse/pull/64204) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Add gh to style-check dockerfile [#64227](https://github.com/ClickHouse/ClickHouse/pull/64227) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Followup for [#63691](https://github.com/ClickHouse/ClickHouse/issues/63691) [#64285](https://github.com/ClickHouse/ClickHouse/pull/64285) ([vdimir](https://github.com/vdimir)).
|
||||
* Rename allow_deprecated_functions to allow_deprecated_error_prone_win… [#64358](https://github.com/ClickHouse/ClickHouse/pull/64358) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Update description for settings `cross_join_min_rows_to_compress` and `cross_join_min_bytes_to_compress` [#64360](https://github.com/ClickHouse/ClickHouse/pull/64360) ([Nikita Fomichev](https://github.com/fm4v)).
|
||||
* Rename aggregate_function_group_array_has_limit_size [#64362](https://github.com/ClickHouse/ClickHouse/pull/64362) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Clean settings in 02943_variant_read_subcolumns test [#64437](https://github.com/ClickHouse/ClickHouse/pull/64437) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* CI: Critical bugfix category in PR template [#64480](https://github.com/ClickHouse/ClickHouse/pull/64480) ([Max K.](https://github.com/maxknv)).
|
||||
|
@ -71,7 +71,7 @@ If it fails, fix the style errors following the [code style guide](style.md).
|
||||
```sh
|
||||
mkdir -p /tmp/test_output
|
||||
# running all checks
|
||||
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE clickhouse/style-test
|
||||
python3 tests/ci/style_check.py --no-push
|
||||
|
||||
# run specified check script (e.g.: ./check-mypy)
|
||||
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE --entrypoint= -w/ClickHouse/utils/check-style clickhouse/style-test ./check-mypy
|
||||
|
@ -37,7 +37,7 @@ ways, for example with respect to their DDL/DQL syntax or performance/compressio
|
||||
To use full-text indexes, first enable them in the configuration:
|
||||
|
||||
```sql
|
||||
SET allow_experimental_inverted_index = true;
|
||||
SET allow_experimental_full_text_index = true;
|
||||
```
|
||||
|
||||
A full-text index can be defined on a string column using the following syntax
|
||||
|
@ -178,6 +178,10 @@ Additional parameters that control the behavior of the `MergeTree` (optional):
|
||||
|
||||
`max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify setting [max_partitions_to_read](/docs/en/operations/settings/merge-tree-settings.md/#max-partitions-to-read) in the global setting.
|
||||
|
||||
#### allow_experimental_optimized_row_order
|
||||
|
||||
`allow_experimental_optimized_row_order` - Experimental. Enables the optimization of the row order during inserts to improve the compressibility of the data for compression codecs (e.g. LZ4). Analyzes and reorders the data, and thus increases the CPU overhead of inserts.
|
||||
|
||||
**Example of Sections Setting**
|
||||
|
||||
``` sql
|
||||
|
@ -7,6 +7,8 @@ sidebar_label: Configuration Files
|
||||
# Configuration Files
|
||||
|
||||
The ClickHouse server can be configured with configuration files in XML or YAML syntax. In most installation types, the ClickHouse server runs with `/etc/clickhouse-server/config.xml` as default configuration file, but it is also possible to specify the location of the configuration file manually at server startup using command line option `--config-file=` or `-C`. Additional configuration files may be placed into directory `config.d/` relative to the main configuration file, for example into directory `/etc/clickhouse-server/config.d/`. Files in this directory and the main configuration are merged in a preprocessing step before the configuration is applied in ClickHouse server. Configuration files are merged in alphabetical order. To simplify updates and improve modularization, it is best practice to keep the default `config.xml` file unmodified and place additional customization into `config.d/`.
|
||||
(The ClickHouse keeper configuration lives in `/etc/clickhouse-keeper/keeper_config.xml` and thus the additional files need to be placed in `/etc/clickhouse-keeper/keeper_config.d/`)
|
||||
|
||||
|
||||
It is possible to mix XML and YAML configuration files, for example you could have a main configuration file `config.xml` and additional configuration files `config.d/network.xml`, `config.d/timezone.yaml` and `config.d/keeper.yaml`. Mixing XML and YAML within a single configuration file is not supported. XML configuration files should use `<clickhouse>...</clickhouse>` as top-level tag. In YAML configuration files, `clickhouse:` is optional, the parser inserts it implicitly if absent.
|
||||
|
||||
|
@ -885,3 +885,47 @@ Default value: false
|
||||
**See Also**
|
||||
|
||||
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
|
||||
|
||||
### allow_experimental_optimized_row_order
|
||||
|
||||
Controls whether the row order should be optimized during inserts to improve the compressibility of the newly inserted table part.
|
||||
|
||||
MergeTree tables are (optionally) compressed using [compression codecs](../../sql-reference/statements/create/table.md#column_compression_codec).
|
||||
Generic compression codecs such as LZ4 and ZSTD achieve maximum compression rates if the data exposes patterns.
|
||||
Long runs of the same value typically compress very well.
|
||||
|
||||
If this setting is enabled, ClickHouse attempts to store the data in newly inserted parts in a row order that minimizes the number of equal-value runs across the columns of the new table part.
|
||||
In other words, a small number of equal-value runs means that individual runs are long and compress well.
|
||||
|
||||
Finding the optimal row order is computationally infeasible (NP hard).
|
||||
Therefore, ClickHouse uses a heuristic to quickly find a row order which still improves compression rates over the original row order.
|
||||
|
||||
<details markdown="1">
|
||||
|
||||
<summary>Heuristics for finding a row order</summary>
|
||||
|
||||
It is generally possible to shuffle the rows of a table (or table part) freely as SQL considers the same table (table part) in different row order equivalent.
|
||||
|
||||
This freedom of shuffling rows is restricted when a primary key is defined for the table.
|
||||
In ClickHouse, a primary key `C1, C2, ..., CN` enforces that the table rows are sorted by columns `C1`, `C2`, ... `CN` ([clustered index](https://en.wikipedia.org/wiki/Database_index#Clustered)).
|
||||
As a result, rows can only be shuffled within "equivalence classes" of rows, i.e. rows which have the same values in their primary key columns.
|
||||
The intuition is that primary keys with high-cardinality, e.g. primary keys involving a `DateTime64` timestamp column, lead to many small equivalence classes.
|
||||
Likewise, tables with a low-cardinality primary key create few and large equivalence classes.
|
||||
A table with no primary key represents the extreme case of a single equivalence class which spans all rows.
|
||||
|
||||
The fewer and the larger the equivalence classes are, the higher the degree of freedom when re-shuffling rows.
|
||||
|
||||
The heuristic applied to find the best row order within each equivalence class was suggested by D. Lemire and O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and is based on sorting the rows within each equivalence class by ascending cardinality of the non-primary key columns.
|
||||
It performs three steps:
|
||||
1. Find all equivalence classes based on the row values in primary key columns.
|
||||
2. For each equivalence class, calculate (usually estimate) the cardinalities of the non-primary-key columns.
|
||||
3. For each equivalence class, sort the rows in order of ascending non-primary-key column cardinality.
|
||||
|
||||
</details>
|
||||
|
||||
If enabled, insert operations incur additional CPU costs to analyze and optimize the row order of the new data.
|
||||
INSERTs are expected to take 30-50% longer depending on the data characteristics.
|
||||
Compression rates of LZ4 or ZSTD improve on average by 20-40%.
|
||||
|
||||
This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only few distinct primary key values.
|
||||
High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting.
|
||||
|
@ -1956,7 +1956,7 @@ Possible values:
|
||||
- Positive integer.
|
||||
- 0 — Asynchronous insertions are disabled.
|
||||
|
||||
Default value: `1000000`.
|
||||
Default value: `10485760`.
|
||||
|
||||
### async_insert_max_query_number {#async-insert-max-query-number}
|
||||
|
||||
|
@ -5,10 +5,57 @@ sidebar_position: 107
|
||||
|
||||
# corr
|
||||
|
||||
Syntax: `corr(x, y)`
|
||||
Calculates the [Pearson correlation coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient):
|
||||
|
||||
$$
|
||||
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{\sqrt{\Sigma{(x - \bar{x})^2} * \Sigma{(y - \bar{y})^2}}}
|
||||
$$
|
||||
|
||||
Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error.
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`corrStable`](../reference/corrstable.md) function. It is slower but provides a more accurate result.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
corr(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The Pearson correlation coefficient. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series
|
||||
(
|
||||
i UInt32,
|
||||
x_value Float64,
|
||||
y_value Float64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT corr(x_value, y_value)
|
||||
FROM series;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─corr(x_value, y_value)─┐
|
||||
│ 0.1730265755453256 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
@ -0,0 +1,55 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/corrmatrix
|
||||
sidebar_position: 108
|
||||
---
|
||||
|
||||
# corrMatrix
|
||||
|
||||
Computes the correlation matrix over N variables.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
corrMatrix(x[, ...])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Correlation matrix. [Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test
|
||||
(
|
||||
a UInt32,
|
||||
b Float64,
|
||||
c Float64,
|
||||
d Float64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT arrayMap(x -> round(x, 3), arrayJoin(corrMatrix(a, b, c, d))) AS corrMatrix
|
||||
FROM test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─corrMatrix─────────────┐
|
||||
1. │ [1,-0.096,0.243,0.746] │
|
||||
2. │ [-0.096,1,0.173,0.106] │
|
||||
3. │ [0.243,0.173,1,0.258] │
|
||||
4. │ [0.746,0.106,0.258,1] │
|
||||
└────────────────────────┘
|
||||
```
|
@ -0,0 +1,58 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/corrstable
|
||||
sidebar_position: 107
|
||||
---
|
||||
|
||||
# corrStable
|
||||
|
||||
Calculates the [Pearson correlation coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient):
|
||||
|
||||
$$
|
||||
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{\sqrt{\Sigma{(x - \bar{x})^2} * \Sigma{(y - \bar{y})^2}}}
|
||||
$$
|
||||
|
||||
Similar to the [`corr`](../reference/corr.md) function, but uses a numerically stable algorithm. As a result, `corrStable` is slower than `corr` but produces a more accurate result.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
corrStable(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The Pearson correlation coefficient. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series
|
||||
(
|
||||
i UInt32,
|
||||
x_value Float64,
|
||||
y_value Float64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT corrStable(x_value, y_value)
|
||||
FROM series;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─corrStable(x_value, y_value)─┐
|
||||
│ 0.17302657554532558 │
|
||||
└──────────────────────────────┘
|
||||
```
|
@ -1,14 +1,54 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/covarpop
|
||||
sidebar_position: 36
|
||||
sidebar_position: 37
|
||||
---
|
||||
|
||||
# covarPop
|
||||
|
||||
Syntax: `covarPop(x, y)`
|
||||
Calculates the population covariance:
|
||||
|
||||
Calculates the value of `Σ((x - x̅)(y - y̅)) / n`.
|
||||
$$
|
||||
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{n}
|
||||
$$
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error.
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`covarPopStable`](../reference/covarpopstable.md) function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarPop(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The population covariance between `x` and `y`. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT covarPop(x_value, y_value)
|
||||
FROM series;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─covarPop(x_value, y_value)─┐
|
||||
│ 6.485648 │
|
||||
└────────────────────────────┘
|
||||
```
|
||||
|
@ -0,0 +1,55 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/covarpopmatrix
|
||||
sidebar_position: 36
|
||||
---
|
||||
|
||||
# covarPopMatrix
|
||||
|
||||
Returns the population covariance matrix over N variables.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarPopMatrix(x[, ...])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- Population covariance matrix. [Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test
|
||||
(
|
||||
a UInt32,
|
||||
b Float64,
|
||||
c Float64,
|
||||
d Float64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT arrayMap(x -> round(x, 3), arrayJoin(covarPopMatrix(a, b, c, d))) AS covarPopMatrix
|
||||
FROM test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─covarPopMatrix────────────┐
|
||||
1. │ [8.25,-1.76,4.08,6.748] │
|
||||
2. │ [-1.76,41.07,6.486,2.132] │
|
||||
3. │ [4.08,6.486,34.21,4.755] │
|
||||
4. │ [6.748,2.132,4.755,9.93] │
|
||||
└───────────────────────────┘
|
||||
```
|
@ -0,0 +1,60 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/covarpopstable
|
||||
sidebar_position: 36
|
||||
---
|
||||
|
||||
# covarPopStable
|
||||
|
||||
Calculates the value of the population covariance:
|
||||
|
||||
$$
|
||||
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{n}
|
||||
$$
|
||||
|
||||
It is similar to the [covarPop](../reference/covarpop.md) function, but uses a numerically stable algorithm. As a result, `covarPopStable` is slower than `covarPop` but produces a more accurate result.
|
||||
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarPopStable(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The population covariance between `x` and `y`. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT covarPopStable(x_value, y_value)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
x_value,
|
||||
y_value
|
||||
FROM series
|
||||
);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─covarPopStable(x_value, y_value)─┐
|
||||
│ 6.485648 │
|
||||
└──────────────────────────────────┘
|
||||
```
|
||||
|
@ -7,8 +7,74 @@ sidebar_position: 37
|
||||
|
||||
Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`.
|
||||
|
||||
Returns Float64. When `n <= 1`, returns `nan`.
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`covarSampStable`](../reference/covarsampstable.md) function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarSamp(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The sample covariance between `x` and `y`. For `n <= 1`, `nan` is returned. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT covarSamp(x_value, y_value)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
x_value,
|
||||
y_value
|
||||
FROM series
|
||||
);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarSamp(x_value, y_value)─┐
|
||||
│ 7.206275555555556 │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT covarSamp(x_value, y_value)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
x_value,
|
||||
y_value
|
||||
FROM series LIMIT 1
|
||||
);
|
||||
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarSamp(x_value, y_value)─┐
|
||||
│ nan │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
|
@ -0,0 +1,57 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/covarsampmatrix
|
||||
sidebar_position: 38
|
||||
---
|
||||
|
||||
# covarSampMatrix
|
||||
|
||||
Returns the sample covariance matrix over N variables.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarSampMatrix(x[, ...])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- Sample covariance matrix. [Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test
|
||||
(
|
||||
a UInt32,
|
||||
b Float64,
|
||||
c Float64,
|
||||
d Float64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT arrayMap(x -> round(x, 3), arrayJoin(covarSampMatrix(a, b, c, d))) AS covarSampMatrix
|
||||
FROM test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarSampMatrix─────────────┐
|
||||
1. │ [9.167,-1.956,4.534,7.498] │
|
||||
2. │ [-1.956,45.634,7.206,2.369] │
|
||||
3. │ [4.534,7.206,38.011,5.283] │
|
||||
4. │ [7.498,2.369,5.283,11.034] │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -0,0 +1,73 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/covarsampstable
|
||||
sidebar_position: 37
|
||||
---
|
||||
|
||||
# covarSampStable
|
||||
|
||||
Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`. Similar to [covarSamp](../reference/covarsamp.md) but works slower while providing a lower computational error.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarSampStable(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The sample covariance between `x` and `y`. For `n <= 1`, `inf` is returned. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT covarSampStable(x_value, y_value)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
x_value,
|
||||
y_value
|
||||
FROM series
|
||||
);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarSampStable(x_value, y_value)─┐
|
||||
│ 7.206275555555556 │
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT covarSampStable(x_value, y_value)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
x_value,
|
||||
y_value
|
||||
FROM series LIMIT 1
|
||||
);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarSampStable(x_value, y_value)─┐
|
||||
│ inf │
|
||||
└───────────────────────────────────┘
|
||||
```
|
@ -9,110 +9,116 @@ toc_hidden: true
|
||||
|
||||
Standard aggregate functions:
|
||||
|
||||
- [count](/docs/en/sql-reference/aggregate-functions/reference/count.md)
|
||||
- [min](/docs/en/sql-reference/aggregate-functions/reference/min.md)
|
||||
- [max](/docs/en/sql-reference/aggregate-functions/reference/max.md)
|
||||
- [sum](/docs/en/sql-reference/aggregate-functions/reference/sum.md)
|
||||
- [avg](/docs/en/sql-reference/aggregate-functions/reference/avg.md)
|
||||
- [any](/docs/en/sql-reference/aggregate-functions/reference/any.md)
|
||||
- [stddevPop](/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md)
|
||||
- [stddevPopStable](/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md)
|
||||
- [stddevSamp](/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md)
|
||||
- [stddevSampStable](/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md)
|
||||
- [varPop](/docs/en/sql-reference/aggregate-functions/reference/varpop.md)
|
||||
- [varSamp](/docs/en/sql-reference/aggregate-functions/reference/varsamp.md)
|
||||
- [corr](./corr.md)
|
||||
- [covarPop](/docs/en/sql-reference/aggregate-functions/reference/covarpop.md)
|
||||
- [covarSamp](/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md)
|
||||
- [entropy](./entropy.md)
|
||||
- [exponentialMovingAverage](./exponentialmovingaverage.md)
|
||||
- [intervalLengthSum](./intervalLengthSum.md)
|
||||
- [kolmogorovSmirnovTest](./kolmogorovsmirnovtest.md)
|
||||
- [mannwhitneyutest](./mannwhitneyutest.md)
|
||||
- [median](./median.md)
|
||||
- [rankCorr](./rankCorr.md)
|
||||
- [sumKahan](./sumkahan.md)
|
||||
- [studentTTest](./studentttest.md)
|
||||
- [welchTTest](./welchttest.md)
|
||||
- [count](../reference/count.md)
|
||||
- [min](../reference/min.md)
|
||||
- [max](../reference/max.md)
|
||||
- [sum](../reference/sum.md)
|
||||
- [avg](../reference/avg.md)
|
||||
- [any](../reference/any.md)
|
||||
- [stddevPop](../reference/stddevpop.md)
|
||||
- [stddevPopStable](../reference/stddevpopstable.md)
|
||||
- [stddevSamp](../reference/stddevsamp.md)
|
||||
- [stddevSampStable](../reference/stddevsampstable.md)
|
||||
- [varPop](../reference/varpop.md)
|
||||
- [varSamp](../reference/varsamp.md)
|
||||
- [corr](../reference/corr.md)
|
||||
- [corrStable](../reference/corrstable.md)
|
||||
- [corrMatrix](../reference/corrmatrix.md)
|
||||
- [covarPop](../reference/covarpop.md)
|
||||
- [covarPopStable](../reference/covarpopstable.md)
|
||||
- [covarPopMatrix](../reference/covarpopmatrix.md)
|
||||
- [covarSamp](../reference/covarsamp.md)
|
||||
- [covarSampStable](../reference/covarsampstable.md)
|
||||
- [covarSampMatrix](../reference/covarsampmatrix.md)
|
||||
- [entropy](../reference/entropy.md)
|
||||
- [exponentialMovingAverage](../reference/exponentialmovingaverage.md)
|
||||
- [intervalLengthSum](../reference/intervalLengthSum.md)
|
||||
- [kolmogorovSmirnovTest](../reference/kolmogorovsmirnovtest.md)
|
||||
- [mannwhitneyutest](../reference/mannwhitneyutest.md)
|
||||
- [median](../reference/median.md)
|
||||
- [rankCorr](../reference/rankCorr.md)
|
||||
- [sumKahan](../reference/sumkahan.md)
|
||||
- [studentTTest](../reference/studentttest.md)
|
||||
- [welchTTest](../reference/welchttest.md)
|
||||
|
||||
ClickHouse-specific aggregate functions:
|
||||
|
||||
- [analysisOfVariance](/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md)
|
||||
- [any](/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md)
|
||||
- [anyHeavy](/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md)
|
||||
- [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast.md)
|
||||
- [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md)
|
||||
- [boundingRatio](/docs/en/sql-reference/aggregate-functions/reference/boundrat.md)
|
||||
- [first_value](/docs/en/sql-reference/aggregate-functions/reference/first_value.md)
|
||||
- [last_value](/docs/en/sql-reference/aggregate-functions/reference/last_value.md)
|
||||
- [argMin](/docs/en/sql-reference/aggregate-functions/reference/argmin.md)
|
||||
- [argMax](/docs/en/sql-reference/aggregate-functions/reference/argmax.md)
|
||||
- [avgWeighted](/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md)
|
||||
- [topK](/docs/en/sql-reference/aggregate-functions/reference/topk.md)
|
||||
- [topKWeighted](/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md)
|
||||
- [deltaSum](./deltasum.md)
|
||||
- [deltaSumTimestamp](./deltasumtimestamp.md)
|
||||
- [groupArray](/docs/en/sql-reference/aggregate-functions/reference/grouparray.md)
|
||||
- [groupArrayLast](/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md)
|
||||
- [groupUniqArray](/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md)
|
||||
- [groupArrayInsertAt](/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
|
||||
- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
|
||||
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
|
||||
- [groupArraySample](./grouparraysample.md)
|
||||
- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md)
|
||||
- [groupArrayIntersect](./grouparrayintersect.md)
|
||||
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
|
||||
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
|
||||
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)
|
||||
- [groupBitmap](/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md)
|
||||
- [groupBitmapAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md)
|
||||
- [groupBitmapOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md)
|
||||
- [groupBitmapXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md)
|
||||
- [sumWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md)
|
||||
- [sumMap](/docs/en/sql-reference/aggregate-functions/reference/summap.md)
|
||||
- [sumMapWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md)
|
||||
- [sumMapFiltered](/docs/en/sql-reference/aggregate-functions/parametric-functions.md/#summapfiltered)
|
||||
- [sumMapFilteredWithOverflow](/docs/en/sql-reference/aggregate-functions/parametric-functions.md/#summapfilteredwithoverflow)
|
||||
- [minMap](/docs/en/sql-reference/aggregate-functions/reference/minmap.md)
|
||||
- [maxMap](/docs/en/sql-reference/aggregate-functions/reference/maxmap.md)
|
||||
- [skewSamp](/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md)
|
||||
- [skewPop](/docs/en/sql-reference/aggregate-functions/reference/skewpop.md)
|
||||
- [kurtSamp](/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md)
|
||||
- [kurtPop](/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md)
|
||||
- [uniq](/docs/en/sql-reference/aggregate-functions/reference/uniq.md)
|
||||
- [uniqExact](/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md)
|
||||
- [uniqCombined](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md)
|
||||
- [uniqCombined64](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md)
|
||||
- [uniqHLL12](/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md)
|
||||
- [uniqTheta](/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md)
|
||||
- [quantile](/docs/en/sql-reference/aggregate-functions/reference/quantile.md)
|
||||
- [quantiles](/docs/en/sql-reference/aggregate-functions/reference/quantiles.md)
|
||||
- [quantileExact](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md)
|
||||
- [quantileExactLow](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow)
|
||||
- [quantileExactHigh](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh)
|
||||
- [quantileExactWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md)
|
||||
- [quantileTiming](/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md)
|
||||
- [quantileTimingWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md)
|
||||
- [quantileDeterministic](/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md)
|
||||
- [quantileTDigest](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md)
|
||||
- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
|
||||
- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
|
||||
- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
|
||||
- [quantileDD](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch)
|
||||
- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md)
|
||||
- [singleValueOrNull](/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md)
|
||||
- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
|
||||
- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
|
||||
- [categoricalInformationValue](/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md)
|
||||
- [contingency](./contingency.md)
|
||||
- [cramersV](./cramersv.md)
|
||||
- [cramersVBiasCorrected](./cramersvbiascorrected.md)
|
||||
- [theilsU](./theilsu.md)
|
||||
- [maxIntersections](./maxintersections.md)
|
||||
- [maxIntersectionsPosition](./maxintersectionsposition.md)
|
||||
- [meanZTest](./meanztest.md)
|
||||
- [quantileGK](./quantileGK.md)
|
||||
- [quantileInterpolatedWeighted](./quantileinterpolatedweighted.md)
|
||||
- [sparkBar](./sparkbar.md)
|
||||
- [sumCount](./sumcount.md)
|
||||
- [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md)
|
||||
- [analysisOfVariance](../reference/analysis_of_variance.md)
|
||||
- [any_respect_nulls](../reference/any_respect_nulls.md)
|
||||
- [anyHeavy](../reference/anyheavy.md)
|
||||
- [anyLast](../reference/anylast.md)
|
||||
- [anyLast_respect_nulls](../reference/anylast_respect_nulls.md)
|
||||
- [boundingRatio](../reference/boundrat.md)
|
||||
- [first_value](../reference/first_value.md)
|
||||
- [last_value](../reference/last_value.md)
|
||||
- [argMin](../reference/argmin.md)
|
||||
- [argMax](../reference/argmax.md)
|
||||
- [avgWeighted](../reference/avgweighted.md)
|
||||
- [topK](../reference/topk.md)
|
||||
- [topKWeighted](../reference/topkweighted.md)
|
||||
- [deltaSum](../reference/deltasum.md)
|
||||
- [deltaSumTimestamp](../reference/deltasumtimestamp.md)
|
||||
- [groupArray](../reference/grouparray.md)
|
||||
- [groupArrayLast](../reference/grouparraylast.md)
|
||||
- [groupUniqArray](../reference/groupuniqarray.md)
|
||||
- [groupArrayInsertAt](../reference/grouparrayinsertat.md)
|
||||
- [groupArrayMovingAvg](../reference/grouparraymovingavg.md)
|
||||
- [groupArrayMovingSum](../reference/grouparraymovingsum.md)
|
||||
- [groupArraySample](../reference/grouparraysample.md)
|
||||
- [groupArraySorted](../reference/grouparraysorted.md)
|
||||
- [groupArrayIntersect](../reference/grouparrayintersect.md)
|
||||
- [groupBitAnd](../reference/groupbitand.md)
|
||||
- [groupBitOr](../reference/groupbitor.md)
|
||||
- [groupBitXor](../reference/groupbitxor.md)
|
||||
- [groupBitmap](../reference/groupbitmap.md)
|
||||
- [groupBitmapAnd](../reference/groupbitmapand.md)
|
||||
- [groupBitmapOr](../reference/groupbitmapor.md)
|
||||
- [groupBitmapXor](../reference/groupbitmapxor.md)
|
||||
- [sumWithOverflow](../reference/sumwithoverflow.md)
|
||||
- [sumMap](../reference/summap.md)
|
||||
- [sumMapWithOverflow](../reference/summapwithoverflow.md)
|
||||
- [sumMapFiltered](../parametric-functions.md/#summapfiltered)
|
||||
- [sumMapFilteredWithOverflow](../parametric-functions.md/#summapfilteredwithoverflow)
|
||||
- [minMap](../reference/minmap.md)
|
||||
- [maxMap](../reference/maxmap.md)
|
||||
- [skewSamp](../reference/skewsamp.md)
|
||||
- [skewPop](../reference/skewpop.md)
|
||||
- [kurtSamp](../reference/kurtsamp.md)
|
||||
- [kurtPop](../reference/kurtpop.md)
|
||||
- [uniq](../reference/uniq.md)
|
||||
- [uniqExact](../reference/uniqexact.md)
|
||||
- [uniqCombined](../reference/uniqcombined.md)
|
||||
- [uniqCombined64](../reference/uniqcombined64.md)
|
||||
- [uniqHLL12](../reference/uniqhll12.md)
|
||||
- [uniqTheta](../reference/uniqthetasketch.md)
|
||||
- [quantile](../reference/quantile.md)
|
||||
- [quantiles](../reference/quantiles.md)
|
||||
- [quantileExact](../reference/quantileexact.md)
|
||||
- [quantileExactLow](../reference/quantileexact.md#quantileexactlow)
|
||||
- [quantileExactHigh](../reference/quantileexact.md#quantileexacthigh)
|
||||
- [quantileExactWeighted](../reference/quantileexactweighted.md)
|
||||
- [quantileTiming](../reference/quantiletiming.md)
|
||||
- [quantileTimingWeighted](../reference/quantiletimingweighted.md)
|
||||
- [quantileDeterministic](../reference/quantiledeterministic.md)
|
||||
- [quantileTDigest](../reference/quantiletdigest.md)
|
||||
- [quantileTDigestWeighted](../reference/quantiletdigestweighted.md)
|
||||
- [quantileBFloat16](../reference/quantilebfloat16.md#quantilebfloat16)
|
||||
- [quantileBFloat16Weighted](../reference/quantilebfloat16.md#quantilebfloat16weighted)
|
||||
- [quantileDD](../reference/quantileddsketch.md#quantileddsketch)
|
||||
- [simpleLinearRegression](../reference/simplelinearregression.md)
|
||||
- [singleValueOrNull](../reference/singlevalueornull.md)
|
||||
- [stochasticLinearRegression](../reference/stochasticlinearregression.md)
|
||||
- [stochasticLogisticRegression](../reference/stochasticlogisticregression.md)
|
||||
- [categoricalInformationValue](../reference/categoricalinformationvalue.md)
|
||||
- [contingency](../reference/contingency.md)
|
||||
- [cramersV](../reference/cramersv.md)
|
||||
- [cramersVBiasCorrected](../reference/cramersvbiascorrected.md)
|
||||
- [theilsU](../reference/theilsu.md)
|
||||
- [maxIntersections](../reference/maxintersections.md)
|
||||
- [maxIntersectionsPosition](../reference/maxintersectionsposition.md)
|
||||
- [meanZTest](../reference/meanztest.md)
|
||||
- [quantileGK](../reference/quantileGK.md)
|
||||
- [quantileInterpolatedWeighted](../reference/quantileinterpolatedweighted.md)
|
||||
- [sparkBar](../reference/sparkbar.md)
|
||||
- [sumCount](../reference/sumcount.md)
|
||||
- [largestTriangleThreeBuckets](../reference/largestTriangleThreeBuckets.md)
|
||||
|
@ -701,6 +701,267 @@ Result:
|
||||
1 2 3 4 5 6 7 8
|
||||
```
|
||||
|
||||
## hilbertEncode
|
||||
|
||||
Calculates code for Hilbert Curve for a list of unsigned integers.
|
||||
|
||||
The function has two modes of operation:
|
||||
- Simple
|
||||
- Expanded
|
||||
|
||||
### Simple mode
|
||||
|
||||
Simple: accepts up to 2 unsigned integers as arguments and produces a UInt64 code.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
hilbertEncode(args)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `args`: up to 2 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A UInt64 code
|
||||
|
||||
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hilbertEncode(3, 4);
|
||||
```
|
||||
Result:
|
||||
|
||||
```response
|
||||
31
|
||||
```
|
||||
|
||||
### Expanded mode
|
||||
|
||||
Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 2 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments.
|
||||
|
||||
Each number in the mask configures the number of bits by which the corresponding argument will be shifted left, effectively scaling the argument within its range.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
hilbertEncode(range_mask, args)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
- `range_mask`: ([tuple](../../sql-reference/data-types/tuple.md))
|
||||
- `args`: up to 2 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
|
||||
|
||||
Note: when using columns for `args` the provided `range_mask` tuple should still be a constant.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A UInt64 code
|
||||
|
||||
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality).
|
||||
For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hilbertEncode((10,6), 1024, 16);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
4031541586602
|
||||
```
|
||||
|
||||
Note: tuple size must be equal to the number of the other arguments.
|
||||
|
||||
**Example**
|
||||
|
||||
For a single argument without a tuple, the function returns the argument itself as the Hilbert index, since no dimensional mapping is needed.
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hilbertEncode(1);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
1
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
If a single argument is provided with a tuple specifying bit shifts, the function shifts the argument left by the specified number of bits.
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hilbertEncode(tuple(2), 128);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
512
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
The function also accepts columns as arguments:
|
||||
|
||||
Query:
|
||||
|
||||
First create the table and insert some data.
|
||||
|
||||
```sql
|
||||
create table hilbert_numbers(
|
||||
n1 UInt32,
|
||||
n2 UInt32
|
||||
)
|
||||
Engine=MergeTree()
|
||||
ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into hilbert_numbers (*) values(1,2);
|
||||
```
|
||||
Use column names instead of constants as function arguments to `hilbertEncode`
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hilbertEncode(n1, n2) FROM hilbert_numbers;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
13
|
||||
```
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Please note that you can fit only so many bits of information into Hilbert code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each. All overflow will be clamped to zero.
|
||||
|
||||
## hilbertDecode
|
||||
|
||||
Decodes a Hilbert curve index back into a tuple of unsigned integers, representing coordinates in multi-dimensional space.
|
||||
|
||||
As with the `hilbertEncode` function, this function has two modes of operation:
|
||||
- Simple
|
||||
- Expanded
|
||||
|
||||
### Simple mode
|
||||
|
||||
Accepts a tuple size (no more than 2) and a UInt64 code as arguments and produces a tuple of unsigned integers of the specified size.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
hilbertDecode(tuple_size, code)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
- `tuple_size`: integer value no more than 2.
|
||||
- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- [tuple](../../sql-reference/data-types/tuple.md) of the specified size.
|
||||
|
||||
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hilbertDecode(2, 31);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
["3", "4"]
|
||||
```
|
||||
|
||||
### Expanded mode
|
||||
|
||||
Accepts a range mask (tuple) as a first argument and the code as the second argument.
|
||||
Each number in the mask configures the number of bits by which the corresponding argument will be shifted left, effectively scaling the argument within its range.
|
||||
|
||||
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality).
|
||||
For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
|
||||
As with the encode function, this is limited to 2 numbers at most.
|
||||
|
||||
**Example**
|
||||
|
||||
Hilbert code for one argument is always the argument itself (as a tuple).
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hilbertDecode(1, 1);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
["1"]
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
A single argument with a tuple specifying bit shifts will be right-shifted accordingly.
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hilbertDecode(tuple(2), 32768);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
["128"]
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
The function accepts a column of codes as a second argument:
|
||||
|
||||
First create the table and insert some data.
|
||||
|
||||
Query:
|
||||
```sql
|
||||
create table hilbert_numbers(
|
||||
n1 UInt32,
|
||||
n2 UInt32
|
||||
)
|
||||
Engine=MergeTree()
|
||||
ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into hilbert_numbers (*) values(1,2);
|
||||
```
|
||||
Use column names instead of constants as function arguments to `hilbertDecode`
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
select untuple(hilbertDecode(2, hilbertEncode(n1, n2))) from hilbert_numbers;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
1 2
|
||||
```
|
||||
|
@ -415,8 +415,8 @@ Alias: `power(x, y)`
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md)
|
||||
- `y` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md)
|
||||
- `x` - [(U)Int8/16/32/64](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md)
|
||||
- `y` - [(U)Int8/16/32/64](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -635,8 +635,8 @@ atan2(y, x)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -670,8 +670,8 @@ hypot(x, y)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -838,6 +838,7 @@ degrees(x)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — Input in radians. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
- `x` — Input in radians. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
@ -735,6 +735,8 @@ LIMIT 10
|
||||
|
||||
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string.
|
||||
|
||||
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
@ -766,6 +768,8 @@ Result:
|
||||
|
||||
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string.
|
||||
|
||||
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
@ -890,6 +894,122 @@ SELECT
|
||||
└────────────────────┴────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## parseReadableSize
|
||||
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
|
||||
If the function is unable to parse the input value, it throws an exception.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
parseReadableSize(x)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Number of bytes, rounded up to the nearest integer ([UInt64](../../sql-reference/data-types/int-uint.md)).
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB']) AS readable_sizes,
|
||||
parseReadableSize(readable_sizes) AS sizes;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─readable_sizes─┬───sizes─┐
|
||||
│ 1 B │ 1 │
|
||||
│ 1 KiB │ 1024 │
|
||||
│ 3 MB │ 3000000 │
|
||||
│ 5.314 KiB │ 5442 │
|
||||
└────────────────┴─────────┘
|
||||
```
|
||||
|
||||
## parseReadableSizeOrNull
|
||||
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
|
||||
If the function is unable to parse the input value, it returns `NULL`.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
parseReadableSizeOrNull(x)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Number of bytes, rounded up to the nearest integer, or NULL if unable to parse the input (Nullable([UInt64](../../sql-reference/data-types/int-uint.md))).
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB', 'invalid']) AS readable_sizes,
|
||||
parseReadableSizeOrNull(readable_sizes) AS sizes;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─readable_sizes─┬───sizes─┐
|
||||
│ 1 B │ 1 │
|
||||
│ 1 KiB │ 1024 │
|
||||
│ 3 MB │ 3000000 │
|
||||
│ 5.314 KiB │ 5442 │
|
||||
│ invalid │ ᴺᵁᴸᴸ │
|
||||
└────────────────┴─────────┘
|
||||
```
|
||||
|
||||
## parseReadableSizeOrZero
|
||||
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it returns `0`.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
parseReadableSizeOrZero(x)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Number of bytes, rounded up to the nearest integer, or 0 if unable to parse the input ([UInt64](../../sql-reference/data-types/int-uint.md)).
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB', 'invalid']) AS readable_sizes,
|
||||
parseReadableSizeOrZero(readable_sizes) AS sizes;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─readable_sizes─┬───sizes─┐
|
||||
│ 1 B │ 1 │
|
||||
│ 1 KiB │ 1024 │
|
||||
│ 3 MB │ 3000000 │
|
||||
│ 5.314 KiB │ 5442 │
|
||||
│ invalid │ 0 │
|
||||
└────────────────┴─────────┘
|
||||
```
|
||||
|
||||
## parseTimeDelta
|
||||
|
||||
Parse a sequence of numbers followed by something resembling a time unit.
|
||||
|
@ -337,7 +337,7 @@ Then, when executing the query `SELECT name FROM users_a WHERE length(name) < 5;
|
||||
|
||||
Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
|
||||
|
||||
## Column Compression Codecs
|
||||
## Column Compression Codecs {#column_compression_codec}
|
||||
|
||||
By default, ClickHouse applies `lz4` compression in the self-managed version, and `zstd` in ClickHouse Cloud.
|
||||
|
||||
@ -410,6 +410,10 @@ High compression levels are useful for asymmetric scenarios, like compress once,
|
||||
- For compression, ZSTD_QAT tries to use an Intel® QAT offloading device ([QuickAssist Technology](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html)). If no such device is found, it will fall back to ZSTD compression in software.
|
||||
- Decompression is always performed in software.
|
||||
|
||||
:::note
|
||||
ZSTD_QAT is not available in ClickHouse Cloud.
|
||||
:::
|
||||
|
||||
#### DEFLATE_QPL
|
||||
|
||||
`DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply:
|
||||
|
@ -304,8 +304,8 @@ atan2(y, x)
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `y` — координата y точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `x` — координата х точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `y` — координата y точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `x` — координата х точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
@ -341,8 +341,8 @@ hypot(x, y)
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `x` — первый катет прямоугольного треугольника. [Float64](../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `y` — второй катет прямоугольного треугольника. [Float64](../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `x` — первый катет прямоугольного треугольника. [Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `y` — второй катет прямоугольного треугольника. [Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
|
@ -154,7 +154,8 @@ function _clickhouse_quote()
|
||||
# Extract every option (everything that starts with "-") from the --help dialog.
|
||||
function _clickhouse_get_options()
|
||||
{
|
||||
"$@" --help 2>&1 | awk -F '[ ,=<>.]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 1, 1) == "-" && length($i) > 1) print $i; } }' | sort -u
|
||||
# By default --help will not print all settings, this is done only under --verbose
|
||||
"$@" --help --verbose 2>&1 | awk -F '[ ,=<>.]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 1, 1) == "-" && length($i) > 1) print $i; } }' | sort -u
|
||||
}
|
||||
|
||||
function _complete_for_clickhouse_generic_bin_impl()
|
||||
|
@ -11,7 +11,6 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int KEEPER_EXCEPTION;
|
||||
}
|
||||
|
||||
bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
|
||||
@ -214,6 +213,143 @@ void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client
|
||||
std::cout << "numChildren = " << stat.numChildren << "\n";
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Helper class for parallelized tree traversal
|
||||
template <class UserCtx>
|
||||
struct TraversalTask : public std::enable_shared_from_this<TraversalTask<UserCtx>>
|
||||
{
|
||||
using TraversalTaskPtr = std::shared_ptr<TraversalTask<UserCtx>>;
|
||||
|
||||
struct Ctx
|
||||
{
|
||||
std::deque<TraversalTaskPtr> new_tasks; /// Tasks for newly discovered children, that hasn't been started yet
|
||||
std::deque<std::function<void(Ctx &)>> in_flight_list_requests; /// In-flight getChildren requests
|
||||
std::deque<std::function<void(Ctx &)>> finish_callbacks; /// Callbacks to be called
|
||||
KeeperClient * client;
|
||||
UserCtx & user_ctx;
|
||||
|
||||
Ctx(KeeperClient * client_, UserCtx & user_ctx_) : client(client_), user_ctx(user_ctx_) {}
|
||||
};
|
||||
|
||||
private:
|
||||
const fs::path path;
|
||||
const TraversalTaskPtr parent;
|
||||
|
||||
Int64 child_tasks = 0;
|
||||
Int64 nodes_in_subtree = 1;
|
||||
|
||||
public:
|
||||
TraversalTask(const fs::path & path_, TraversalTaskPtr parent_)
|
||||
: path(path_)
|
||||
, parent(parent_)
|
||||
{
|
||||
}
|
||||
|
||||
/// Start traversing the subtree
|
||||
void onStart(Ctx & ctx)
|
||||
{
|
||||
/// tryGetChildren doesn't throw if the node is not found (was deleted in the meantime)
|
||||
std::shared_ptr<std::future<Coordination::ListResponse>> list_request =
|
||||
std::make_shared<std::future<Coordination::ListResponse>>(ctx.client->zookeeper->asyncTryGetChildren(path));
|
||||
ctx.in_flight_list_requests.push_back([task = this->shared_from_this(), list_request](Ctx & ctx_) mutable
|
||||
{
|
||||
task->onGetChildren(ctx_, list_request->get());
|
||||
});
|
||||
}
|
||||
|
||||
/// Called when getChildren request returns
|
||||
void onGetChildren(Ctx & ctx, const Coordination::ListResponse & response)
|
||||
{
|
||||
const bool traverse_children = ctx.user_ctx.onListChildren(path, response.names);
|
||||
|
||||
if (traverse_children)
|
||||
{
|
||||
/// Schedule traversal of each child
|
||||
for (const auto & child : response.names)
|
||||
{
|
||||
auto task = std::make_shared<TraversalTask>(path / child, this->shared_from_this());
|
||||
ctx.new_tasks.push_back(task);
|
||||
}
|
||||
child_tasks = response.names.size();
|
||||
}
|
||||
|
||||
if (child_tasks == 0)
|
||||
finish(ctx);
|
||||
}
|
||||
|
||||
/// Called when a child subtree has been traversed
|
||||
void onChildTraversalFinished(Ctx & ctx, Int64 child_nodes_in_subtree)
|
||||
{
|
||||
nodes_in_subtree += child_nodes_in_subtree;
|
||||
|
||||
--child_tasks;
|
||||
|
||||
/// Finish if all children have been traversed
|
||||
if (child_tasks == 0)
|
||||
finish(ctx);
|
||||
}
|
||||
|
||||
private:
|
||||
/// This node and all its children have been traversed
|
||||
void finish(Ctx & ctx)
|
||||
{
|
||||
ctx.user_ctx.onFinishChildrenTraversal(path, nodes_in_subtree);
|
||||
|
||||
if (!parent)
|
||||
return;
|
||||
|
||||
/// Notify the parent that we have finished traversing the subtree
|
||||
ctx.finish_callbacks.push_back([p = this->parent, child_nodes_in_subtree = this->nodes_in_subtree](Ctx & ctx_)
|
||||
{
|
||||
p->onChildTraversalFinished(ctx_, child_nodes_in_subtree);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/// Traverses the tree in parallel and calls user callbacks
|
||||
/// Parallelization is achieved by sending multiple async getChildren requests to Keeper, but all processing is done in a single thread
|
||||
template <class UserCtx>
|
||||
void parallelized_traverse(const fs::path & path, KeeperClient * client, size_t max_in_flight_requests, UserCtx & ctx_)
|
||||
{
|
||||
typename TraversalTask<UserCtx>::Ctx ctx(client, ctx_);
|
||||
|
||||
auto root_task = std::make_shared<TraversalTask<UserCtx>>(path, nullptr);
|
||||
|
||||
ctx.new_tasks.push_back(root_task);
|
||||
|
||||
/// Until there is something to do
|
||||
while (!ctx.new_tasks.empty() || !ctx.in_flight_list_requests.empty() || !ctx.finish_callbacks.empty())
|
||||
{
|
||||
/// First process all finish callbacks, they don't wait for anything and allow to free memory
|
||||
while (!ctx.finish_callbacks.empty())
|
||||
{
|
||||
auto callback = std::move(ctx.finish_callbacks.front());
|
||||
ctx.finish_callbacks.pop_front();
|
||||
callback(ctx);
|
||||
}
|
||||
|
||||
/// Make new requests if there are less than max in flight
|
||||
while (!ctx.new_tasks.empty() && ctx.in_flight_list_requests.size() < max_in_flight_requests)
|
||||
{
|
||||
auto task = std::move(ctx.new_tasks.front());
|
||||
ctx.new_tasks.pop_front();
|
||||
task->onStart(ctx);
|
||||
}
|
||||
|
||||
/// Wait for first request in the queue to finish
|
||||
if (!ctx.in_flight_list_requests.empty())
|
||||
{
|
||||
auto request = std::move(ctx.in_flight_list_requests.front());
|
||||
ctx.in_flight_list_requests.pop_front();
|
||||
request(ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} /// anonymous namespace
|
||||
|
||||
bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
|
||||
{
|
||||
ASTPtr threshold;
|
||||
@ -237,27 +373,21 @@ void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client
|
||||
auto threshold = query->args[0].safeGet<UInt64>();
|
||||
auto path = client->getAbsolutePath(query->args[1].safeGet<String>());
|
||||
|
||||
Coordination::Stat stat;
|
||||
if (!client->zookeeper->exists(path, &stat))
|
||||
return; /// It is ok if node was deleted meanwhile
|
||||
|
||||
if (stat.numChildren >= static_cast<Int32>(threshold))
|
||||
std::cout << static_cast<String>(path) << "\t" << stat.numChildren << "\n";
|
||||
|
||||
Strings children;
|
||||
auto status = client->zookeeper->tryGetChildren(path, children);
|
||||
if (status == Coordination::Error::ZNONODE)
|
||||
return; /// It is ok if node was deleted meanwhile
|
||||
else if (status != Coordination::Error::ZOK)
|
||||
throw DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, "Error {} while getting children of {}", status, path.string());
|
||||
|
||||
std::sort(children.begin(), children.end());
|
||||
auto next_query = *query;
|
||||
for (const auto & child : children)
|
||||
struct
|
||||
{
|
||||
next_query.args[1] = DB::Field(path / child);
|
||||
execute(&next_query, client);
|
||||
}
|
||||
bool onListChildren(const fs::path & path, const Strings & children) const
|
||||
{
|
||||
if (children.size() >= threshold)
|
||||
std::cout << static_cast<String>(path) << "\t" << children.size() << "\n";
|
||||
return true;
|
||||
}
|
||||
|
||||
void onFinishChildrenTraversal(const fs::path &, Int64) const {}
|
||||
|
||||
size_t threshold;
|
||||
} ctx {.threshold = threshold };
|
||||
|
||||
parallelized_traverse(path, client, /* max_in_flight_requests */ 50, ctx);
|
||||
}
|
||||
|
||||
bool DeleteStaleBackups::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery> & /* node */, Expected & /* expected */) const
|
||||
@ -322,38 +452,28 @@ bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> &
|
||||
return true;
|
||||
}
|
||||
|
||||
/// DFS the subtree and return the number of nodes in the subtree
|
||||
static Int64 traverse(const fs::path & path, KeeperClient * client, std::vector<std::tuple<Int64, String>> & result)
|
||||
{
|
||||
Int64 nodes_in_subtree = 1;
|
||||
|
||||
Strings children;
|
||||
auto status = client->zookeeper->tryGetChildren(path, children);
|
||||
if (status == Coordination::Error::ZNONODE)
|
||||
return 0;
|
||||
else if (status != Coordination::Error::ZOK)
|
||||
throw DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, "Error {} while getting children of {}", status, path.string());
|
||||
|
||||
for (auto & child : children)
|
||||
nodes_in_subtree += traverse(path / child, client, result);
|
||||
|
||||
result.emplace_back(nodes_in_subtree, path.string());
|
||||
|
||||
return nodes_in_subtree;
|
||||
}
|
||||
|
||||
void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const
|
||||
{
|
||||
auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
|
||||
auto n = query->args[1].safeGet<UInt64>();
|
||||
|
||||
std::vector<std::tuple<Int64, String>> result;
|
||||
struct
|
||||
{
|
||||
std::vector<std::tuple<Int64, String>> result;
|
||||
|
||||
traverse(path, client, result);
|
||||
bool onListChildren(const fs::path &, const Strings &) const { return true; }
|
||||
|
||||
std::sort(result.begin(), result.end(), std::greater());
|
||||
for (UInt64 i = 0; i < std::min(result.size(), static_cast<size_t>(n)); ++i)
|
||||
std::cout << std::get<1>(result[i]) << "\t" << std::get<0>(result[i]) << "\n";
|
||||
void onFinishChildrenTraversal(const fs::path & path, Int64 nodes_in_subtree)
|
||||
{
|
||||
result.emplace_back(nodes_in_subtree, path.string());
|
||||
}
|
||||
} ctx;
|
||||
|
||||
parallelized_traverse(path, client, /* max_in_flight_requests */ 50, ctx);
|
||||
|
||||
std::sort(ctx.result.begin(), ctx.result.end(), std::greater());
|
||||
for (UInt64 i = 0; i < std::min(ctx.result.size(), static_cast<size_t>(n)); ++i)
|
||||
std::cout << std::get<1>(ctx.result[i]) << "\t" << std::get<0>(ctx.result[i]) << "\n";
|
||||
}
|
||||
|
||||
bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
|
||||
|
@ -9,8 +9,6 @@ set (CLICKHOUSE_KEEPER_LINK
|
||||
clickhouse_common_zookeeper
|
||||
daemon
|
||||
dbms
|
||||
|
||||
${LINK_RESOURCE_LIB}
|
||||
)
|
||||
|
||||
clickhouse_program_add(keeper)
|
||||
@ -210,8 +208,6 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
loggers_no_text_log
|
||||
clickhouse_common_io
|
||||
clickhouse_parsers # Otherwise compression will not built. FIXME.
|
||||
|
||||
${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
|
||||
)
|
||||
|
||||
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
|
||||
|
@ -14,8 +14,6 @@ set (CLICKHOUSE_SERVER_LINK
|
||||
clickhouse_storages_system
|
||||
clickhouse_table_functions
|
||||
|
||||
${LINK_RESOURCE_LIB}
|
||||
|
||||
PUBLIC
|
||||
daemon
|
||||
)
|
||||
|
1486
src/Analyzer/Resolve/IdentifierResolver.cpp
Normal file
1486
src/Analyzer/Resolve/IdentifierResolver.cpp
Normal file
File diff suppressed because it is too large
Load Diff
157
src/Analyzer/Resolve/IdentifierResolver.h
Normal file
157
src/Analyzer/Resolve/IdentifierResolver.h
Normal file
@ -0,0 +1,157 @@
|
||||
#pragma once
|
||||
|
||||
#include <Analyzer/HashUtils.h>
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/Resolve/IdentifierLookup.h>
|
||||
|
||||
#include <Core/Joins.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Parsers/NullsAction.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct GetColumnsOptions;
|
||||
struct IdentifierResolveScope;
|
||||
struct AnalysisTableExpressionData;
|
||||
class QueryExpressionsAliasVisitor ;
|
||||
|
||||
class QueryNode;
|
||||
class JoinNode;
|
||||
class ColumnNode;
|
||||
|
||||
using ProjectionName = String;
|
||||
using ProjectionNames = std::vector<ProjectionName>;
|
||||
|
||||
struct Settings;
|
||||
|
||||
class IdentifierResolver
|
||||
{
|
||||
public:
|
||||
|
||||
IdentifierResolver(
|
||||
std::unordered_set<std::string_view> & ctes_in_resolve_process_,
|
||||
std::unordered_map<QueryTreeNodePtr, ProjectionName> & node_to_projection_name_)
|
||||
: ctes_in_resolve_process(ctes_in_resolve_process_)
|
||||
, node_to_projection_name(node_to_projection_name_)
|
||||
{}
|
||||
|
||||
/// Utility functions
|
||||
|
||||
static bool isExpressionNodeType(QueryTreeNodeType node_type);
|
||||
|
||||
static bool isFunctionExpressionNodeType(QueryTreeNodeType node_type);
|
||||
|
||||
static bool isSubqueryNodeType(QueryTreeNodeType node_type);
|
||||
|
||||
static bool isTableExpressionNodeType(QueryTreeNodeType node_type);
|
||||
|
||||
static DataTypePtr getExpressionNodeResultTypeOrNull(const QueryTreeNodePtr & query_tree_node);
|
||||
|
||||
static void collectCompoundExpressionValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
|
||||
const DataTypePtr & compound_expression_type,
|
||||
const Identifier & valid_identifier_prefix,
|
||||
std::unordered_set<Identifier> & valid_identifiers_result);
|
||||
|
||||
static void collectTableExpressionValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
|
||||
const QueryTreeNodePtr & table_expression,
|
||||
const AnalysisTableExpressionData & table_expression_data,
|
||||
std::unordered_set<Identifier> & valid_identifiers_result);
|
||||
|
||||
static void collectScopeValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
|
||||
const IdentifierResolveScope & scope,
|
||||
bool allow_expression_identifiers,
|
||||
bool allow_function_identifiers,
|
||||
bool allow_table_expression_identifiers,
|
||||
std::unordered_set<Identifier> & valid_identifiers_result);
|
||||
|
||||
static void collectScopeWithParentScopesValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
|
||||
const IdentifierResolveScope & scope,
|
||||
bool allow_expression_identifiers,
|
||||
bool allow_function_identifiers,
|
||||
bool allow_table_expression_identifiers,
|
||||
std::unordered_set<Identifier> & valid_identifiers_result);
|
||||
|
||||
static std::vector<String> collectIdentifierTypoHints(const Identifier & unresolved_identifier, const std::unordered_set<Identifier> & valid_identifiers);
|
||||
|
||||
static QueryTreeNodePtr wrapExpressionNodeInTupleElement(QueryTreeNodePtr expression_node, IdentifierView nested_path, const ContextPtr & context);
|
||||
|
||||
static QueryTreeNodePtr convertJoinedColumnTypeToNullIfNeeded(
|
||||
const QueryTreeNodePtr & resolved_identifier,
|
||||
const JoinKind & join_kind,
|
||||
std::optional<JoinTableSide> resolved_side,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
/// Resolve identifier functions
|
||||
|
||||
static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromCompoundExpression(const Identifier & expression_identifier,
|
||||
size_t identifier_bind_size,
|
||||
const QueryTreeNodePtr & compound_expression,
|
||||
String compound_expression_source,
|
||||
IdentifierResolveScope & scope,
|
||||
bool can_be_not_found = false);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromExpressionArguments(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
|
||||
|
||||
static bool tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromTableColumns(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
|
||||
|
||||
static bool tryBindIdentifierToTableExpression(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
const IdentifierResolveScope & scope);
|
||||
|
||||
static bool tryBindIdentifierToTableExpressions(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
const IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromTableExpression(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr matchArrayJoinSubcolumns(
|
||||
const QueryTreeNodePtr & array_join_column_inner_expression,
|
||||
const ColumnNode & array_join_column_expression_typed,
|
||||
const QueryTreeNodePtr & resolved_expression,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveExpressionFromArrayJoinExpressions(const QueryTreeNodePtr & resolved_expression,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromArrayJoin(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromJoinTreeNode(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & join_tree_node,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromJoinTree(const IdentifierLookup & identifier_lookup,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromStorage(
|
||||
const Identifier & identifier,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
const AnalysisTableExpressionData & table_expression_data,
|
||||
IdentifierResolveScope & scope,
|
||||
size_t identifier_column_qualifier_parts,
|
||||
bool can_be_not_found = false);
|
||||
|
||||
/// CTEs that are currently in resolve process
|
||||
std::unordered_set<std::string_view> & ctes_in_resolve_process;
|
||||
|
||||
/// Global expression node to projection name map
|
||||
std::unordered_map<QueryTreeNodePtr, ProjectionName> & node_to_projection_name;
|
||||
|
||||
};
|
||||
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -4,6 +4,7 @@
|
||||
#include <Analyzer/HashUtils.h>
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/Resolve/IdentifierLookup.h>
|
||||
#include <Analyzer/Resolve/IdentifierResolver.h>
|
||||
|
||||
#include <Core/Joins.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
@ -121,16 +122,6 @@ public:
|
||||
private:
|
||||
/// Utility functions
|
||||
|
||||
static bool isExpressionNodeType(QueryTreeNodeType node_type);
|
||||
|
||||
static bool isFunctionExpressionNodeType(QueryTreeNodeType node_type);
|
||||
|
||||
static bool isSubqueryNodeType(QueryTreeNodeType node_type);
|
||||
|
||||
static bool isTableExpressionNodeType(QueryTreeNodeType node_type);
|
||||
|
||||
static DataTypePtr getExpressionNodeResultTypeOrNull(const QueryTreeNodePtr & query_tree_node);
|
||||
|
||||
static ProjectionName calculateFunctionProjectionName(const QueryTreeNodePtr & function_node,
|
||||
const ProjectionNames & parameters_projection_names,
|
||||
const ProjectionNames & arguments_projection_names);
|
||||
@ -149,34 +140,6 @@ private:
|
||||
const ProjectionName & fill_to_expression_projection_name,
|
||||
const ProjectionName & fill_step_expression_projection_name);
|
||||
|
||||
static void collectCompoundExpressionValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
|
||||
const DataTypePtr & compound_expression_type,
|
||||
const Identifier & valid_identifier_prefix,
|
||||
std::unordered_set<Identifier> & valid_identifiers_result);
|
||||
|
||||
static void collectTableExpressionValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
|
||||
const QueryTreeNodePtr & table_expression,
|
||||
const AnalysisTableExpressionData & table_expression_data,
|
||||
std::unordered_set<Identifier> & valid_identifiers_result);
|
||||
|
||||
static void collectScopeValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
|
||||
const IdentifierResolveScope & scope,
|
||||
bool allow_expression_identifiers,
|
||||
bool allow_function_identifiers,
|
||||
bool allow_table_expression_identifiers,
|
||||
std::unordered_set<Identifier> & valid_identifiers_result);
|
||||
|
||||
static void collectScopeWithParentScopesValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
|
||||
const IdentifierResolveScope & scope,
|
||||
bool allow_expression_identifiers,
|
||||
bool allow_function_identifiers,
|
||||
bool allow_table_expression_identifiers,
|
||||
std::unordered_set<Identifier> & valid_identifiers_result);
|
||||
|
||||
static std::vector<String> collectIdentifierTypoHints(const Identifier & unresolved_identifier, const std::unordered_set<Identifier> & valid_identifiers);
|
||||
|
||||
static QueryTreeNodePtr wrapExpressionNodeInTupleElement(QueryTreeNodePtr expression_node, IdentifierView nested_path);
|
||||
|
||||
QueryTreeNodePtr tryGetLambdaFromSQLUserDefinedFunctions(const std::string & function_name, ContextPtr context);
|
||||
|
||||
void evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & query_tree_node, IdentifierResolveScope & scope);
|
||||
@ -204,84 +167,18 @@ private:
|
||||
|
||||
static std::optional<JoinTableSide> getColumnSideFromJoinTree(const QueryTreeNodePtr & resolved_identifier, const JoinNode & join_node);
|
||||
|
||||
static QueryTreeNodePtr convertJoinedColumnTypeToNullIfNeeded(
|
||||
const QueryTreeNodePtr & resolved_identifier,
|
||||
const JoinKind & join_kind,
|
||||
std::optional<JoinTableSide> resolved_side,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
/// Resolve identifier functions
|
||||
|
||||
static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromCompoundExpression(const Identifier & expression_identifier,
|
||||
size_t identifier_bind_size,
|
||||
const QueryTreeNodePtr & compound_expression,
|
||||
String compound_expression_source,
|
||||
IdentifierResolveScope & scope,
|
||||
bool can_be_not_found = false);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromExpressionArguments(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
|
||||
|
||||
static bool tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromAliases(const IdentifierLookup & identifier_lookup,
|
||||
IdentifierResolveScope & scope,
|
||||
IdentifierResolveSettings identifier_resolve_settings);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromTableColumns(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
|
||||
|
||||
static bool tryBindIdentifierToTableExpression(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
const IdentifierResolveScope & scope);
|
||||
|
||||
static bool tryBindIdentifierToTableExpressions(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
const IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromTableExpression(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr matchArrayJoinSubcolumns(
|
||||
const QueryTreeNodePtr & array_join_column_inner_expression,
|
||||
const ColumnNode & array_join_column_expression_typed,
|
||||
const QueryTreeNodePtr & resolved_expression,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveExpressionFromArrayJoinExpressions(const QueryTreeNodePtr & resolved_expression,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromArrayJoin(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromJoinTreeNode(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & join_tree_node,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromJoinTree(const IdentifierLookup & identifier_lookup,
|
||||
IdentifierResolveScope & scope);
|
||||
|
||||
IdentifierResolveResult tryResolveIdentifierInParentScopes(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
|
||||
|
||||
IdentifierResolveResult tryResolveIdentifier(const IdentifierLookup & identifier_lookup,
|
||||
IdentifierResolveScope & scope,
|
||||
IdentifierResolveSettings identifier_resolve_settings = {});
|
||||
|
||||
QueryTreeNodePtr tryResolveIdentifierFromStorage(
|
||||
const Identifier & identifier,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
const AnalysisTableExpressionData & table_expression_data,
|
||||
IdentifierResolveScope & scope,
|
||||
size_t identifier_column_qualifier_parts,
|
||||
bool can_be_not_found = false);
|
||||
|
||||
/// Resolve query tree nodes functions
|
||||
|
||||
void qualifyColumnNodesWithProjectionNames(const QueryTreeNodes & column_nodes,
|
||||
@ -362,6 +259,8 @@ private:
|
||||
/// Global expression node to projection name map
|
||||
std::unordered_map<QueryTreeNodePtr, ProjectionName> node_to_projection_name;
|
||||
|
||||
IdentifierResolver identifier_resolver; // (ctes_in_resolve_process, node_to_projection_name);
|
||||
|
||||
/// Global resolve expression node to projection names map
|
||||
std::unordered_map<QueryTreeNodePtr, ProjectionNames> resolved_expressions;
|
||||
|
||||
|
71
src/Analyzer/Resolve/ReplaceColumnsVisitor.h
Normal file
71
src/Analyzer/Resolve/ReplaceColumnsVisitor.h
Normal file
@ -0,0 +1,71 @@
|
||||
#pragma once
|
||||
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/Utils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/// Used to replace columns that changed type because of JOIN to their original type
|
||||
class ReplaceColumnsVisitor : public InDepthQueryTreeVisitor<ReplaceColumnsVisitor>
|
||||
{
|
||||
public:
|
||||
explicit ReplaceColumnsVisitor(const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_, const ContextPtr & context_)
|
||||
: replacement_map(replacement_map_)
|
||||
, context(context_)
|
||||
{}
|
||||
|
||||
/// Apply replacement transitively, because column may change it's type twice, one to have a supertype and then because of `joun_use_nulls`
|
||||
static QueryTreeNodePtr findTransitiveReplacement(QueryTreeNodePtr node, const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_)
|
||||
{
|
||||
auto it = replacement_map_.find(node);
|
||||
QueryTreeNodePtr result_node = nullptr;
|
||||
for (; it != replacement_map_.end(); it = replacement_map_.find(result_node))
|
||||
{
|
||||
if (result_node && result_node->isEqual(*it->second))
|
||||
{
|
||||
Strings map_dump;
|
||||
for (const auto & [k, v]: replacement_map_)
|
||||
map_dump.push_back(fmt::format("{} -> {} (is_equals: {}, is_same: {})",
|
||||
k.node->dumpTree(), v->dumpTree(), k.node->isEqual(*v), k.node == v));
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Infinite loop in query tree replacement map: {}", fmt::join(map_dump, "; "));
|
||||
}
|
||||
chassert(it->second);
|
||||
|
||||
result_node = it->second;
|
||||
}
|
||||
return result_node;
|
||||
}
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (auto replacement_node = findTransitiveReplacement(node, replacement_map))
|
||||
node = replacement_node;
|
||||
|
||||
if (auto * function_node = node->as<FunctionNode>(); function_node && function_node->isResolved())
|
||||
rerunFunctionResolve(function_node, context);
|
||||
}
|
||||
|
||||
/// We want to re-run resolve for function _after_ its arguments are replaced
|
||||
bool shouldTraverseTopToBottom() const { return false; }
|
||||
|
||||
bool needChildVisit(QueryTreeNodePtr & /* parent */, QueryTreeNodePtr & child)
|
||||
{
|
||||
/// Visit only expressions, but not subqueries
|
||||
return child->getNodeType() == QueryTreeNodeType::IDENTIFIER
|
||||
|| child->getNodeType() == QueryTreeNodeType::LIST
|
||||
|| child->getNodeType() == QueryTreeNodeType::FUNCTION
|
||||
|| child->getNodeType() == QueryTreeNodeType::COLUMN;
|
||||
}
|
||||
|
||||
private:
|
||||
const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map;
|
||||
const ContextPtr & context;
|
||||
};
|
||||
|
||||
}
|
@ -22,15 +22,6 @@ include (configure_config.cmake)
|
||||
configure_file (Common/config.h.in ${CONFIG_INCLUDE_PATH}/config.h)
|
||||
configure_file (Common/config_version.cpp.in ${CONFIG_INCLUDE_PATH}/config_version.cpp)
|
||||
|
||||
if (USE_DEBUG_HELPERS)
|
||||
get_target_property(MAGIC_ENUM_INCLUDE_DIR ch_contrib::magic_enum INTERFACE_INCLUDE_DIRECTORIES)
|
||||
# CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc.
|
||||
# Prefixing "SHELL:" will force it to use the original text.
|
||||
set (INCLUDE_DEBUG_HELPERS "SHELL:-I\"${ClickHouse_SOURCE_DIR}/base\" -I\"${MAGIC_ENUM_INCLUDE_DIR}\" -include \"${ClickHouse_SOURCE_DIR}/src/Core/iostream_debug_helpers.h\"")
|
||||
# Use generator expression as we don't want to pollute CMAKE_CXX_FLAGS, which will interfere with CMake check system.
|
||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${INCLUDE_DEBUG_HELPERS}>)
|
||||
endif ()
|
||||
|
||||
# ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`).
|
||||
# If turned ON, this option defines such macro.
|
||||
# See `src/Common/TargetSpecific.h`
|
||||
|
@ -828,7 +828,7 @@ ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint
|
||||
size_t tuple_size = tuple.tupleSize();
|
||||
|
||||
if (tuple_size == 0)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty tuple");
|
||||
return filterGeneric(filt, result_size_hint);
|
||||
|
||||
Columns temporary_arrays(tuple_size);
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
@ -1265,7 +1265,7 @@ ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const
|
||||
size_t tuple_size = tuple.tupleSize();
|
||||
|
||||
if (tuple_size == 0)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty tuple");
|
||||
return replicateGeneric(replicate_offsets);
|
||||
|
||||
Columns temporary_arrays(tuple_size);
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/RadixSort.h>
|
||||
#include <Common/SipHash.h>
|
||||
@ -264,6 +265,23 @@ void ColumnDecimal<T>::updatePermutation(IColumn::PermutationSortDirection direc
|
||||
}
|
||||
}
|
||||
|
||||
template <is_decimal T>
|
||||
size_t ColumnDecimal<T>::estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const
|
||||
{
|
||||
const size_t range_size = equal_range.size();
|
||||
if (range_size <= 1)
|
||||
return range_size;
|
||||
|
||||
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
|
||||
HashSet<T> elements;
|
||||
for (size_t i = equal_range.from; i < equal_range.to; ++i)
|
||||
{
|
||||
size_t permuted_i = permutation[i];
|
||||
elements.insert(data[permuted_i]);
|
||||
}
|
||||
return elements.size();
|
||||
}
|
||||
|
||||
template <is_decimal T>
|
||||
ColumnPtr ColumnDecimal<T>::permute(const IColumn::Permutation & perm, size_t limit) const
|
||||
{
|
||||
|
@ -97,6 +97,8 @@ public:
|
||||
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
|
||||
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int, IColumn::Permutation & res, EqualRanges& equal_ranges) const override;
|
||||
size_t estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
|
||||
|
||||
MutableColumnPtr cloneResized(size_t size) const override;
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/HashTable/StringHashSet.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
@ -200,6 +201,24 @@ void ColumnFixedString::updatePermutation(IColumn::PermutationSortDirection dire
|
||||
updatePermutationImpl(limit, res, equal_ranges, ComparatorDescendingStable(*this), comparator_equal, DefaultSort(), DefaultPartialSort());
|
||||
}
|
||||
|
||||
/// Counts the distinct values among the rows `permutation[i]` for `i` in [equal_range.from, equal_range.to).
/// The result of `getDataAt` for every row of the range is put into a StringHashSet and the size of the set is returned.
size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
{
    /// A range of zero or one rows has exactly that many distinct values.
    const size_t rows_in_range = equal_range.size();
    if (rows_in_range <= 1)
        return rows_in_range;

    /// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
    StringHashSet distinct_values;
    bool is_new_value = false; /// Output argument of `emplace`, it is not read here.
    for (size_t pos = equal_range.from; pos < equal_range.to; ++pos)
    {
        StringRef row_bytes = getDataAt(permutation[pos]);
        distinct_values.emplace(row_bytes, is_new_value);
    }

    return distinct_values.size();
}
|
||||
|
||||
void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
||||
{
|
||||
const ColumnFixedString & src_concrete = assert_cast<const ColumnFixedString &>(src);
|
||||
|
@ -142,6 +142,8 @@ public:
|
||||
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const override;
|
||||
|
||||
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
|
||||
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
|
||||
|
@ -3,9 +3,12 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include "Storages/IndicesDescription.h"
|
||||
#include "base/types.h"
|
||||
#include <base/sort.h>
|
||||
#include <base/scope_guard.h>
|
||||
|
||||
@ -486,6 +489,21 @@ void ColumnLowCardinality::updatePermutationWithCollation(const Collator & colla
|
||||
updatePermutationImpl(limit, res, equal_ranges, comparator, equal_comparator, DefaultSort(), DefaultPartialSort());
|
||||
}
|
||||
|
||||
/// Counts the distinct index values (as returned by `getIndexes().getUInt`) among the rows `permutation[i]`
/// for `i` in [equal_range.from, equal_range.to).
size_t ColumnLowCardinality::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
{
    /// A range of zero or one rows has exactly that many distinct values.
    const size_t rows_in_range = equal_range.size();
    if (rows_in_range <= 1)
        return rows_in_range;

    const auto & indexes_column = getIndexes();

    HashSet<UInt64> distinct_indexes;
    for (size_t pos = equal_range.from; pos < equal_range.to; ++pos)
        distinct_indexes.insert(indexes_column.getUInt(permutation[pos]));

    return distinct_indexes.size();
}
|
||||
|
||||
std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const
|
||||
{
|
||||
auto columns = getIndexes().scatter(num_columns, selector);
|
||||
|
@ -145,6 +145,8 @@ public:
|
||||
void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
|
||||
|
||||
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
|
||||
ColumnPtr replicate(const Offsets & offsets) const override
|
||||
{
|
||||
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/StringHashSet.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/WeakHash.h>
|
||||
@ -621,7 +622,7 @@ void ColumnNullable::updatePermutationImpl(IColumn::PermutationSortDirection dir
|
||||
if (unlikely(stability == PermutationSortStability::Stable))
|
||||
{
|
||||
for (auto & null_range : null_ranges)
|
||||
::sort(res.begin() + null_range.first, res.begin() + null_range.second);
|
||||
::sort(std::ranges::next(res.begin(), null_range.from), std::ranges::next(res.begin(), null_range.to));
|
||||
}
|
||||
|
||||
if (is_nulls_last || null_ranges.empty())
|
||||
@ -660,6 +661,33 @@ void ColumnNullable::updatePermutationWithCollation(const Collator & collator, I
|
||||
updatePermutationImpl(direction, stability, limit, null_direction_hint, res, equal_ranges, &collator);
|
||||
}
|
||||
|
||||
|
||||
/// Counts the distinct values among the rows `permutation[i]` for `i` in [equal_range.from, equal_range.to).
/// Rows for which `isNullAt` is true are counted as one single additional value, all other rows are
/// distinguished by the result of `getDataAt`.
size_t ColumnNullable::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
{
    /// A range of zero or one rows has exactly that many distinct values.
    const size_t rows_in_range = equal_range.size();
    if (rows_in_range <= 1)
        return rows_in_range;

    /// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
    StringHashSet distinct_values;
    bool saw_null = false;
    bool is_new_value = false; /// Output argument of `emplace`, it is not read here.
    for (size_t pos = equal_range.from; pos < equal_range.to; ++pos)
    {
        const size_t row = permutation[pos];
        if (isNullAt(row))
        {
            saw_null = true;
            continue;
        }

        StringRef row_bytes = getDataAt(row);
        distinct_values.emplace(row_bytes, is_new_value);
    }

    size_t cardinality = distinct_values.size();
    if (saw_null)
        ++cardinality;
    return cardinality;
}
|
||||
|
||||
void ColumnNullable::reserve(size_t n)
|
||||
{
|
||||
getNestedColumn().reserve(n);
|
||||
|
@ -109,6 +109,7 @@ public:
|
||||
size_t limit, int null_direction_hint, Permutation & res) const override;
|
||||
void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
|
||||
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
void reserve(size_t n) override;
|
||||
void shrinkToFit() override;
|
||||
void ensureOwnership() override;
|
||||
|
@ -820,6 +820,9 @@ ColumnPtr recursiveRemoveSparse(const ColumnPtr & column)
|
||||
if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()))
|
||||
{
|
||||
auto columns = column_tuple->getColumns();
|
||||
if (columns.empty())
|
||||
return column;
|
||||
|
||||
for (auto & element : columns)
|
||||
element = recursiveRemoveSparse(element);
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <Columns/MaskOperations.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/StringHashSet.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
@ -481,6 +482,23 @@ void ColumnString::updatePermutationWithCollation(const Collator & collator, Per
|
||||
DefaultPartialSort());
|
||||
}
|
||||
|
||||
/// Counts the distinct values among the rows `permutation[i]` for `i` in [equal_range.from, equal_range.to).
/// The result of `getDataAt` for every row of the range is put into a StringHashSet and the size of the set is returned.
size_t ColumnString::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
{
    /// A range of zero or one rows has exactly that many distinct values.
    const size_t rows_in_range = equal_range.size();
    if (rows_in_range <= 1)
        return rows_in_range;

    /// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
    StringHashSet distinct_values;
    bool is_new_value = false; /// Output argument of `emplace`, it is not read here.
    for (size_t pos = equal_range.from; pos < equal_range.to; ++pos)
    {
        StringRef row_bytes = getDataAt(permutation[pos]);
        distinct_values.emplace(row_bytes, is_new_value);
    }

    return distinct_values.size();
}
|
||||
|
||||
ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
|
||||
{
|
||||
|
@ -260,6 +260,8 @@ public:
|
||||
void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
|
||||
|
||||
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
|
||||
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
@ -3,14 +3,16 @@
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <Columns/IColumnImpl.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/iota.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <DataTypes/Serializations/SerializationInfoTuple.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||
#include <base/sort.h>
|
||||
|
||||
|
||||
@ -23,6 +25,7 @@ namespace ErrorCodes
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
@ -44,6 +47,9 @@ std::string ColumnTuple::getName() const
|
||||
|
||||
ColumnTuple::ColumnTuple(MutableColumns && mutable_columns)
|
||||
{
|
||||
if (mutable_columns.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "This function cannot be used to construct empty tuple. It is a bug");
|
||||
|
||||
columns.reserve(mutable_columns.size());
|
||||
for (auto & column : mutable_columns)
|
||||
{
|
||||
@ -52,15 +58,21 @@ ColumnTuple::ColumnTuple(MutableColumns && mutable_columns)
|
||||
|
||||
columns.push_back(std::move(column));
|
||||
}
|
||||
column_length = columns[0]->size();
|
||||
}
|
||||
|
||||
/// Creates a tuple column without element columns; `len` is stored as its number of rows.
ColumnTuple::ColumnTuple(size_t len)
    : column_length(len)
{
}
|
||||
|
||||
ColumnTuple::Ptr ColumnTuple::create(const Columns & columns)
|
||||
{
|
||||
if (columns.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "This function cannot be used to construct empty tuple. It is a bug");
|
||||
|
||||
for (const auto & column : columns)
|
||||
if (isColumnConst(*column))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ColumnTuple cannot have ColumnConst as its element");
|
||||
|
||||
auto column_tuple = ColumnTuple::create(MutableColumns());
|
||||
auto column_tuple = ColumnTuple::create(columns[0]->size());
|
||||
column_tuple->columns.assign(columns.begin(), columns.end());
|
||||
|
||||
return column_tuple;
|
||||
@ -68,11 +80,14 @@ ColumnTuple::Ptr ColumnTuple::create(const Columns & columns)
|
||||
|
||||
ColumnTuple::Ptr ColumnTuple::create(const TupleColumns & columns)
|
||||
{
|
||||
if (columns.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "This function cannot be used to construct empty tuple. It is a bug");
|
||||
|
||||
for (const auto & column : columns)
|
||||
if (isColumnConst(*column))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ColumnTuple cannot have ColumnConst as its element");
|
||||
|
||||
auto column_tuple = ColumnTuple::create(MutableColumns());
|
||||
auto column_tuple = ColumnTuple::create(columns[0]->size());
|
||||
column_tuple->columns = columns;
|
||||
|
||||
return column_tuple;
|
||||
@ -80,6 +95,9 @@ ColumnTuple::Ptr ColumnTuple::create(const TupleColumns & columns)
|
||||
|
||||
MutableColumnPtr ColumnTuple::cloneEmpty() const
|
||||
{
|
||||
if (columns.empty())
|
||||
return ColumnTuple::create(0);
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
MutableColumns new_columns(tuple_size);
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
@ -90,6 +108,9 @@ MutableColumnPtr ColumnTuple::cloneEmpty() const
|
||||
|
||||
MutableColumnPtr ColumnTuple::cloneResized(size_t new_size) const
|
||||
{
|
||||
if (columns.empty())
|
||||
return ColumnTuple::create(new_size);
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
MutableColumns new_columns(tuple_size);
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
@ -98,6 +119,16 @@ MutableColumnPtr ColumnTuple::cloneResized(size_t new_size) const
|
||||
return ColumnTuple::create(std::move(new_columns));
|
||||
}
|
||||
|
||||
/// Number of rows in the tuple column.
size_t ColumnTuple::size() const
{
    /// A tuple without elements has no nested column to ask, so its size is tracked in `column_length`.
    /// For all other tuples the first element is the source of truth: it is difficult to keep
    /// `column_length` consistent because many places manipulate the sub-columns directly.
    return columns.empty() ? column_length : columns.front()->size();
}
|
||||
|
||||
Field ColumnTuple::operator[](size_t n) const
|
||||
{
|
||||
Field res;
|
||||
@ -144,6 +175,7 @@ void ColumnTuple::insert(const Field & x)
|
||||
if (tuple.size() != tuple_size)
|
||||
throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple");
|
||||
|
||||
++column_length;
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
columns[i]->insert(tuple[i]);
|
||||
}
|
||||
@ -181,6 +213,7 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
|
||||
if (src.columns.size() != tuple_size)
|
||||
throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple");
|
||||
|
||||
++column_length;
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
columns[i]->insertFrom(*src.columns[i], n);
|
||||
}
|
||||
@ -199,18 +232,28 @@ void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t le
|
||||
|
||||
void ColumnTuple::insertDefault()
|
||||
{
|
||||
++column_length;
|
||||
for (auto & column : columns)
|
||||
column->insertDefault();
|
||||
}
|
||||
|
||||
/// Removes the last `n` rows: decreases `column_length` and pops `n` rows from every element column.
void ColumnTuple::popBack(size_t n)
{
    column_length -= n;

    const size_t tuple_size = columns.size();
    for (size_t i = 0; i < tuple_size; ++i)
        columns[i]->popBack(n);
}
|
||||
|
||||
StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
/// Has to put one useless byte into Arena, because serialization into zero number of bytes is ambiguous.
|
||||
char * res = arena.allocContinue(1, begin);
|
||||
*res = 0;
|
||||
return { res, 1 };
|
||||
}
|
||||
|
||||
StringRef res(begin, 0);
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
@ -232,6 +275,11 @@ char * ColumnTuple::serializeValueIntoMemory(size_t n, char * memory) const
|
||||
|
||||
const char * ColumnTuple::deserializeAndInsertFromArena(const char * pos)
|
||||
{
|
||||
++column_length;
|
||||
|
||||
if (columns.empty())
|
||||
return pos + 1;
|
||||
|
||||
for (auto & column : columns)
|
||||
pos = column->deserializeAndInsertFromArena(pos);
|
||||
|
||||
@ -272,6 +320,7 @@ void ColumnTuple::updateHashFast(SipHash & hash) const
|
||||
|
||||
void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
||||
{
|
||||
column_length += length;
|
||||
const size_t tuple_size = columns.size();
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
columns[i]->insertRangeFrom(
|
||||
@ -281,6 +330,12 @@ void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t leng
|
||||
|
||||
ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
size_t bytes = countBytesInFilter(filt);
|
||||
return cloneResized(bytes);
|
||||
}
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
Columns new_columns(tuple_size);
|
||||
|
||||
@ -292,12 +347,29 @@ ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint) con
|
||||
|
||||
void ColumnTuple::expand(const Filter & mask, bool inverted)
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
size_t bytes = countBytesInFilter(mask);
|
||||
if (inverted)
|
||||
bytes = mask.size() - bytes;
|
||||
column_length = bytes;
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto & column : columns)
|
||||
column->expand(mask, inverted);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
if (column_length != perm.size())
|
||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of permutation doesn't match size of column");
|
||||
|
||||
return cloneResized(limit ? std::min(column_length, limit) : column_length);
|
||||
}
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
Columns new_columns(tuple_size);
|
||||
|
||||
@ -309,6 +381,14 @@ ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const
|
||||
|
||||
ColumnPtr ColumnTuple::index(const IColumn & indexes, size_t limit) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
if (indexes.size() < limit)
|
||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of indexes is less than required");
|
||||
|
||||
return cloneResized(limit ? limit : column_length);
|
||||
}
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
Columns new_columns(tuple_size);
|
||||
|
||||
@ -320,6 +400,14 @@ ColumnPtr ColumnTuple::index(const IColumn & indexes, size_t limit) const
|
||||
|
||||
ColumnPtr ColumnTuple::replicate(const Offsets & offsets) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
if (column_length != offsets.size())
|
||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of offsets doesn't match size of column");
|
||||
|
||||
return cloneResized(offsets.back());
|
||||
}
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
Columns new_columns(tuple_size);
|
||||
|
||||
@ -331,6 +419,22 @@ ColumnPtr ColumnTuple::replicate(const Offsets & offsets) const
|
||||
|
||||
MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & selector) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
if (column_length != selector.size())
|
||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of selector doesn't match size of column");
|
||||
|
||||
std::vector<size_t> counts(num_columns);
|
||||
for (auto idx : selector)
|
||||
++counts[idx];
|
||||
|
||||
MutableColumns res(num_columns);
|
||||
for (size_t i = 0; i < num_columns; ++i)
|
||||
res[i] = cloneResized(counts[i]);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
std::vector<MutableColumns> scattered_tuple_elements(tuple_size);
|
||||
|
||||
@ -413,6 +517,9 @@ void ColumnTuple::getPermutationImpl(IColumn::PermutationSortDirection direction
|
||||
res.resize(rows);
|
||||
iota(res.data(), rows, IColumn::Permutation::value_type(0));
|
||||
|
||||
if (columns.empty())
|
||||
return;
|
||||
|
||||
if (limit >= rows)
|
||||
limit = 0;
|
||||
|
||||
@ -429,7 +536,7 @@ void ColumnTuple::updatePermutationImpl(IColumn::PermutationSortDirection direct
|
||||
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
while (!equal_ranges.empty() && limit && limit <= equal_ranges.back().first)
|
||||
while (!equal_ranges.empty() && limit && limit <= equal_ranges.back().from)
|
||||
equal_ranges.pop_back();
|
||||
|
||||
if (collator && column->isCollationSupported())
|
||||
@ -603,6 +710,9 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c
|
||||
|
||||
ColumnPtr ColumnTuple::compress() const
|
||||
{
|
||||
if (columns.empty())
|
||||
return Ptr();
|
||||
|
||||
size_t byte_size = 0;
|
||||
Columns compressed;
|
||||
compressed.reserve(columns.size());
|
||||
|
@ -26,6 +26,13 @@ private:
|
||||
explicit ColumnTuple(MutableColumns && columns);
|
||||
ColumnTuple(const ColumnTuple &) = default;
|
||||
|
||||
/// Empty tuple needs a dedicated field to store its size.
|
||||
/// This field used *only* for zero-sized tuples.
|
||||
/// Otherwise `columns[0].size()` should be used to get a size of tuple column
|
||||
size_t column_length;
|
||||
|
||||
/// Dedicated constructor for empty tuples.
|
||||
explicit ColumnTuple(size_t len);
|
||||
public:
|
||||
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
|
||||
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
|
||||
@ -39,6 +46,8 @@ public:
|
||||
requires std::is_rvalue_reference_v<Arg &&>
|
||||
static MutablePtr create(Arg && arg) { return Base::create(std::forward<Arg>(arg)); }
|
||||
|
||||
static MutablePtr create(size_t len_) { return Base::create(len_); }
|
||||
|
||||
std::string getName() const override;
|
||||
const char * getFamilyName() const override { return "Tuple"; }
|
||||
TypeIndex getDataType() const override { return TypeIndex::Tuple; }
|
||||
@ -46,10 +55,7 @@ public:
|
||||
MutableColumnPtr cloneEmpty() const override;
|
||||
MutableColumnPtr cloneResized(size_t size) const override;
|
||||
|
||||
size_t size() const override
|
||||
{
|
||||
return columns.at(0)->size();
|
||||
}
|
||||
size_t size() const override;
|
||||
|
||||
Field operator[](size_t n) const override;
|
||||
void get(size_t n, Field & res) const override;
|
||||
@ -117,6 +123,9 @@ public:
|
||||
bool hasDynamicStructure() const override;
|
||||
void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override;
|
||||
|
||||
/// Empty tuple needs a public method to manage its size.
|
||||
void addSize(size_t delta) { column_length += delta; }
|
||||
|
||||
private:
|
||||
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/HashTable/StringHashSet.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
#include <Common/RadixSort.h>
|
||||
#include <Common/SipHash.h>
|
||||
@ -413,6 +414,25 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
size_t ColumnVector<T>::estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const
|
||||
{
|
||||
const size_t range_size = equal_range.size();
|
||||
if (range_size <= 1)
|
||||
return range_size;
|
||||
|
||||
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
|
||||
StringHashSet elements;
|
||||
bool inserted = false;
|
||||
for (size_t i = equal_range.from; i < equal_range.to; ++i)
|
||||
{
|
||||
size_t permuted_i = permutation[i];
|
||||
StringRef value = getDataAt(permuted_i);
|
||||
elements.emplace(value, inserted);
|
||||
}
|
||||
return elements.size();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
|
||||
{
|
||||
|
@ -161,6 +161,8 @@ public:
|
||||
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_ranges) const override;
|
||||
|
||||
size_t estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
|
||||
void reserve(size_t n) override
|
||||
{
|
||||
data.reserve_exact(n);
|
||||
|
@ -83,6 +83,11 @@ ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const ColumnConst
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Default implementation: ignores the permutation and reports the number of rows in the range,
/// which is the largest number of distinct values the range could possibly contain.
size_t IColumn::estimateCardinalityInPermutedRange(const IColumn::Permutation & /*permutation*/, const EqualRange & equal_range) const
{
    const size_t rows_in_range = equal_range.size();
    return rows_in_range;
}
|
||||
|
||||
void IColumn::forEachSubcolumn(ColumnCallback callback) const
|
||||
{
|
||||
const_cast<IColumn*>(this)->forEachSubcolumn([&callback](WrappedPtr & subcolumn)
|
||||
|
@ -36,11 +36,19 @@ class Field;
|
||||
class WeakHash32;
|
||||
class ColumnConst;
|
||||
|
||||
/*
|
||||
* Represents a set of equal ranges in previous column to perform sorting in current column.
|
||||
* Used in sorting by tuples.
|
||||
* */
|
||||
using EqualRanges = std::vector<std::pair<size_t, size_t> >;
|
||||
/// A range of column values between row indexes `from` and `to`. The name "equal range" is due to table sorting as its main use case: With
|
||||
/// a PRIMARY KEY (c_pk1, c_pk2, ...), the first PK column is fully sorted. The second PK column is sorted within equal-value runs of the
|
||||
/// first PK column, and so on. The number of runs (ranges) per column increases from one primary key column to the next. An "equal range"
|
||||
/// is a run in a previous column, within which the values of the current column can be sorted.
|
||||
struct EqualRange
|
||||
{
|
||||
size_t from; /// inclusive
|
||||
size_t to; /// exclusive
|
||||
EqualRange(size_t from_, size_t to_) : from(from_), to(to_) { chassert(from <= to); }
|
||||
size_t size() const { return to - from; }
|
||||
};
|
||||
|
||||
using EqualRanges = std::vector<EqualRange>;
|
||||
|
||||
/// Declares interface to store columns in memory.
|
||||
class IColumn : public COW<IColumn>
|
||||
@ -399,6 +407,9 @@ public:
|
||||
"or for Array or Tuple, containing them.");
|
||||
}
|
||||
|
||||
/// Estimate the cardinality (number of unique values) of the values in 'equal_range' after permutation, formally: |{ column[permutation[r]] : r in equal_range }|.
|
||||
virtual size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const;
|
||||
|
||||
/** Copies each element according to the offsets parameter.
|
||||
* (i-th element should be copied offsets[i] - offsets[i - 1] times.)
|
||||
* It is necessary in ARRAY JOIN operation.
|
||||
|
@ -60,12 +60,9 @@ ColumnPtr IColumnDummy::filter(const Filter & filt, ssize_t /*result_size_hint*/
|
||||
return cloneDummy(bytes);
|
||||
}
|
||||
|
||||
void IColumnDummy::expand(const IColumn::Filter & mask, bool inverted)
|
||||
void IColumnDummy::expand(const IColumn::Filter & mask, bool)
|
||||
{
|
||||
size_t bytes = countBytesInFilter(mask);
|
||||
if (inverted)
|
||||
bytes = mask.size() - bytes;
|
||||
s = bytes;
|
||||
s = mask.size();
|
||||
}
|
||||
|
||||
ColumnPtr IColumnDummy::permute(const Permutation & perm, size_t limit) const
|
||||
|
@ -139,7 +139,7 @@ void IColumn::updatePermutationImpl(
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit > equal_ranges.back().second)
|
||||
if (limit >= size() || limit > equal_ranges.back().to)
|
||||
limit = 0;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
|
@ -77,7 +77,7 @@ INSTANTIATE(IPv6)
|
||||
|
||||
#undef INSTANTIATE
|
||||
|
||||
template <bool inverted, bool column_is_short, typename Container>
|
||||
template <bool inverted, typename Container>
|
||||
static size_t extractMaskNumericImpl(
|
||||
PaddedPODArray<UInt8> & mask,
|
||||
const Container & data,
|
||||
@ -85,42 +85,27 @@ static size_t extractMaskNumericImpl(
|
||||
const PaddedPODArray<UInt8> * null_bytemap,
|
||||
PaddedPODArray<UInt8> * nulls)
|
||||
{
|
||||
if constexpr (!column_is_short)
|
||||
{
|
||||
if (data.size() != mask.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
|
||||
}
|
||||
if (data.size() != mask.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
|
||||
|
||||
size_t ones_count = 0;
|
||||
size_t data_index = 0;
|
||||
|
||||
size_t mask_size = mask.size();
|
||||
size_t data_size = data.size();
|
||||
|
||||
for (size_t i = 0; i != mask_size && data_index != data_size; ++i)
|
||||
for (size_t i = 0; i != mask_size; ++i)
|
||||
{
|
||||
// Change mask only where value is 1.
|
||||
if (!mask[i])
|
||||
continue;
|
||||
|
||||
UInt8 value;
|
||||
size_t index;
|
||||
if constexpr (column_is_short)
|
||||
{
|
||||
index = data_index;
|
||||
++data_index;
|
||||
}
|
||||
else
|
||||
index = i;
|
||||
|
||||
if (null_bytemap && (*null_bytemap)[index])
|
||||
if (null_bytemap && (*null_bytemap)[i])
|
||||
{
|
||||
value = null_value;
|
||||
if (nulls)
|
||||
(*nulls)[i] = 1;
|
||||
}
|
||||
else
|
||||
value = static_cast<bool>(data[index]);
|
||||
value = static_cast<bool>(data[i]);
|
||||
|
||||
if constexpr (inverted)
|
||||
value = !value;
|
||||
@ -131,12 +116,6 @@ static size_t extractMaskNumericImpl(
|
||||
mask[i] = value;
|
||||
}
|
||||
|
||||
if constexpr (column_is_short)
|
||||
{
|
||||
if (data_index != data_size)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask");
|
||||
}
|
||||
|
||||
return ones_count;
|
||||
}
|
||||
|
||||
@ -155,10 +134,7 @@ static bool extractMaskNumeric(
|
||||
|
||||
const auto & data = numeric_column->getData();
|
||||
size_t ones_count;
|
||||
if (column->size() < mask.size())
|
||||
ones_count = extractMaskNumericImpl<inverted, true>(mask, data, null_value, null_bytemap, nulls);
|
||||
else
|
||||
ones_count = extractMaskNumericImpl<inverted, false>(mask, data, null_value, null_bytemap, nulls);
|
||||
ones_count = extractMaskNumericImpl<inverted>(mask, data, null_value, null_bytemap, nulls);
|
||||
|
||||
mask_info.has_ones = ones_count > 0;
|
||||
mask_info.has_zeros = ones_count != mask.size();
|
||||
@ -279,25 +255,32 @@ void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> &
|
||||
if (!column_function)
|
||||
return;
|
||||
|
||||
size_t original_size = column.column->size();
|
||||
|
||||
ColumnWithTypeAndName result;
|
||||
/// If mask contains only zeros, we can just create
|
||||
/// an empty column with the execution result type.
|
||||
if (!mask_info.has_ones)
|
||||
{
|
||||
/// If mask contains only zeros, we can just create a column with default values as it will be ignored
|
||||
auto result_type = column_function->getResultType();
|
||||
auto empty_column = result_type->createColumn();
|
||||
result = {std::move(empty_column), result_type, ""};
|
||||
auto default_column = result_type->createColumnConstWithDefaultValue(original_size)->convertToFullColumnIfConst();
|
||||
column = {default_column, result_type, ""};
|
||||
}
|
||||
/// Filter column only if mask contains zeros.
|
||||
else if (mask_info.has_zeros)
|
||||
{
|
||||
/// If it contains both zeros and ones, we need to execute the function only on the mask values
|
||||
/// First we filter the column, which creates a new column, then we reduce it (i.e. execute the function on the filtered rows), and finally we expand it
|
||||
/// Expanding is done to keep consistency in function calls (all columns the same size) and it's ok
|
||||
/// since the values won't be used by `if`
|
||||
auto filtered = column_function->filter(mask, -1);
|
||||
result = typeid_cast<const ColumnFunction *>(filtered.get())->reduce();
|
||||
auto filter_after_execution = typeid_cast<const ColumnFunction *>(filtered.get())->reduce();
|
||||
auto mut_column = IColumn::mutate(std::move(filter_after_execution.column));
|
||||
mut_column->expand(mask, false);
|
||||
column.column = std::move(mut_column);
|
||||
}
|
||||
else
|
||||
result = column_function->reduce();
|
||||
column = column_function->reduce();
|
||||
|
||||
column = std::move(result);
|
||||
chassert(column.column->size() == original_size);
|
||||
}
|
||||
|
||||
void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty)
|
||||
|
@ -43,12 +43,13 @@ namespace
|
||||
endpoint,
|
||||
proxy_scheme,
|
||||
proxy_port,
|
||||
cache_ttl
|
||||
std::chrono::seconds {cache_ttl}
|
||||
};
|
||||
|
||||
return std::make_shared<RemoteProxyConfigurationResolver>(
|
||||
server_configuration,
|
||||
request_protocol,
|
||||
std::make_shared<RemoteProxyHostFetcherImpl>(),
|
||||
isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration));
|
||||
}
|
||||
|
||||
|
@ -6,22 +6,47 @@
|
||||
#include <Poco/Net/HTTPRequest.h>
|
||||
#include <Poco/Net/HTTPResponse.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER;
|
||||
}
|
||||
|
||||
/// Sends an HTTP/1.1 GET request for the path of `endpoint` and returns the whole response body,
/// which the caller treats as the proxy host.
/// Throws HTTPException with RECEIVED_ERROR_FROM_REMOTE_IO_SERVER when the response status is not 200 OK.
std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts)
{
    Poco::Net::HTTPRequest get_request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1);

    auto http_session = makeHTTPSession(HTTPConnectionGroupType::HTTP, endpoint, timeouts);
    http_session->sendRequest(get_request);

    Poco::Net::HTTPResponse http_response;
    auto & body_stream = http_session->receiveResponse(http_response);

    const bool is_ok = http_response.getStatus() == Poco::Net::HTTPResponse::HTTP_OK;
    if (!is_ok)
    {
        throw HTTPException(
            ErrorCodes::RECEIVED_ERROR_FROM_REMOTE_IO_SERVER,
            endpoint.toString(),
            http_response.getStatus(),
            http_response.getReason(),
            "");
    }

    /// The body is read in full and returned as-is.
    std::string fetched_host;
    Poco::StreamCopier::copyToString(body_stream, fetched_host);
    return fetched_host;
}
|
||||
|
||||
RemoteProxyConfigurationResolver::RemoteProxyConfigurationResolver(
|
||||
const RemoteServerConfiguration & remote_server_configuration_,
|
||||
Protocol request_protocol_,
|
||||
std::shared_ptr<RemoteProxyHostFetcher> fetcher_,
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy_
|
||||
)
|
||||
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_), remote_server_configuration(remote_server_configuration_)
|
||||
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_),
|
||||
remote_server_configuration(remote_server_configuration_), fetcher(fetcher_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -29,9 +54,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
|
||||
{
|
||||
auto logger = getLogger("RemoteProxyConfigurationResolver");
|
||||
|
||||
auto & [endpoint, proxy_protocol, proxy_port, cache_ttl_] = remote_server_configuration;
|
||||
|
||||
LOG_DEBUG(logger, "Obtain proxy using resolver: {}", endpoint.toString());
|
||||
auto & [endpoint, proxy_protocol_string, proxy_port, cache_ttl] = remote_server_configuration;
|
||||
|
||||
std::lock_guard lock(cache_mutex);
|
||||
|
||||
@ -55,66 +78,26 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
|
||||
.withSendTimeout(1)
|
||||
.withReceiveTimeout(1);
|
||||
|
||||
try
|
||||
{
|
||||
/// It should be just empty GET request.
|
||||
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1);
|
||||
const auto proxy_host = fetcher->fetch(endpoint, timeouts);
|
||||
|
||||
const auto & host = endpoint.getHost();
|
||||
auto resolved_hosts = DNSResolver::instance().resolveHostAll(host);
|
||||
LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol_string, proxy_host, proxy_port);
|
||||
|
||||
HTTPSessionPtr session;
|
||||
auto proxy_protocol = ProxyConfiguration::protocolFromString(proxy_protocol_string);
|
||||
|
||||
for (size_t i = 0; i < resolved_hosts.size(); ++i)
|
||||
{
|
||||
auto resolved_endpoint = endpoint;
|
||||
resolved_endpoint.setHost(resolved_hosts[i].toString());
|
||||
session = makeHTTPSession(HTTPConnectionGroupType::HTTP, resolved_endpoint, timeouts);
|
||||
bool use_tunneling_for_https_requests_over_http_proxy = useTunneling(
|
||||
request_protocol,
|
||||
proxy_protocol,
|
||||
disable_tunneling_for_https_requests_over_http_proxy);
|
||||
|
||||
try
|
||||
{
|
||||
session->sendRequest(request);
|
||||
break;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (i + 1 == resolved_hosts.size())
|
||||
throw;
|
||||
}
|
||||
}
|
||||
cached_config.protocol = proxy_protocol;
|
||||
cached_config.host = proxy_host;
|
||||
cached_config.port = proxy_port;
|
||||
cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy;
|
||||
cached_config.original_request_protocol = request_protocol;
|
||||
cache_timestamp = std::chrono::system_clock::now();
|
||||
cache_valid = true;
|
||||
|
||||
Poco::Net::HTTPResponse response;
|
||||
auto & response_body_stream = session->receiveResponse(response);
|
||||
|
||||
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Proxy resolver returned not OK status: {}", response.getReason());
|
||||
|
||||
String proxy_host;
|
||||
/// Read proxy host as string from response body.
|
||||
Poco::StreamCopier::copyToString(response_body_stream, proxy_host);
|
||||
|
||||
LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol, proxy_host, proxy_port);
|
||||
|
||||
bool use_tunneling_for_https_requests_over_http_proxy = useTunneling(
|
||||
request_protocol,
|
||||
cached_config.protocol,
|
||||
disable_tunneling_for_https_requests_over_http_proxy);
|
||||
|
||||
cached_config.protocol = ProxyConfiguration::protocolFromString(proxy_protocol);
|
||||
cached_config.host = proxy_host;
|
||||
cached_config.port = proxy_port;
|
||||
cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy;
|
||||
cached_config.original_request_protocol = request_protocol;
|
||||
cache_timestamp = std::chrono::system_clock::now();
|
||||
cache_valid = true;
|
||||
|
||||
return cached_config;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException("RemoteProxyConfigurationResolver", "Failed to obtain proxy");
|
||||
return {};
|
||||
}
|
||||
return cached_config;
|
||||
}
|
||||
|
||||
void RemoteProxyConfigurationResolver::errorReport(const ProxyConfiguration & config)
|
||||
@ -124,7 +107,7 @@ void RemoteProxyConfigurationResolver::errorReport(const ProxyConfiguration & co
|
||||
|
||||
std::lock_guard lock(cache_mutex);
|
||||
|
||||
if (!cache_ttl.count() || !cache_valid)
|
||||
if (!remote_server_configuration.cache_ttl_.count() || !cache_valid)
|
||||
return;
|
||||
|
||||
if (std::tie(cached_config.protocol, cached_config.host, cached_config.port)
|
||||
|
@ -10,6 +10,19 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct ConnectionTimeouts;
|
||||
|
||||
struct RemoteProxyHostFetcher
|
||||
{
|
||||
virtual ~RemoteProxyHostFetcher() = default;
|
||||
virtual std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) = 0;
|
||||
};
|
||||
|
||||
struct RemoteProxyHostFetcherImpl : public RemoteProxyHostFetcher
|
||||
{
|
||||
std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) override;
|
||||
};
|
||||
|
||||
/*
|
||||
* Makes an HTTP GET request to the specified endpoint to obtain a proxy host.
|
||||
* */
|
||||
@ -22,13 +35,14 @@ public:
|
||||
Poco::URI endpoint;
|
||||
String proxy_protocol;
|
||||
unsigned proxy_port;
|
||||
unsigned cache_ttl_;
|
||||
const std::chrono::seconds cache_ttl_;
|
||||
};
|
||||
|
||||
RemoteProxyConfigurationResolver(
|
||||
const RemoteServerConfiguration & remote_server_configuration_,
|
||||
Protocol request_protocol_,
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy_ = true);
|
||||
std::shared_ptr<RemoteProxyHostFetcher> fetcher_,
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy_ = false);
|
||||
|
||||
ProxyConfiguration resolve() override;
|
||||
|
||||
@ -36,11 +50,11 @@ public:
|
||||
|
||||
private:
|
||||
RemoteServerConfiguration remote_server_configuration;
|
||||
std::shared_ptr<RemoteProxyHostFetcher> fetcher;
|
||||
|
||||
std::mutex cache_mutex;
|
||||
bool cache_valid = false;
|
||||
std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
|
||||
const std::chrono::seconds cache_ttl{0};
|
||||
ProxyConfiguration cached_config;
|
||||
};
|
||||
|
||||
|
@ -280,6 +280,10 @@ public:
|
||||
if (!initialized())
|
||||
abort();
|
||||
|
||||
/// Thread cannot join itself.
|
||||
if (state->thread_id == std::this_thread::get_id())
|
||||
abort();
|
||||
|
||||
state->event.wait();
|
||||
state.reset();
|
||||
}
|
||||
@ -293,12 +297,7 @@ public:
|
||||
|
||||
bool joinable() const
|
||||
{
|
||||
if (!state)
|
||||
return false;
|
||||
/// Thread cannot join itself.
|
||||
if (state->thread_id == std::this_thread::get_id())
|
||||
return false;
|
||||
return true;
|
||||
return initialized();
|
||||
}
|
||||
|
||||
std::thread::id get_id() const
|
||||
|
@ -637,6 +637,9 @@ void TestKeeper::finalize(const String &)
|
||||
expired = true;
|
||||
}
|
||||
|
||||
/// Signal requests_queue to wake up the processing thread without waiting for the timeout
|
||||
requests_queue.finish();
|
||||
|
||||
processing_thread.join();
|
||||
|
||||
try
|
||||
|
@ -1,5 +1,4 @@
|
||||
#include "ZooKeeper.h"
|
||||
#include "Coordination/KeeperConstants.h"
|
||||
#include "Coordination/KeeperFeatureFlags.h"
|
||||
#include "ZooKeeperImpl.h"
|
||||
#include "KeeperException.h"
|
||||
@ -376,11 +375,14 @@ void ZooKeeper::createAncestors(const std::string & path)
|
||||
}
|
||||
|
||||
Coordination::Responses responses;
|
||||
Coordination::Error code = multiImpl(create_ops, responses, /*check_session_valid*/ false);
|
||||
const auto & [code, failure_reason] = multiImpl(create_ops, responses, /*check_session_valid*/ false);
|
||||
|
||||
if (code == Coordination::Error::ZOK)
|
||||
return;
|
||||
|
||||
if (!failure_reason.empty())
|
||||
throw KeeperException::fromMessage(code, failure_reason);
|
||||
|
||||
throw KeeperException::fromPath(code, path);
|
||||
}
|
||||
|
||||
@ -676,17 +678,19 @@ Coordination::Error ZooKeeper::trySet(const std::string & path, const std::strin
|
||||
}
|
||||
|
||||
|
||||
Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid)
|
||||
std::pair<Coordination::Error, std::string>
|
||||
ZooKeeper::multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid)
|
||||
{
|
||||
if (requests.empty())
|
||||
return Coordination::Error::ZOK;
|
||||
return {Coordination::Error::ZOK, ""};
|
||||
|
||||
std::future<Coordination::MultiResponse> future_result;
|
||||
Coordination::Requests requests_with_check_session;
|
||||
if (check_session_valid)
|
||||
{
|
||||
Coordination::Requests new_requests = requests;
|
||||
addCheckSessionOp(new_requests);
|
||||
future_result = asyncTryMultiNoThrow(new_requests);
|
||||
requests_with_check_session = requests;
|
||||
addCheckSessionOp(requests_with_check_session);
|
||||
future_result = asyncTryMultiNoThrow(requests_with_check_session);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -696,7 +700,7 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests
|
||||
if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready)
|
||||
{
|
||||
impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::Multi, requests[0]->getPath()));
|
||||
return Coordination::Error::ZOPERATIONTIMEOUT;
|
||||
return {Coordination::Error::ZOPERATIONTIMEOUT, ""};
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -704,11 +708,14 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests
|
||||
Coordination::Error code = response.error;
|
||||
responses = response.responses;
|
||||
|
||||
std::string reason;
|
||||
|
||||
if (check_session_valid)
|
||||
{
|
||||
if (code != Coordination::Error::ZOK && !Coordination::isHardwareError(code) && getFailedOpIndex(code, responses) == requests.size())
|
||||
{
|
||||
impl->finalize(fmt::format("Session was killed: {}", requests.back()->getPath()));
|
||||
reason = fmt::format("Session was killed: {}", requests_with_check_session.back()->getPath());
|
||||
impl->finalize(reason);
|
||||
code = Coordination::Error::ZSESSIONMOVED;
|
||||
}
|
||||
responses.pop_back();
|
||||
@ -717,23 +724,33 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests
|
||||
chassert(code == Coordination::Error::ZOK || Coordination::isHardwareError(code) || responses.back()->error != Coordination::Error::ZOK);
|
||||
}
|
||||
|
||||
return code;
|
||||
return {code, std::move(reason)};
|
||||
}
|
||||
}
|
||||
|
||||
Coordination::Responses ZooKeeper::multi(const Coordination::Requests & requests, bool check_session_valid)
|
||||
{
|
||||
Coordination::Responses responses;
|
||||
Coordination::Error code = multiImpl(requests, responses, check_session_valid);
|
||||
const auto & [code, failure_reason] = multiImpl(requests, responses, check_session_valid);
|
||||
if (!failure_reason.empty())
|
||||
throw KeeperException::fromMessage(code, failure_reason);
|
||||
|
||||
KeeperMultiException::check(code, requests, responses);
|
||||
return responses;
|
||||
}
|
||||
|
||||
Coordination::Error ZooKeeper::tryMulti(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid)
|
||||
{
|
||||
Coordination::Error code = multiImpl(requests, responses, check_session_valid);
|
||||
const auto & [code, failure_reason] = multiImpl(requests, responses, check_session_valid);
|
||||
|
||||
if (code != Coordination::Error::ZOK && !Coordination::isUserError(code))
|
||||
{
|
||||
if (!failure_reason.empty())
|
||||
throw KeeperException::fromMessage(code, failure_reason);
|
||||
|
||||
throw KeeperException(code);
|
||||
}
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
@ -1346,7 +1363,7 @@ Coordination::Error ZooKeeper::tryMultiNoThrow(const Coordination::Requests & re
|
||||
{
|
||||
try
|
||||
{
|
||||
return multiImpl(requests, responses, check_session_valid);
|
||||
return multiImpl(requests, responses, check_session_valid).first;
|
||||
}
|
||||
catch (const Coordination::Exception & e)
|
||||
{
|
||||
|
@ -2,10 +2,8 @@
|
||||
|
||||
#include "Types.h"
|
||||
#include <Poco/Util/LayeredConfiguration.h>
|
||||
#include <unordered_set>
|
||||
#include <future>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
@ -18,7 +16,6 @@
|
||||
#include <Common/thread_local_rng.h>
|
||||
#include <Coordination/KeeperFeatureFlags.h>
|
||||
#include <unistd.h>
|
||||
#include <random>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
@ -644,7 +641,11 @@ private:
|
||||
Coordination::Stat * stat,
|
||||
Coordination::WatchCallbackPtr watch_callback,
|
||||
Coordination::ListRequestType list_request_type);
|
||||
Coordination::Error multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid);
|
||||
|
||||
/// returns error code with optional reason
|
||||
std::pair<Coordination::Error, std::string>
|
||||
multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid);
|
||||
|
||||
Coordination::Error existsImpl(const std::string & path, Coordination::Stat * stat_, Coordination::WatchCallback watch_callback);
|
||||
Coordination::Error syncImpl(const std::string & path, std::string & returned_path);
|
||||
|
||||
|
172
src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp
Normal file
172
src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp
Normal file
@ -0,0 +1,172 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Common/RemoteProxyConfigurationResolver.h>
|
||||
#include <Poco/URI.h>
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <base/sleep.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct RemoteProxyHostFetcherMock : public DB::RemoteProxyHostFetcher
|
||||
{
|
||||
explicit RemoteProxyHostFetcherMock(const std::string & return_mock_) : return_mock(return_mock_) {}
|
||||
|
||||
std::string fetch(const Poco::URI &, const DB::ConnectionTimeouts &) override
|
||||
{
|
||||
fetch_count++;
|
||||
return return_mock;
|
||||
}
|
||||
|
||||
std::string return_mock;
|
||||
std::size_t fetch_count {0};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// HTTP request through an HTTP proxy: the resolved configuration carries the fetched host,
/// the configured port and protocol, and does not use tunneling.
TEST(RemoteProxyConfigurationResolver, HTTPOverHTTP)
{
    const char * expected_host = "proxy1";

    RemoteProxyConfigurationResolver::RemoteServerConfiguration server_config
    {
        Poco::URI("not_important"),
        "http",
        80,
        std::chrono::seconds {10}
    };

    auto fetcher = std::make_shared<RemoteProxyHostFetcherMock>(expected_host);

    RemoteProxyConfigurationResolver resolver(server_config, ProxyConfiguration::Protocol::HTTP, fetcher);

    const auto resolved = resolver.resolve();

    ASSERT_EQ(resolved.host, expected_host);
    ASSERT_EQ(resolved.port, 80);
    ASSERT_EQ(resolved.protocol, ProxyConfiguration::Protocol::HTTP);
    ASSERT_EQ(resolved.original_request_protocol, ProxyConfiguration::Protocol::HTTP);
    ASSERT_EQ(resolved.tunneling, false);
}
|
||||
|
||||
/// HTTPS request through an HTTPS proxy: host, port and protocol come through unchanged and tunneling stays off.
TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPS)
{
    const char * expected_host = "proxy1";

    RemoteProxyConfigurationResolver::RemoteServerConfiguration server_config
    {
        Poco::URI("not_important"),
        "https",
        443,
        std::chrono::seconds {10}
    };

    auto fetcher = std::make_shared<RemoteProxyHostFetcherMock>(expected_host);

    RemoteProxyConfigurationResolver resolver(server_config, ProxyConfiguration::Protocol::HTTPS, fetcher);

    const auto resolved = resolver.resolve();

    ASSERT_EQ(resolved.host, expected_host);
    ASSERT_EQ(resolved.port, 443);
    ASSERT_EQ(resolved.protocol, ProxyConfiguration::Protocol::HTTPS);
    ASSERT_EQ(resolved.original_request_protocol, ProxyConfiguration::Protocol::HTTPS);
    // Both the request and the proxy are HTTPS, so no tunneling is expected.
    ASSERT_EQ(resolved.tunneling, false);
}
|
||||
|
||||
/// HTTPS request through an HTTP proxy with the default constructor arguments: tunneling is expected.
TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTP)
{
    const char * expected_host = "proxy1";

    RemoteProxyConfigurationResolver::RemoteServerConfiguration server_config
    {
        Poco::URI("not_important"),
        "http",
        80,
        std::chrono::seconds {10}
    };

    auto fetcher = std::make_shared<RemoteProxyHostFetcherMock>(expected_host);

    RemoteProxyConfigurationResolver resolver(server_config, ProxyConfiguration::Protocol::HTTPS, fetcher);

    const auto resolved = resolver.resolve();

    ASSERT_EQ(resolved.host, expected_host);
    ASSERT_EQ(resolved.port, 80);
    ASSERT_EQ(resolved.protocol, ProxyConfiguration::Protocol::HTTP);
    ASSERT_EQ(resolved.original_request_protocol, ProxyConfiguration::Protocol::HTTPS);
    // An HTTPS request going through an HTTP proxy: tunneling should be used.
    ASSERT_EQ(resolved.tunneling, true);
}
|
||||
|
||||
/// HTTPS request through an HTTP proxy, but with tunneling explicitly disabled via the constructor flag.
TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPNoTunneling)
{
    const char * expected_host = "proxy1";

    RemoteProxyConfigurationResolver::RemoteServerConfiguration server_config
    {
        Poco::URI("not_important"),
        "http",
        80,
        std::chrono::seconds {10}
    };

    auto fetcher = std::make_shared<RemoteProxyHostFetcherMock>(expected_host);

    RemoteProxyConfigurationResolver resolver(
        server_config,
        ProxyConfiguration::Protocol::HTTPS,
        fetcher,
        true /* disable_tunneling_for_https_requests_over_http_proxy_ */
    );

    const auto resolved = resolver.resolve();

    ASSERT_EQ(resolved.host, expected_host);
    ASSERT_EQ(resolved.port, 80);
    ASSERT_EQ(resolved.protocol, ProxyConfiguration::Protocol::HTTP);
    ASSERT_EQ(resolved.original_request_protocol, ProxyConfiguration::Protocol::HTTPS);
    // HTTPS over an HTTP proxy, but tunneling was disabled above, so it must not be used.
    ASSERT_EQ(resolved.tunneling, false);
}
|
||||
|
||||
/// Checks the resolver's caching: repeated resolve() calls made within the cache TTL reach the
/// fetcher only once, and a call made after the TTL has passed reaches it again.
TEST(RemoteProxyConfigurationResolver, SimpleCacheTest)
{
    const char * proxy_server_mock = "proxy1";
    auto cache_ttl = 5u;
    auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration
    {
        Poco::URI("not_important"),
        "http",
        80,
        std::chrono::seconds {cache_ttl}
    };

    /// Kept in a local variable so the call counter can be inspected after the resolver has used it.
    auto fetcher_mock = std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock);

    RemoteProxyConfigurationResolver resolver(
        remote_server_configuration,
        ProxyConfiguration::Protocol::HTTP,
        fetcher_mock
    );

    resolver.resolve();
    resolver.resolve();
    resolver.resolve();

    /// All three calls happened within the TTL, so only the first one should have fetched.
    ASSERT_EQ(fetcher_mock->fetch_count, 1u);

    /// Wait well past the TTL so that the next resolve() has to fetch again.
    sleepForSeconds(cache_ttl * 2);

    resolver.resolve();

    /// Unsigned literal: fetch_count is std::size_t, and this matches the `1u` comparison above.
    ASSERT_EQ(fetcher_mock->fetch_count, 2u);
}
|
||||
|
||||
}
|
@ -129,7 +129,6 @@ class IColumn;
|
||||
M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
|
||||
M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \
|
||||
M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. The value can be overwritten per table with table settings", 0) \
|
||||
M(Bool, s3queue_allow_experimental_sharded_mode, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten", 0) \
|
||||
M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
|
||||
M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \
|
||||
M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \
|
||||
@ -243,7 +242,8 @@ class IColumn;
|
||||
M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \
|
||||
M(Bool, split_parts_ranges_into_intersecting_and_non_intersecting_final, true, "Split parts ranges into intersecting and non intersecting during FINAL optimization", 0) \
|
||||
M(Bool, split_intersecting_parts_ranges_into_layers_final, true, "Split intersecting parts ranges into layers during FINAL optimization", 0) \
|
||||
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \
|
||||
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \
|
||||
M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \
|
||||
\
|
||||
M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \
|
||||
M(Bool, mysql_map_string_to_text_in_show_columns, true, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Has an effect only when the connection is made through the MySQL wire protocol.", 0) \
|
||||
@ -260,6 +260,8 @@ class IColumn;
|
||||
M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
|
||||
M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \
|
||||
M(Bool, use_skip_indexes_if_final, false, "If query has FINAL, then skipping data based on indexes may produce incorrect result, hence disabled by default.", 0) \
|
||||
M(Bool, materialize_skip_indexes_on_insert, true, "If true skip indexes are calculated on inserts, otherwise skip indexes will be calculated only during merges", 0) \
|
||||
M(Bool, materialize_statistics_on_insert, true, "If true statistics are calculated on inserts, otherwise statistics will be calculated only during merges", 0) \
|
||||
M(String, ignore_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be excluded during query execution.", 0) \
|
||||
\
|
||||
M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \
|
||||
@ -961,6 +963,7 @@ class IColumn;
|
||||
MAKE_OBSOLETE(M, UInt64, partial_merge_join_optimizations, 0) \
|
||||
MAKE_OBSOLETE(M, MaxThreads, max_alter_threads, 0) \
|
||||
MAKE_OBSOLETE(M, Bool, use_mysql_types_in_show_columns, false) \
|
||||
MAKE_OBSOLETE(M, Bool, s3queue_allow_experimental_sharded_mode, false) \
|
||||
/* moved to config.xml: see also src/Core/ServerSettings.h */ \
|
||||
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_buffer_flush_schedule_pool_size, 16) \
|
||||
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_pool_size, 16) \
|
||||
|
@ -85,10 +85,13 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"24.6", {{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
|
||||
{"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"},
|
||||
{"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"},
|
||||
{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
|
||||
{"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"},
|
||||
{"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"},
|
||||
{"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"},
|
||||
{"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"},
|
||||
{"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"},
|
||||
{"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"},
|
||||
{"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},
|
||||
|
@ -1,149 +0,0 @@
|
||||
#include "iostream_debug_helpers.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <Client/Connection.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Core/ColumnWithTypeAndName.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <IO/WriteBufferFromOStream.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Common/COW.h>
|
||||
#include <Common/FieldVisitorDump.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <>
|
||||
std::ostream & operator<< <Field>(std::ostream & stream, const Field & what)
|
||||
{
|
||||
stream << applyVisitor(FieldVisitorDump(), what);
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const NameAndTypePair & what)
|
||||
{
|
||||
stream << "NameAndTypePair(name = " << what.name << ", type = " << what.type << ")";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const IDataType & what)
|
||||
{
|
||||
stream << "IDataType(name = " << what.getName() << ", default = " << what.getDefault() << ")";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const IStorage & what)
|
||||
{
|
||||
auto table_id = what.getStorageID();
|
||||
stream << "IStorage(name = " << what.getName() << ", tableName = " << table_id.table_name << ") {"
|
||||
<< what.getInMemoryMetadataPtr()->getColumns().getAllPhysical().toString() << "}";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const TableLockHolder &)
|
||||
{
|
||||
stream << "TableStructureReadLock()";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const IFunctionOverloadResolver & what)
|
||||
{
|
||||
stream << "IFunction(name = " << what.getName() << ", variadic = " << what.isVariadic() << ", args = " << what.getNumberOfArguments()
|
||||
<< ")";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const Block & what)
|
||||
{
|
||||
stream << "Block("
|
||||
<< "num_columns = " << what.columns() << "){" << what.dumpStructure() << "}";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const ColumnWithTypeAndName & what)
|
||||
{
|
||||
stream << "ColumnWithTypeAndName(name = " << what.name << ", type = " << *what.type << ", column = ";
|
||||
return dumpValue(stream, what.column) << ")";
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const IColumn & what)
|
||||
{
|
||||
stream << "IColumn(" << what.dumpStructure() << ")";
|
||||
stream << "{";
|
||||
for (size_t i = 0; i < what.size(); ++i)
|
||||
{
|
||||
if (i)
|
||||
stream << ", ";
|
||||
stream << applyVisitor(FieldVisitorDump(), what[i]);
|
||||
}
|
||||
stream << "}";
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const Packet & what)
|
||||
{
|
||||
stream << "Packet("
|
||||
<< "type = " << what.type;
|
||||
// types description: Core/Protocol.h
|
||||
if (what.exception)
|
||||
stream << "exception = " << what.exception.get();
|
||||
// TODO: profile_info
|
||||
stream << ") {" << what.block << "}";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const ExpressionActions & what)
|
||||
{
|
||||
stream << "ExpressionActions(" << what.dumpActions() << ")";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const TreeRewriterResult & what)
|
||||
{
|
||||
stream << "SyntaxAnalyzerResult{";
|
||||
stream << "storage=" << what.storage << "; ";
|
||||
if (!what.source_columns.empty())
|
||||
{
|
||||
stream << "source_columns=";
|
||||
dumpValue(stream, what.source_columns);
|
||||
stream << "; ";
|
||||
}
|
||||
if (!what.aliases.empty())
|
||||
{
|
||||
stream << "aliases=";
|
||||
dumpValue(stream, what.aliases);
|
||||
stream << "; ";
|
||||
}
|
||||
if (!what.array_join_result_to_source.empty())
|
||||
{
|
||||
stream << "array_join_result_to_source=";
|
||||
dumpValue(stream, what.array_join_result_to_source);
|
||||
stream << "; ";
|
||||
}
|
||||
if (!what.array_join_alias_to_name.empty())
|
||||
{
|
||||
stream << "array_join_alias_to_name=";
|
||||
dumpValue(stream, what.array_join_alias_to_name);
|
||||
stream << "; ";
|
||||
}
|
||||
if (!what.array_join_name_to_alias.empty())
|
||||
{
|
||||
stream << "array_join_name_to_alias=";
|
||||
dumpValue(stream, what.array_join_name_to_alias);
|
||||
stream << "; ";
|
||||
}
|
||||
stream << "rewrite_subqueries=" << what.rewrite_subqueries << "; ";
|
||||
stream << "}";
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
}
|
@ -1,49 +0,0 @@
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
// Use template to disable implicit casting for certain overloaded types such as Field, which leads
|
||||
// to overload resolution ambiguity.
|
||||
class Field;
|
||||
template <typename T>
|
||||
requires std::is_same_v<T, Field>
|
||||
std::ostream & operator<<(std::ostream & stream, const T & what);
|
||||
|
||||
struct NameAndTypePair;
|
||||
std::ostream & operator<<(std::ostream & stream, const NameAndTypePair & what);
|
||||
|
||||
class IDataType;
|
||||
std::ostream & operator<<(std::ostream & stream, const IDataType & what);
|
||||
|
||||
class IStorage;
|
||||
std::ostream & operator<<(std::ostream & stream, const IStorage & what);
|
||||
|
||||
class IFunctionOverloadResolver;
|
||||
std::ostream & operator<<(std::ostream & stream, const IFunctionOverloadResolver & what);
|
||||
|
||||
class IFunctionBase;
|
||||
std::ostream & operator<<(std::ostream & stream, const IFunctionBase & what);
|
||||
|
||||
class Block;
|
||||
std::ostream & operator<<(std::ostream & stream, const Block & what);
|
||||
|
||||
struct ColumnWithTypeAndName;
|
||||
std::ostream & operator<<(std::ostream & stream, const ColumnWithTypeAndName & what);
|
||||
|
||||
class IColumn;
|
||||
std::ostream & operator<<(std::ostream & stream, const IColumn & what);
|
||||
|
||||
struct Packet;
|
||||
std::ostream & operator<<(std::ostream & stream, const Packet & what);
|
||||
|
||||
class ExpressionActions;
|
||||
std::ostream & operator<<(std::ostream & stream, const ExpressionActions & what);
|
||||
|
||||
struct TreeRewriterResult;
|
||||
std::ostream & operator<<(std::ostream & stream, const TreeRewriterResult & what);
|
||||
}
|
||||
|
||||
/// some operator<< should be declared before operator<<(... std::shared_ptr<>)
|
||||
#include <base/iostream_debug_helpers.h>
|
@ -742,6 +742,7 @@ std::string BaseDaemon::getDefaultConfigFileName() const
|
||||
|
||||
void BaseDaemon::closeFDs()
|
||||
{
|
||||
#if !defined(USE_XRAY)
|
||||
/// NOTE: may benefit from close_range() (linux 5.9+)
|
||||
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
fs::path proc_path{"/dev/fd"};
|
||||
@ -789,13 +790,13 @@ void BaseDaemon::closeFDs()
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void BaseDaemon::initialize(Application & self)
|
||||
{
|
||||
closeFDs();
|
||||
|
||||
ServerApplication::initialize(self);
|
||||
|
||||
/// now highest priority (lowest value) is PRIO_APPLICATION = -100, we want higher!
|
||||
|
@ -75,6 +75,9 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
|
||||
else if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()))
|
||||
{
|
||||
auto columns = column_tuple->getColumns();
|
||||
if (columns.empty())
|
||||
return column;
|
||||
|
||||
for (auto & element : columns)
|
||||
element = recursiveRemoveLowCardinality(element);
|
||||
res = ColumnTuple::create(columns);
|
||||
|
@ -29,7 +29,6 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int DUPLICATE_COLUMN;
|
||||
extern const int EMPTY_DATA_PASSED;
|
||||
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
|
||||
@ -181,6 +180,9 @@ static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl)
|
||||
|
||||
MutableColumnPtr DataTypeTuple::createColumn() const
|
||||
{
|
||||
if (elems.empty())
|
||||
return ColumnTuple::create(0);
|
||||
|
||||
size_t size = elems.size();
|
||||
MutableColumns tuple_columns(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
@ -206,6 +208,9 @@ MutableColumnPtr DataTypeTuple::createColumn(const ISerialization & serializatio
|
||||
if (!serialization_tuple)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected serialization to create column of type Tuple");
|
||||
|
||||
if (elems.empty())
|
||||
return IDataType::createColumn(serialization);
|
||||
|
||||
const auto & element_serializations = serialization_tuple->getElementsSerializations();
|
||||
|
||||
size_t size = elems.size();
|
||||
@ -224,6 +229,12 @@ Field DataTypeTuple::getDefault() const
|
||||
|
||||
void DataTypeTuple::insertDefaultInto(IColumn & column) const
|
||||
{
|
||||
if (elems.empty())
|
||||
{
|
||||
column.insertDefault();
|
||||
return;
|
||||
}
|
||||
|
||||
addElementSafe(elems, column, [&]
|
||||
{
|
||||
for (const auto & i : collections::range(0, elems.size()))
|
||||
@ -388,7 +399,7 @@ void DataTypeTuple::forEachChild(const ChildCallback & callback) const
|
||||
static DataTypePtr create(const ASTPtr & arguments)
|
||||
{
|
||||
if (!arguments || arguments->children.empty())
|
||||
throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Tuple cannot be empty");
|
||||
return std::make_shared<DataTypeTuple>(DataTypes{});
|
||||
|
||||
DataTypes nested_types;
|
||||
nested_types.reserve(arguments->children.size());
|
||||
|
@ -20,7 +20,6 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int EMPTY_DATA_PASSED;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
@ -146,9 +145,6 @@ DataTypePtr FieldToDataType<on_error>::operator() (const Array & x) const
|
||||
template <LeastSupertypeOnError on_error>
|
||||
DataTypePtr FieldToDataType<on_error>::operator() (const Tuple & tuple) const
|
||||
{
|
||||
if (tuple.empty())
|
||||
throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot infer type of an empty tuple");
|
||||
|
||||
DataTypes element_types;
|
||||
element_types.reserve(tuple.size());
|
||||
|
||||
|
@ -229,9 +229,10 @@ static std::pair<ColumnPtr, DataTypePtr> recursivlyConvertDynamicColumnToTuple(
|
||||
= recursivlyConvertDynamicColumnToTuple(tuple_columns[i], tuple_types[i]);
|
||||
}
|
||||
|
||||
auto new_column = tuple_size == 0 ? column : ColumnPtr(ColumnTuple::create(new_tuple_columns));
|
||||
return
|
||||
{
|
||||
ColumnTuple::create(new_tuple_columns),
|
||||
new_column,
|
||||
recreateTupleWithElements(*type_tuple, new_tuple_types)
|
||||
};
|
||||
}
|
||||
|
@ -70,13 +70,15 @@ void SerializationInfoTuple::add(const SerializationInfo & other)
|
||||
|
||||
void SerializationInfoTuple::addDefaults(size_t length)
|
||||
{
|
||||
SerializationInfo::addDefaults(length);
|
||||
|
||||
for (const auto & elem : elems)
|
||||
elem->addDefaults(length);
|
||||
}
|
||||
|
||||
void SerializationInfoTuple::replaceData(const SerializationInfo & other)
|
||||
{
|
||||
SerializationInfo::add(other);
|
||||
SerializationInfo::replaceData(other);
|
||||
|
||||
const auto & other_info = assert_cast<const SerializationInfoTuple &>(other);
|
||||
for (const auto & [name, elem] : name_to_elem)
|
||||
@ -94,7 +96,9 @@ MutableSerializationInfoPtr SerializationInfoTuple::clone() const
|
||||
for (const auto & elem : elems)
|
||||
elems_cloned.push_back(elem->clone());
|
||||
|
||||
return std::make_shared<SerializationInfoTuple>(std::move(elems_cloned), names, settings);
|
||||
auto ret = std::make_shared<SerializationInfoTuple>(std::move(elems_cloned), names, settings);
|
||||
ret->data = data;
|
||||
return ret;
|
||||
}
|
||||
|
||||
MutableSerializationInfoPtr SerializationInfoTuple::createWithType(
|
||||
|
@ -91,6 +91,10 @@ static ReturnType addElementSafe(size_t num_elems, IColumn & column, F && impl)
|
||||
restore_elements();
|
||||
return ReturnType(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert_cast<ColumnTuple &>(column).addSize(1);
|
||||
}
|
||||
|
||||
// Check that all columns now have the same size.
|
||||
size_t new_size = column.size();
|
||||
@ -564,6 +568,12 @@ void SerializationTuple::enumerateStreams(
|
||||
const StreamCallback & callback,
|
||||
const SubstreamData & data) const
|
||||
{
|
||||
if (elems.empty())
|
||||
{
|
||||
ISerialization::enumerateStreams(settings, callback, data);
|
||||
return;
|
||||
}
|
||||
|
||||
const auto * type_tuple = data.type ? &assert_cast<const DataTypeTuple &>(*data.type) : nullptr;
|
||||
const auto * column_tuple = data.column ? &assert_cast<const ColumnTuple &>(*data.column) : nullptr;
|
||||
const auto * info_tuple = data.serialization_info ? &assert_cast<const SerializationInfoTuple &>(*data.serialization_info) : nullptr;
|
||||
@ -626,6 +636,22 @@ void SerializationTuple::serializeBinaryBulkWithMultipleStreams(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
if (elems.empty())
|
||||
{
|
||||
if (WriteBuffer * stream = settings.getter(settings.path))
|
||||
{
|
||||
size_t size = column.size();
|
||||
|
||||
if (limit == 0 || offset + limit > size)
|
||||
limit = size - offset;
|
||||
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
stream->write('0');
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
auto * tuple_state = checkAndGetState<SerializeBinaryBulkStateTuple>(state);
|
||||
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
@ -642,6 +668,24 @@ void SerializationTuple::deserializeBinaryBulkWithMultipleStreams(
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
if (elems.empty())
|
||||
{
|
||||
auto cached_column = getFromSubstreamsCache(cache, settings.path);
|
||||
if (cached_column)
|
||||
{
|
||||
column = cached_column;
|
||||
}
|
||||
else if (ReadBuffer * stream = settings.getter(settings.path))
|
||||
{
|
||||
auto mutable_column = column->assumeMutable();
|
||||
typeid_cast<ColumnTuple &>(*mutable_column).addSize(stream->tryIgnore(limit));
|
||||
column = std::move(mutable_column);
|
||||
addToSubstreamsCache(cache, settings.path, column);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
auto * tuple_state = checkAndGetState<DeserializeBinaryBulkStateTuple>(state);
|
||||
|
||||
auto mutable_column = column->assumeMutable();
|
||||
@ -650,6 +694,8 @@ void SerializationTuple::deserializeBinaryBulkWithMultipleStreams(
|
||||
settings.avg_value_size_hint = 0;
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
elems[i]->deserializeBinaryBulkWithMultipleStreams(column_tuple.getColumnPtr(i), limit, settings, tuple_state->states[i], cache);
|
||||
|
||||
typeid_cast<ColumnTuple &>(*mutable_column).addSize(column_tuple.getColumn(0).size());
|
||||
}
|
||||
|
||||
size_t SerializationTuple::getPositionByName(const String & name) const
|
||||
|
@ -1,4 +1,3 @@
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Core/Field.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
@ -10,8 +9,6 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <Core/iostream_debug_helpers.h>
|
||||
|
||||
|
||||
template <typename T>
|
||||
inline std::ostream& operator<<(std::ostream & ostr, const std::vector<T> & v)
|
||||
@ -63,7 +60,7 @@ TEST_P(ParseDataTypeTest, parseStringValue)
|
||||
data_type->getDefaultSerialization()->deserializeWholeText(*col, buffer, FormatSettings{});
|
||||
}
|
||||
|
||||
ASSERT_EQ(p.expected_values.size(), col->size()) << "Actual items: " << *col;
|
||||
ASSERT_EQ(p.expected_values.size(), col->size());
|
||||
for (size_t i = 0; i < col->size(); ++i)
|
||||
{
|
||||
ASSERT_EQ(p.expected_values[i], (*col)[i]);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user