mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-19 21:03:51 +00:00
Merge branch 'ClickHouse:master' into hilbert-index-analysis
This commit is contained in:
commit
fe63c3409f
8
.github/ISSUE_TEMPLATE/10_question.md
vendored
8
.github/ISSUE_TEMPLATE/10_question.md
vendored
@ -10,3 +10,11 @@ assignees: ''
|
||||
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
|
||||
|
||||
> If you still prefer GitHub issues, remove all this text and ask your question here.
|
||||
|
||||
**Company or project name**
|
||||
|
||||
Put your company name or project description here
|
||||
|
||||
**Question**
|
||||
|
||||
Your question
|
||||
|
4
.github/ISSUE_TEMPLATE/20_feature-request.md
vendored
4
.github/ISSUE_TEMPLATE/20_feature-request.md
vendored
@ -9,6 +9,10 @@ assignees: ''
|
||||
|
||||
> (you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
|
||||
> Put your company name or project description here
|
||||
|
||||
**Use case**
|
||||
|
||||
> A clear and concise description of what is the intended usage scenario is.
|
||||
|
@ -9,6 +9,10 @@ assignees: ''
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the unexpected behaviour**
|
||||
A clear and concise description of what works not as it is supposed to.
|
||||
|
||||
|
@ -9,6 +9,10 @@ assignees: ''
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the unexpected behaviour**
|
||||
A clear and concise description of what works not as it is supposed to.
|
||||
|
||||
|
3
.github/ISSUE_TEMPLATE/45_usability-issue.md
vendored
3
.github/ISSUE_TEMPLATE/45_usability-issue.md
vendored
@ -9,6 +9,9 @@ assignees: ''
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the issue**
|
||||
A clear and concise description of what works not as it is supposed to.
|
||||
|
||||
|
4
.github/ISSUE_TEMPLATE/50_build-issue.md
vendored
4
.github/ISSUE_TEMPLATE/50_build-issue.md
vendored
@ -9,6 +9,10 @@ assignees: ''
|
||||
|
||||
> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.com/docs/en/development/build/
|
||||
|
||||
**Company or project name**
|
||||
|
||||
> Put your company name or project description here
|
||||
|
||||
**Operating system**
|
||||
|
||||
> OS kind or distribution, specific version/release, non-standard kernel if any. If you are trying to build inside virtual machine, please mention it too.
|
||||
|
@ -8,6 +8,9 @@ labels: comp-documentation
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the issue**
|
||||
A clear and concise description of what's wrong in documentation.
|
||||
|
||||
|
@ -9,6 +9,9 @@ assignees: ''
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the situation**
|
||||
What exactly works slower than expected?
|
||||
|
||||
|
@ -9,6 +9,9 @@ assignees: ''
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Company or project name**
|
||||
Put your company name or project description here
|
||||
|
||||
**Describe the issue**
|
||||
A clear and concise description of what works not as it is supposed to.
|
||||
|
||||
|
4
.github/ISSUE_TEMPLATE/85_bug-report.md
vendored
4
.github/ISSUE_TEMPLATE/85_bug-report.md
vendored
@ -11,6 +11,10 @@ assignees: ''
|
||||
|
||||
> You have to provide the following information whenever possible.
|
||||
|
||||
**Company or project name**
|
||||
|
||||
> Put your company name or project description here
|
||||
|
||||
**Describe what's wrong**
|
||||
|
||||
> A clear and concise description of what works not as it is supposed to.
|
||||
|
@ -7,6 +7,10 @@ assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Company or project name**
|
||||
|
||||
Put your company name or project description here
|
||||
|
||||
**I have tried the following solutions**: https://clickhouse.com/docs/en/faq/troubleshooting/#troubleshooting-installation-errors
|
||||
|
||||
**Installation type**
|
||||
|
3
.github/workflows/master.yml
vendored
3
.github/workflows/master.yml
vendored
@ -106,7 +106,8 @@ jobs:
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
# stage for jobs that do not prohibit merge
|
||||
Tests_3:
|
||||
needs: [RunConfig, Builds_1]
|
||||
# Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there.
|
||||
needs: [RunConfig, Builds_1, Builds_2]
|
||||
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
|
||||
uses: ./.github/workflows/reusable_test_stage.yml
|
||||
with:
|
||||
|
2
.github/workflows/pull_request.yml
vendored
2
.github/workflows/pull_request.yml
vendored
@ -135,7 +135,7 @@ jobs:
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
# stage for jobs that do not prohibit merge
|
||||
Tests_3:
|
||||
needs: [RunConfig, Tests_1, Tests_2]
|
||||
needs: [RunConfig, Builds_1, Tests_1, Builds_2, Tests_2]
|
||||
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
|
||||
uses: ./.github/workflows/reusable_test_stage.yml
|
||||
with:
|
||||
|
2
.github/workflows/reusable_test.yml
vendored
2
.github/workflows/reusable_test.yml
vendored
@ -58,7 +58,7 @@ jobs:
|
||||
env:
|
||||
GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].num_batches > 1 && format('-{0}',matrix.batch) || '' }}
|
||||
strategy:
|
||||
fail-fast: false # we always wait for entire matrix
|
||||
fail-fast: false # we always wait for the entire matrix
|
||||
matrix:
|
||||
batch: ${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].batches }}
|
||||
steps:
|
||||
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -21,6 +21,9 @@
|
||||
*.stderr
|
||||
*.stdout
|
||||
|
||||
# llvm-xray logs
|
||||
xray-log.*
|
||||
|
||||
/docs/build
|
||||
/docs/publish
|
||||
/docs/edit
|
||||
|
29
.gitmessage
29
.gitmessage
@ -1,29 +0,0 @@
|
||||
|
||||
|
||||
### CI modificators (add a leading space to apply) ###
|
||||
|
||||
## To avoid a merge commit in CI:
|
||||
#no_merge_commit
|
||||
|
||||
## To discard CI cache:
|
||||
#no_ci_cache
|
||||
|
||||
## To not test (only style check):
|
||||
#do_not_test
|
||||
|
||||
## To run specified set of tests in CI:
|
||||
#ci_set_<SET_NAME>
|
||||
#ci_set_reduced
|
||||
#ci_set_arm
|
||||
#ci_set_integration
|
||||
#ci_set_old_analyzer
|
||||
|
||||
## To run specified job in CI:
|
||||
#job_<JOB NAME>
|
||||
#job_stateless_tests_release
|
||||
#job_package_debug
|
||||
#job_integration_tests_asan
|
||||
|
||||
## To run only specified batches for multi-batch job(s)
|
||||
#batch_2
|
||||
#batch_1_2_3
|
@ -11,7 +11,7 @@
|
||||
### <a id="245"></a> ClickHouse release 24.5, 2024-05-30
|
||||
|
||||
#### Backward Incompatible Change
|
||||
* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make to drop such indexes before upgrade and re-create them after upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make sure to drop such indexes before upgrade and re-create them after upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` deprecated (because it is error-prone). Proper window functions should be used instead. To enable them back, set `allow_deprecated_error_prone_window_functions = 1` or set `compatibility = '24.4'` or lower. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Queries from `system.columns` will work faster if there is a large number of columns, but many databases or tables are not granted for `SHOW TABLES`. Note that in previous versions, if you grant `SHOW COLUMNS` to individual columns without granting `SHOW TABLES` to the corresponding tables, the `system.columns` table will show these columns, but in a new version, it will skip the table entirely. Remove trace log messages "Access granted" and "Access denied" that slowed down queries. [#63439](https://github.com/ClickHouse/ClickHouse/pull/63439) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
|
@ -122,6 +122,8 @@ add_library(global-libs INTERFACE)
|
||||
|
||||
include (cmake/sanitize.cmake)
|
||||
|
||||
include (cmake/xray_instrumentation.cmake)
|
||||
|
||||
option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON)
|
||||
|
||||
set (CMAKE_COLOR_MAKEFILE ${ENABLE_COLORED_BUILD}) # works only for the makefile generator
|
||||
@ -208,8 +210,6 @@ option(OMIT_HEAVY_DEBUG_SYMBOLS
|
||||
"Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)"
|
||||
${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT})
|
||||
|
||||
option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
|
||||
|
||||
option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF)
|
||||
if (NOT BUILD_STANDALONE_KEEPER)
|
||||
option(CREATE_KEEPER_SYMLINK "Create symlink for clickhouse-keeper to main server binary" ON)
|
||||
|
@ -19,7 +19,10 @@ The following versions of ClickHouse server are currently supported with securit
|
||||
| 24.3 | ✔️ |
|
||||
| 24.2 | ❌ |
|
||||
| 24.1 | ❌ |
|
||||
| 23.* | ❌ |
|
||||
| 23.12 | ❌ |
|
||||
| 23.11 | ❌ |
|
||||
| 23.10 | ❌ |
|
||||
| 23.9 | ❌ |
|
||||
| 23.8 | ✔️ |
|
||||
| 23.7 | ❌ |
|
||||
| 23.6 | ❌ |
|
||||
|
@ -34,15 +34,6 @@ set (SRCS
|
||||
throwError.cpp
|
||||
)
|
||||
|
||||
if (USE_DEBUG_HELPERS)
|
||||
get_target_property(MAGIC_ENUM_INCLUDE_DIR ch_contrib::magic_enum INTERFACE_INCLUDE_DIRECTORIES)
|
||||
# CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc.
|
||||
# Prefixing "SHELL:" will force it to use the original text.
|
||||
set (INCLUDE_DEBUG_HELPERS "SHELL:-I\"${MAGIC_ENUM_INCLUDE_DIR}\" -include \"${ClickHouse_SOURCE_DIR}/base/base/iostream_debug_helpers.h\"")
|
||||
# Use generator expression as we don't want to pollute CMAKE_CXX_FLAGS, which will interfere with CMake check system.
|
||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${INCLUDE_DEBUG_HELPERS}>)
|
||||
endif ()
|
||||
|
||||
add_library (common ${SRCS})
|
||||
|
||||
if (WITH_COVERAGE)
|
||||
|
@ -1,187 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "demangle.h"
|
||||
#include "getThreadId.h"
|
||||
#include <type_traits>
|
||||
#include <tuple>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <magic_enum.hpp>
|
||||
|
||||
/** Usage:
|
||||
*
|
||||
* DUMP(variable...)
|
||||
*/
|
||||
|
||||
|
||||
template <typename Out, typename T>
|
||||
Out & dumpValue(Out &, T &&);
|
||||
|
||||
|
||||
/// Catch-all case.
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == -1)
|
||||
Out & dumpImpl(Out & out, T &&) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return out << "{...}";
|
||||
}
|
||||
|
||||
/// An object, that could be output with operator <<.
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 0)
|
||||
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::declval<Out &>() << std::declval<T>())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return out << x;
|
||||
}
|
||||
|
||||
/// A pointer-like object.
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 1
|
||||
/// Protect from the case when operator * do effectively nothing (function pointer).
|
||||
&& !std::is_same_v<std::decay_t<T>, std::decay_t<decltype(*std::declval<T>())>>)
|
||||
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(*std::declval<T>())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
if (!x)
|
||||
return out << "nullptr";
|
||||
return dumpValue(out, *x);
|
||||
}
|
||||
|
||||
/// Container.
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 2)
|
||||
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::begin(std::declval<T>()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
bool first = true;
|
||||
out << "{";
|
||||
for (const auto & elem : x)
|
||||
{
|
||||
if (first)
|
||||
first = false;
|
||||
else
|
||||
out << ", ";
|
||||
dumpValue(out, elem);
|
||||
}
|
||||
return out << "}";
|
||||
}
|
||||
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 3 && std::is_enum_v<std::decay_t<T>>)
|
||||
Out & dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return out << magic_enum::enum_name(x);
|
||||
}
|
||||
|
||||
/// string and const char * - output not as container or pointer.
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 3 && (std::is_same_v<std::decay_t<T>, std::string> || std::is_same_v<std::decay_t<T>, const char *>))
|
||||
Out & dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return out << std::quoted(x);
|
||||
}
|
||||
|
||||
/// UInt8 - output as number, not char.
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 3 && std::is_same_v<std::decay_t<T>, unsigned char>)
|
||||
Out & dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return out << int(x);
|
||||
}
|
||||
|
||||
|
||||
/// Tuple, pair
|
||||
template <size_t N, typename Out, typename T>
|
||||
Out & dumpTupleImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
if constexpr (N == 0)
|
||||
out << "{";
|
||||
else
|
||||
out << ", ";
|
||||
|
||||
dumpValue(out, std::get<N>(x));
|
||||
|
||||
if constexpr (N + 1 == std::tuple_size_v<std::decay_t<T>>)
|
||||
out << "}";
|
||||
else
|
||||
dumpTupleImpl<N + 1>(out, x);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
requires(priority == 4)
|
||||
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::get<0>(std::declval<T>()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return dumpTupleImpl<0>(out, x);
|
||||
}
|
||||
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
Out & dumpDispatchPriorities(Out & out, T && x, std::decay_t<decltype(dumpImpl<priority>(std::declval<Out &>(), std::declval<T>()))> *) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return dumpImpl<priority>(out, x);
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
struct LowPriority { LowPriority(void *) {} };
|
||||
|
||||
template <int priority, typename Out, typename T>
|
||||
Out & dumpDispatchPriorities(Out & out, T && x, LowPriority) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return dumpDispatchPriorities<priority - 1>(out, x, nullptr);
|
||||
}
|
||||
|
||||
|
||||
template <typename Out, typename T>
|
||||
Out & dumpValue(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
return dumpDispatchPriorities<5>(out, x, nullptr);
|
||||
}
|
||||
|
||||
|
||||
template <typename Out, typename T>
|
||||
Out & dump(Out & out, const char * name, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
|
||||
{
|
||||
// Dumping string literal, printing name and demangled type is irrelevant.
|
||||
if constexpr (std::is_same_v<const char *, std::decay_t<std::remove_reference_t<T>>>)
|
||||
{
|
||||
const auto name_len = strlen(name);
|
||||
const auto value_len = strlen(x);
|
||||
// `name` is the same as quoted `x`
|
||||
if (name_len > 2 && value_len > 0 && name[0] == '"' && name[name_len - 1] == '"'
|
||||
&& strncmp(name + 1, x, std::min(value_len, name_len) - 1) == 0)
|
||||
return out << x;
|
||||
}
|
||||
|
||||
out << demangle(typeid(x).name()) << " " << name << " = ";
|
||||
return dumpValue(out, x) << "; ";
|
||||
}
|
||||
|
||||
#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
|
||||
|
||||
#define DUMPVAR(VAR) ::dump(std::cerr, #VAR, (VAR));
|
||||
#define DUMPHEAD std::cerr << __FILE__ << ':' << __LINE__ << " [ " << getThreadId() << " ] ";
|
||||
#define DUMPTAIL std::cerr << '\n';
|
||||
|
||||
#define DUMP1(V1) do { DUMPHEAD DUMPVAR(V1) DUMPTAIL } while(0)
|
||||
#define DUMP2(V1, V2) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPTAIL } while(0)
|
||||
#define DUMP3(V1, V2, V3) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPTAIL } while(0)
|
||||
#define DUMP4(V1, V2, V3, V4) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPTAIL } while(0)
|
||||
#define DUMP5(V1, V2, V3, V4, V5) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPTAIL } while(0)
|
||||
#define DUMP6(V1, V2, V3, V4, V5, V6) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPTAIL } while(0)
|
||||
#define DUMP7(V1, V2, V3, V4, V5, V6, V7) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPTAIL } while(0)
|
||||
#define DUMP8(V1, V2, V3, V4, V5, V6, V7, V8) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPTAIL } while(0)
|
||||
#define DUMP9(V1, V2, V3, V4, V5, V6, V7, V8, V9) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPVAR(V9) DUMPTAIL } while(0)
|
||||
|
||||
/// https://groups.google.com/forum/#!searchin/kona-dev/variadic$20macro%7Csort:date/kona-dev/XMA-lDOqtlI/GCzdfZsD41sJ
|
||||
|
||||
#define VA_NUM_ARGS_IMPL(x1, x2, x3, x4, x5, x6, x7, x8, x9, N, ...) N
|
||||
#define VA_NUM_ARGS(...) VA_NUM_ARGS_IMPL(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1)
|
||||
|
||||
#define MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS) PREFIX ## NUM_ARGS
|
||||
#define MAKE_VAR_MACRO_IMPL(PREFIX, NUM_ARGS) MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS)
|
||||
#define MAKE_VAR_MACRO(PREFIX, ...) MAKE_VAR_MACRO_IMPL(PREFIX, VA_NUM_ARGS(__VA_ARGS__))
|
||||
|
||||
#define DUMP(...) MAKE_VAR_MACRO(DUMP, __VA_ARGS__)(__VA_ARGS__)
|
@ -1,2 +0,0 @@
|
||||
clickhouse_add_executable (dump_variable dump_variable.cpp)
|
||||
target_link_libraries (dump_variable PRIVATE clickhouse_common_io)
|
@ -1,70 +0,0 @@
|
||||
#include <base/iostream_debug_helpers.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
#include <array>
|
||||
#include <utility>
|
||||
|
||||
|
||||
struct S1;
|
||||
struct S2 {};
|
||||
|
||||
struct S3
|
||||
{
|
||||
std::set<const char *> m1;
|
||||
};
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const S3 & what)
|
||||
{
|
||||
stream << "S3 {m1=";
|
||||
dumpValue(stream, what.m1) << "}";
|
||||
return stream;
|
||||
}
|
||||
|
||||
int main(int, char **)
|
||||
{
|
||||
int x = 1;
|
||||
|
||||
DUMP(x);
|
||||
DUMP(x, 1, &x);
|
||||
|
||||
DUMP(std::make_unique<int>(1));
|
||||
DUMP(std::make_shared<int>(1));
|
||||
|
||||
std::vector<int> vec{1, 2, 3};
|
||||
DUMP(vec);
|
||||
|
||||
auto pair = std::make_pair(1, 2);
|
||||
DUMP(pair);
|
||||
|
||||
auto tuple = std::make_tuple(1, 2, 3);
|
||||
DUMP(tuple);
|
||||
|
||||
std::map<int, std::string> map{{1, "hello"}, {2, "world"}};
|
||||
DUMP(map);
|
||||
|
||||
std::initializer_list<const char *> list{"hello", "world"};
|
||||
DUMP(list);
|
||||
|
||||
std::array<const char *, 2> arr{{"hello", "world"}};
|
||||
DUMP(arr);
|
||||
|
||||
//DUMP([]{});
|
||||
|
||||
S1 * s = nullptr;
|
||||
DUMP(s);
|
||||
|
||||
DUMP(S2());
|
||||
|
||||
std::set<const char *> variants = {"hello", "world"};
|
||||
DUMP(variants);
|
||||
|
||||
S3 s3 {{"hello", "world"}};
|
||||
DUMP(s3);
|
||||
|
||||
return 0;
|
||||
}
|
20
cmake/xray_instrumentation.cmake
Normal file
20
cmake/xray_instrumentation.cmake
Normal file
@ -0,0 +1,20 @@
|
||||
# https://llvm.org/docs/XRay.html
|
||||
|
||||
option (ENABLE_XRAY "Enable LLVM XRay" OFF)
|
||||
|
||||
if (NOT ENABLE_XRAY)
|
||||
message (STATUS "Not using LLVM XRay")
|
||||
return()
|
||||
endif()
|
||||
|
||||
if (NOT (ARCH_AMD64 AND OS_LINUX))
|
||||
message (STATUS "Not using LLVM XRay, only amd64 Linux or FreeBSD are supported")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# The target clang must support xray, otherwise it should error on invalid option
|
||||
set (XRAY_FLAGS "-fxray-instrument -DUSE_XRAY")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${XRAY_FLAGS}")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${XRAY_FLAGS}")
|
||||
|
||||
message (STATUS "Using LLVM XRay")
|
@ -285,7 +285,7 @@ stop_logs_replication
|
||||
|
||||
# Try to get logs while server is running
|
||||
failed_to_save_logs=0
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log
|
||||
do
|
||||
err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes")
|
||||
echo "$err"
|
||||
@ -339,7 +339,7 @@ if [ $failed_to_save_logs -ne 0 ]; then
|
||||
# directly
|
||||
# - even though ci auto-compress some files (but not *.tsv) it does this only
|
||||
# for files >64MB, we want this files to be compressed explicitly
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log
|
||||
do
|
||||
clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
|
@ -15,7 +15,6 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
file \
|
||||
libxml2-utils \
|
||||
moreutils \
|
||||
python3-fuzzywuzzy \
|
||||
python3-pip \
|
||||
yamllint \
|
||||
locales \
|
||||
@ -23,8 +22,18 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
# python-magic is the same version as in Ubuntu 22.04
|
||||
RUN pip3 install black==23.12.0 boto3 codespell==2.2.1 mypy==1.8.0 PyGithub unidiff pylint==3.1.0 \
|
||||
python-magic==0.4.24 requests types-requests \
|
||||
RUN pip3 install \
|
||||
PyGithub \
|
||||
black==23.12.0 \
|
||||
boto3 \
|
||||
codespell==2.2.1 \
|
||||
mypy==1.8.0 \
|
||||
pylint==3.1.0 \
|
||||
python-magic==0.4.24 \
|
||||
requests \
|
||||
thefuzz \
|
||||
types-requests \
|
||||
unidiff \
|
||||
&& rm -rf /root/.cache/pip
|
||||
|
||||
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
|
||||
|
@ -71,7 +71,7 @@ If it fails, fix the style errors following the [code style guide](style.md).
|
||||
```sh
|
||||
mkdir -p /tmp/test_output
|
||||
# running all checks
|
||||
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE clickhouse/style-test
|
||||
python3 tests/ci/style_check.py --no-push
|
||||
|
||||
# run specified check script (e.g.: ./check-mypy)
|
||||
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE --entrypoint= -w/ClickHouse/utils/check-style clickhouse/style-test ./check-mypy
|
||||
|
@ -34,10 +34,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
|
||||
- `options` — MongoDB connection string options (optional parameter).
|
||||
|
||||
:::tip
|
||||
If you are using the MongoDB Atlas cloud offering please add these options:
|
||||
If you are using the MongoDB Atlas cloud offering:
|
||||
|
||||
```
|
||||
'connectTimeoutMS=10000&ssl=true&authSource=admin'
|
||||
- connection url can be obtained from 'Atlas SQL' option
|
||||
- use options: 'connectTimeoutMS=10000&ssl=true&authSource=admin'
|
||||
```
|
||||
|
||||
:::
|
||||
|
@ -37,7 +37,7 @@ ways, for example with respect to their DDL/DQL syntax or performance/compressio
|
||||
To use full-text indexes, first enable them in the configuration:
|
||||
|
||||
```sql
|
||||
SET allow_experimental_inverted_index = true;
|
||||
SET allow_experimental_full_text_index = true;
|
||||
```
|
||||
|
||||
An full-text index can be defined on a string column using the following syntax
|
||||
|
@ -39,8 +39,8 @@ If you need to update rows frequently, we recommend using the [`ReplacingMergeTr
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY] [SETTINGS (name = value, ...)],
|
||||
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY] [SETTINGS (name = value, ...)],
|
||||
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTICS(stat1)] [TTL expr1] [PRIMARY KEY] [SETTINGS (name = value, ...)],
|
||||
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTICS(stat2)] [TTL expr2] [PRIMARY KEY] [SETTINGS (name = value, ...)],
|
||||
...
|
||||
INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1],
|
||||
INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
|
||||
@ -178,6 +178,10 @@ Additional parameters that control the behavior of the `MergeTree` (optional):
|
||||
|
||||
`max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify setting [max_partitions_to_read](/docs/en/operations/settings/merge-tree-settings.md/#max-partitions-to-read) in the global setting.
|
||||
|
||||
#### allow_experimental_optimized_row_order
|
||||
|
||||
`allow_experimental_optimized_row_order` - Experimental. Enables the optimization of the row order during inserts to improve the compressability of the data for compression codecs (e.g. LZ4). Analyzes and reorders the data, and thus increases the CPU overhead of inserts.
|
||||
|
||||
**Example of Sections Setting**
|
||||
|
||||
``` sql
|
||||
@ -1039,12 +1043,12 @@ ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [
|
||||
|
||||
## Column Statistics (Experimental) {#column-statistics}
|
||||
|
||||
The statistic declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistic = 1`.
|
||||
The statistics declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistics = 1`.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE tab
|
||||
(
|
||||
a Int64 STATISTIC(tdigest),
|
||||
a Int64 STATISTICS(TDigest, Uniq),
|
||||
b Float64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
@ -1054,19 +1058,23 @@ ORDER BY a
|
||||
We can also manipulate statistics with `ALTER` statements.
|
||||
|
||||
```sql
|
||||
ALTER TABLE tab ADD STATISTIC b TYPE tdigest;
|
||||
ALTER TABLE tab DROP STATISTIC a TYPE tdigest;
|
||||
ALTER TABLE tab ADD STATISTICS b TYPE TDigest, Uniq;
|
||||
ALTER TABLE tab DROP STATISTICS a;
|
||||
```
|
||||
|
||||
These lightweight statistics aggregate information about distribution of values in columns.
|
||||
They can be used for query optimization when we enable `set allow_statistic_optimize = 1`.
|
||||
These lightweight statistics aggregate information about distribution of values in columns. Statistics are stored in every part and updated when every insert comes.
|
||||
They can be used for prewhere optimization only if we enable `set allow_statistics_optimize = 1`.
|
||||
|
||||
#### Available Types of Column Statistics {#available-types-of-column-statistics}
|
||||
|
||||
- `tdigest`
|
||||
- `TDigest`
|
||||
|
||||
Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch.
|
||||
|
||||
- `Uniq`
|
||||
|
||||
Estimate the number of distinct values of a column by HyperLogLog.
|
||||
|
||||
## Column-level Settings {#column-level-settings}
|
||||
|
||||
Certain MergeTree settings can be override at column level:
|
||||
|
@ -885,3 +885,47 @@ Default value: false
|
||||
**See Also**
|
||||
|
||||
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
|
||||
|
||||
### allow_experimental_optimized_row_order
|
||||
|
||||
Controls if the row order should be optimized during inserts to improve the compressability of the newly inserted table part.
|
||||
|
||||
MergeTree tables are (optionally) compressed using [compression codecs](../../sql-reference/statements/create/table.md#column_compression_codec).
|
||||
Generic compression codecs such as LZ4 and ZSTD achieve maximum compression rates if the data exposes patterns.
|
||||
Long runs of the same value typically compress very well.
|
||||
|
||||
If this setting is enabled, ClickHouse attempts to store the data in newly inserted parts in a row order that minimizes the number of equal-value runs across the columns of the new table part.
|
||||
In other words, a small number of equal-value runs mean that individual runs are long and compress well.
|
||||
|
||||
Finding the optimal row order is computationally infeasible (NP hard).
|
||||
Therefore, ClickHouse uses a heuristics to quickly find a row order which still improves compression rates over the original row order.
|
||||
|
||||
<details markdown="1">
|
||||
|
||||
<summary>Heuristics for finding a row order</summary>
|
||||
|
||||
It is generally possible to shuffle the rows of a table (or table part) freely as SQL considers the same table (table part) in different row order equivalent.
|
||||
|
||||
This freedom of shuffling rows is restricted when a primary key is defined for the table.
|
||||
In ClickHouse, a primary key `C1, C2, ..., CN` enforces that the table rows are sorted by columns `C1`, `C2`, ... `Cn` ([clustered index](https://en.wikipedia.org/wiki/Database_index#Clustered)).
|
||||
As a result, rows can only be shuffled within "equivalence classes" of row, i.e. rows which have the same values in their primary key columns.
|
||||
The intuition is that primary keys with high-cardinality, e.g. primary keys involving a `DateTime64` timestamp column, lead to many small equivalence classes.
|
||||
Likewise, tables with a low-cardinality primary key, create few and large equivalence classes.
|
||||
A table with no primary key represents the extreme case of a single equivalence class which spans all rows.
|
||||
|
||||
The fewer and the larger the equivalence classes are, the higher the degree of freedom when re-shuffling rows.
|
||||
|
||||
The heuristics applied to find the best row order within each equivalence class is suggested by D. Lemir, O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and based on sorting the rows within each equivalence class by ascending cardinality of the non-primary key columns.
|
||||
It performs three steps:
|
||||
1. Find all equivalence classes based on the row values in primary key columns.
|
||||
2. For each equivalence class, calculate (usually estimate) the cardinalities of the non-primary-key columns.
|
||||
3. For each equivalence class, sort the rows in order of ascending non-primary-key column cardinality.
|
||||
|
||||
</details>
|
||||
|
||||
If enabled, insert operations incur additional CPU costs to analyze and optimize the row order of the new data.
|
||||
INSERTs are expected to take 30-50% longer depending on the data characteristics.
|
||||
Compression rates of LZ4 or ZSTD improve on average by 20-40%.
|
||||
|
||||
This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only few distinct primary key values.
|
||||
High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting.
|
||||
|
@ -5108,7 +5108,7 @@ a Tuple(
|
||||
)
|
||||
```
|
||||
|
||||
## allow_experimental_statistic {#allow_experimental_statistic}
|
||||
## allow_experimental_statistics {#allow_experimental_statistics}
|
||||
|
||||
Allows defining columns with [statistics](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) and [manipulate statistics](../../engines/table-engines/mergetree-family/mergetree.md#column-statistics).
|
||||
|
||||
|
@ -5,10 +5,57 @@ sidebar_position: 107
|
||||
|
||||
# corr
|
||||
|
||||
Syntax: `corr(x, y)`
|
||||
Calculates the [Pearson correlation coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient):
|
||||
|
||||
$$
|
||||
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{\sqrt{\Sigma{(x - \bar{x})^2} * \Sigma{(y - \bar{y})^2}}}
|
||||
$$
|
||||
|
||||
Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`corrStable`](../reference/corrstable.md) function. It is slower but provides a more accurate result.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
corr(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The Pearson correlation coefficient. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series
|
||||
(
|
||||
i UInt32,
|
||||
x_value Float64,
|
||||
y_value Float64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT corr(x_value, y_value)
|
||||
FROM series;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─corr(x_value, y_value)─┐
|
||||
│ 0.1730265755453256 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
@ -0,0 +1,55 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/corrmatrix
|
||||
sidebar_position: 108
|
||||
---
|
||||
|
||||
# corrMatrix
|
||||
|
||||
Computes the correlation matrix over N variables.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
corrMatrix(x[, ...])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Correlation matrix. [Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test
|
||||
(
|
||||
a UInt32,
|
||||
b Float64,
|
||||
c Float64,
|
||||
d Float64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT arrayMap(x -> round(x, 3), arrayJoin(corrMatrix(a, b, c, d))) AS corrMatrix
|
||||
FROM test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─corrMatrix─────────────┐
|
||||
1. │ [1,-0.096,0.243,0.746] │
|
||||
2. │ [-0.096,1,0.173,0.106] │
|
||||
3. │ [0.243,0.173,1,0.258] │
|
||||
4. │ [0.746,0.106,0.258,1] │
|
||||
└────────────────────────┘
|
||||
```
|
@ -0,0 +1,58 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/corrstable
|
||||
sidebar_position: 107
|
||||
---
|
||||
|
||||
# corrStable
|
||||
|
||||
Calculates the [Pearson correlation coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient):
|
||||
|
||||
$$
|
||||
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{\sqrt{\Sigma{(x - \bar{x})^2} * \Sigma{(y - \bar{y})^2}}}
|
||||
$$
|
||||
|
||||
Similar to the [`corr`](../reference/corr.md) function, but uses a numerically stable algorithm. As a result, `corrStable` is slower than `corr` but produces a more accurate result.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
corrStable(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The Pearson correlation coefficient. [Float64](../../data-types/float.md).
|
||||
|
||||
***Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series
|
||||
(
|
||||
i UInt32,
|
||||
x_value Float64,
|
||||
y_value Float64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT corrStable(x_value, y_value)
|
||||
FROM series;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─corrStable(x_value, y_value)─┐
|
||||
│ 0.17302657554532558 │
|
||||
└──────────────────────────────┘
|
||||
```
|
@ -1,14 +1,54 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/covarpop
|
||||
sidebar_position: 36
|
||||
sidebar_position: 37
|
||||
---
|
||||
|
||||
# covarPop
|
||||
|
||||
Syntax: `covarPop(x, y)`
|
||||
Calculates the population covariance:
|
||||
|
||||
Calculates the value of `Σ((x - x̅)(y - y̅)) / n`.
|
||||
$$
|
||||
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{n}
|
||||
$$
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`covarPopStable`](../reference/covarpopstable.md) function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarPop(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The population covariance between `x` and `y`. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT covarPop(x_value, y_value)
|
||||
FROM series;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarPop(x_value, y_value)─┐
|
||||
│ 6.485648 │
|
||||
└────────────────────────────┘
|
||||
```
|
||||
|
@ -0,0 +1,55 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/covarpopmatrix
|
||||
sidebar_position: 36
|
||||
---
|
||||
|
||||
# covarPopMatrix
|
||||
|
||||
Returns the population covariance matrix over N variables.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarPopMatrix(x[, ...])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- Population covariance matrix. [Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test
|
||||
(
|
||||
a UInt32,
|
||||
b Float64,
|
||||
c Float64,
|
||||
d Float64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT arrayMap(x -> round(x, 3), arrayJoin(covarPopMatrix(a, b, c, d))) AS covarPopMatrix
|
||||
FROM test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarPopMatrix────────────┐
|
||||
1. │ [8.25,-1.76,4.08,6.748] │
|
||||
2. │ [-1.76,41.07,6.486,2.132] │
|
||||
3. │ [4.08,6.486,34.21,4.755] │
|
||||
4. │ [6.748,2.132,4.755,9.93] │
|
||||
└───────────────────────────┘
|
||||
```
|
@ -0,0 +1,60 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/covarpopstable
|
||||
sidebar_position: 36
|
||||
---
|
||||
|
||||
# covarPopStable
|
||||
|
||||
Calculates the value of the population covariance:
|
||||
|
||||
$$
|
||||
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{n}
|
||||
$$
|
||||
|
||||
It is similar to the [covarPop](../reference/covarpop.md) function, but uses a numerically stable algorithm. As a result, `covarPopStable` is slower than `covarPop` but produces a more accurate result.
|
||||
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarPop(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The population covariance between `x` and `y`. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT covarPopStable(x_value, y_value)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
x_value,
|
||||
y_value
|
||||
FROM series
|
||||
);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarPopStable(x_value, y_value)─┐
|
||||
│ 6.485648 │
|
||||
└──────────────────────────────────┘
|
||||
```
|
||||
|
@ -7,8 +7,74 @@ sidebar_position: 37
|
||||
|
||||
Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`.
|
||||
|
||||
Returns Float64. When `n <= 1`, returns `nan`.
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`covarSampStable`](../reference/covarsamp.md) function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarSamp(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The sample covariance between `x` and `y`. For `n <= 1`, `nan` is returned. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT covarSamp(x_value, y_value)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
x_value,
|
||||
y_value
|
||||
FROM series
|
||||
);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarSamp(x_value, y_value)─┐
|
||||
│ 7.206275555555556 │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT covarSamp(x_value, y_value)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
x_value,
|
||||
y_value
|
||||
FROM series LIMIT 1
|
||||
);
|
||||
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarSamp(x_value, y_value)─┐
|
||||
│ nan │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
|
@ -0,0 +1,57 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/covarsampmatrix
|
||||
sidebar_position: 38
|
||||
---
|
||||
|
||||
# covarSampMatrix
|
||||
|
||||
Returns the sample covariance matrix over N variables.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarSampMatrix(x[, ...])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- Sample covariance matrix. [Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test
|
||||
(
|
||||
a UInt32,
|
||||
b Float64,
|
||||
c Float64,
|
||||
d Float64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT arrayMap(x -> round(x, 3), arrayJoin(covarSampMatrix(a, b, c, d))) AS covarSampMatrix
|
||||
FROM test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarSampMatrix─────────────┐
|
||||
1. │ [9.167,-1.956,4.534,7.498] │
|
||||
2. │ [-1.956,45.634,7.206,2.369] │
|
||||
3. │ [4.534,7.206,38.011,5.283] │
|
||||
4. │ [7.498,2.369,5.283,11.034] │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -0,0 +1,73 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/covarsampstable
|
||||
sidebar_position: 37
|
||||
---
|
||||
|
||||
# covarSampStable
|
||||
|
||||
Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`. Similar to [covarSamp](../reference/covarsamp.md) but works slower while providing a lower computational error.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarSampStable(x, y)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
- The sample covariance between `x` and `y`. For `n <= 1`, `inf` is returned. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS series;
|
||||
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
|
||||
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT covarSampStable(x_value, y_value)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
x_value,
|
||||
y_value
|
||||
FROM series
|
||||
);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarSampStable(x_value, y_value)─┐
|
||||
│ 7.206275555555556 │
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT covarSampStable(x_value, y_value)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
x_value,
|
||||
y_value
|
||||
FROM series LIMIT 1
|
||||
);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```reference
|
||||
┌─covarSampStable(x_value, y_value)─┐
|
||||
│ inf │
|
||||
└───────────────────────────────────┘
|
||||
```
|
@ -9,110 +9,116 @@ toc_hidden: true
|
||||
|
||||
Standard aggregate functions:
|
||||
|
||||
- [count](/docs/en/sql-reference/aggregate-functions/reference/count.md)
|
||||
- [min](/docs/en/sql-reference/aggregate-functions/reference/min.md)
|
||||
- [max](/docs/en/sql-reference/aggregate-functions/reference/max.md)
|
||||
- [sum](/docs/en/sql-reference/aggregate-functions/reference/sum.md)
|
||||
- [avg](/docs/en/sql-reference/aggregate-functions/reference/avg.md)
|
||||
- [any](/docs/en/sql-reference/aggregate-functions/reference/any.md)
|
||||
- [stddevPop](/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md)
|
||||
- [stddevPopStable](/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md)
|
||||
- [stddevSamp](/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md)
|
||||
- [stddevSampStable](/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md)
|
||||
- [varPop](/docs/en/sql-reference/aggregate-functions/reference/varpop.md)
|
||||
- [varSamp](/docs/en/sql-reference/aggregate-functions/reference/varsamp.md)
|
||||
- [corr](./corr.md)
|
||||
- [covarPop](/docs/en/sql-reference/aggregate-functions/reference/covarpop.md)
|
||||
- [covarSamp](/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md)
|
||||
- [entropy](./entropy.md)
|
||||
- [exponentialMovingAverage](./exponentialmovingaverage.md)
|
||||
- [intervalLengthSum](./intervalLengthSum.md)
|
||||
- [kolmogorovSmirnovTest](./kolmogorovsmirnovtest.md)
|
||||
- [mannwhitneyutest](./mannwhitneyutest.md)
|
||||
- [median](./median.md)
|
||||
- [rankCorr](./rankCorr.md)
|
||||
- [sumKahan](./sumkahan.md)
|
||||
- [studentTTest](./studentttest.md)
|
||||
- [welchTTest](./welchttest.md)
|
||||
- [count](../reference/count.md)
|
||||
- [min](../reference/min.md)
|
||||
- [max](../reference/max.md)
|
||||
- [sum](../reference/sum.md)
|
||||
- [avg](../reference/avg.md)
|
||||
- [any](../reference/any.md)
|
||||
- [stddevPop](../reference/stddevpop.md)
|
||||
- [stddevPopStable](../reference/stddevpopstable.md)
|
||||
- [stddevSamp](../reference/stddevsamp.md)
|
||||
- [stddevSampStable](../reference/stddevsampstable.md)
|
||||
- [varPop](../reference/varpop.md)
|
||||
- [varSamp](../reference/varsamp.md)
|
||||
- [corr](../reference/corr.md)
|
||||
- [corr](../reference/corrstable.md)
|
||||
- [corrMatrix](../reference/corrmatrix.md)
|
||||
- [covarPop](../reference/covarpop.md)
|
||||
- [covarStable](../reference/covarpopstable.md)
|
||||
- [covarPopMatrix](../reference/covarpopmatrix.md)
|
||||
- [covarSamp](../reference/covarsamp.md)
|
||||
- [covarSampStable](../reference/covarsampstable.md)
|
||||
- [covarSampMatrix](../reference/covarsampmatrix.md)
|
||||
- [entropy](../reference/entropy.md)
|
||||
- [exponentialMovingAverage](../reference/exponentialmovingaverage.md)
|
||||
- [intervalLengthSum](../reference/intervalLengthSum.md)
|
||||
- [kolmogorovSmirnovTest](../reference/kolmogorovsmirnovtest.md)
|
||||
- [mannwhitneyutest](../reference/mannwhitneyutest.md)
|
||||
- [median](../reference/median.md)
|
||||
- [rankCorr](../reference/rankCorr.md)
|
||||
- [sumKahan](../reference/sumkahan.md)
|
||||
- [studentTTest](../reference/studentttest.md)
|
||||
- [welchTTest](../reference/welchttest.md)
|
||||
|
||||
ClickHouse-specific aggregate functions:
|
||||
|
||||
- [analysisOfVariance](/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md)
|
||||
- [any](/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md)
|
||||
- [anyHeavy](/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md)
|
||||
- [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast.md)
|
||||
- [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md)
|
||||
- [boundingRatio](/docs/en/sql-reference/aggregate-functions/reference/boundrat.md)
|
||||
- [first_value](/docs/en/sql-reference/aggregate-functions/reference/first_value.md)
|
||||
- [last_value](/docs/en/sql-reference/aggregate-functions/reference/last_value.md)
|
||||
- [argMin](/docs/en/sql-reference/aggregate-functions/reference/argmin.md)
|
||||
- [argMax](/docs/en/sql-reference/aggregate-functions/reference/argmax.md)
|
||||
- [avgWeighted](/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md)
|
||||
- [topK](/docs/en/sql-reference/aggregate-functions/reference/topk.md)
|
||||
- [topKWeighted](/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md)
|
||||
- [deltaSum](./deltasum.md)
|
||||
- [deltaSumTimestamp](./deltasumtimestamp.md)
|
||||
- [groupArray](/docs/en/sql-reference/aggregate-functions/reference/grouparray.md)
|
||||
- [groupArrayLast](/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md)
|
||||
- [groupUniqArray](/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md)
|
||||
- [groupArrayInsertAt](/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
|
||||
- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
|
||||
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
|
||||
- [groupArraySample](./grouparraysample.md)
|
||||
- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md)
|
||||
- [groupArrayIntersect](./grouparrayintersect.md)
|
||||
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
|
||||
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
|
||||
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)
|
||||
- [groupBitmap](/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md)
|
||||
- [groupBitmapAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md)
|
||||
- [groupBitmapOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md)
|
||||
- [groupBitmapXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md)
|
||||
- [sumWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md)
|
||||
- [sumMap](/docs/en/sql-reference/aggregate-functions/reference/summap.md)
|
||||
- [sumMapWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md)
|
||||
- [sumMapFiltered](/docs/en/sql-reference/aggregate-functions/parametric-functions.md/#summapfiltered)
|
||||
- [sumMapFilteredWithOverflow](/docs/en/sql-reference/aggregate-functions/parametric-functions.md/#summapfilteredwithoverflow)
|
||||
- [minMap](/docs/en/sql-reference/aggregate-functions/reference/minmap.md)
|
||||
- [maxMap](/docs/en/sql-reference/aggregate-functions/reference/maxmap.md)
|
||||
- [skewSamp](/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md)
|
||||
- [skewPop](/docs/en/sql-reference/aggregate-functions/reference/skewpop.md)
|
||||
- [kurtSamp](/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md)
|
||||
- [kurtPop](/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md)
|
||||
- [uniq](/docs/en/sql-reference/aggregate-functions/reference/uniq.md)
|
||||
- [uniqExact](/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md)
|
||||
- [uniqCombined](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md)
|
||||
- [uniqCombined64](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md)
|
||||
- [uniqHLL12](/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md)
|
||||
- [uniqTheta](/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md)
|
||||
- [quantile](/docs/en/sql-reference/aggregate-functions/reference/quantile.md)
|
||||
- [quantiles](/docs/en/sql-reference/aggregate-functions/reference/quantiles.md)
|
||||
- [quantileExact](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md)
|
||||
- [quantileExactLow](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow)
|
||||
- [quantileExactHigh](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh)
|
||||
- [quantileExactWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md)
|
||||
- [quantileTiming](/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md)
|
||||
- [quantileTimingWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md)
|
||||
- [quantileDeterministic](/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md)
|
||||
- [quantileTDigest](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md)
|
||||
- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
|
||||
- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
|
||||
- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
|
||||
- [quantileDD](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch)
|
||||
- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md)
|
||||
- [singleValueOrNull](/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md)
|
||||
- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
|
||||
- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
|
||||
- [categoricalInformationValue](/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md)
|
||||
- [contingency](./contingency.md)
|
||||
- [cramersV](./cramersv.md)
|
||||
- [cramersVBiasCorrected](./cramersvbiascorrected.md)
|
||||
- [theilsU](./theilsu.md)
|
||||
- [maxIntersections](./maxintersections.md)
|
||||
- [maxIntersectionsPosition](./maxintersectionsposition.md)
|
||||
- [meanZTest](./meanztest.md)
|
||||
- [quantileGK](./quantileGK.md)
|
||||
- [quantileInterpolatedWeighted](./quantileinterpolatedweighted.md)
|
||||
- [sparkBar](./sparkbar.md)
|
||||
- [sumCount](./sumcount.md)
|
||||
- [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md)
|
||||
- [analysisOfVariance](../reference/analysis_of_variance.md)
|
||||
- [any](../reference/any_respect_nulls.md)
|
||||
- [anyHeavy](../reference/anyheavy.md)
|
||||
- [anyLast](../reference/anylast.md)
|
||||
- [anyLast](../reference/anylast_respect_nulls.md)
|
||||
- [boundingRatio](../reference/boundrat.md)
|
||||
- [first_value](../reference/first_value.md)
|
||||
- [last_value](../reference/last_value.md)
|
||||
- [argMin](../reference/argmin.md)
|
||||
- [argMax](../reference/argmax.md)
|
||||
- [avgWeighted](../reference/avgweighted.md)
|
||||
- [topK](../reference/topk.md)
|
||||
- [topKWeighted](../reference/topkweighted.md)
|
||||
- [deltaSum](../reference/deltasum.md)
|
||||
- [deltaSumTimestamp](../reference/deltasumtimestamp.md)
|
||||
- [groupArray](../reference/grouparray.md)
|
||||
- [groupArrayLast](../reference/grouparraylast.md)
|
||||
- [groupUniqArray](../reference/groupuniqarray.md)
|
||||
- [groupArrayInsertAt](../reference/grouparrayinsertat.md)
|
||||
- [groupArrayMovingAvg](../reference/grouparraymovingavg.md)
|
||||
- [groupArrayMovingSum](../reference/grouparraymovingsum.md)
|
||||
- [groupArraySample](../reference/grouparraysample.md)
|
||||
- [groupArraySorted](../reference/grouparraysorted.md)
|
||||
- [groupArrayIntersect](../reference/grouparrayintersect.md)
|
||||
- [groupBitAnd](../reference/groupbitand.md)
|
||||
- [groupBitOr](../reference/groupbitor.md)
|
||||
- [groupBitXor](../reference/groupbitxor.md)
|
||||
- [groupBitmap](../reference/groupbitmap.md)
|
||||
- [groupBitmapAnd](../reference/groupbitmapand.md)
|
||||
- [groupBitmapOr](../reference/groupbitmapor.md)
|
||||
- [groupBitmapXor](../reference/groupbitmapxor.md)
|
||||
- [sumWithOverflow](../reference/sumwithoverflow.md)
|
||||
- [sumMap](../reference/summap.md)
|
||||
- [sumMapWithOverflow](../reference/summapwithoverflow.md)
|
||||
- [sumMapFiltered](../parametric-functions.md/#summapfiltered)
|
||||
- [sumMapFilteredWithOverflow](../parametric-functions.md/#summapfilteredwithoverflow)
|
||||
- [minMap](../reference/minmap.md)
|
||||
- [maxMap](../reference/maxmap.md)
|
||||
- [skewSamp](../reference/skewsamp.md)
|
||||
- [skewPop](../reference/skewpop.md)
|
||||
- [kurtSamp](../reference/kurtsamp.md)
|
||||
- [kurtPop](../reference/kurtpop.md)
|
||||
- [uniq](../reference/uniq.md)
|
||||
- [uniqExact](../reference/uniqexact.md)
|
||||
- [uniqCombined](../reference/uniqcombined.md)
|
||||
- [uniqCombined64](../reference/uniqcombined64.md)
|
||||
- [uniqHLL12](../reference/uniqhll12.md)
|
||||
- [uniqTheta](../reference/uniqthetasketch.md)
|
||||
- [quantile](../reference/quantile.md)
|
||||
- [quantiles](../reference/quantiles.md)
|
||||
- [quantileExact](../reference/quantileexact.md)
|
||||
- [quantileExactLow](../reference/quantileexact.md#quantileexactlow)
|
||||
- [quantileExactHigh](../reference/quantileexact.md#quantileexacthigh)
|
||||
- [quantileExactWeighted](../reference/quantileexactweighted.md)
|
||||
- [quantileTiming](../reference/quantiletiming.md)
|
||||
- [quantileTimingWeighted](../reference/quantiletimingweighted.md)
|
||||
- [quantileDeterministic](../reference/quantiledeterministic.md)
|
||||
- [quantileTDigest](../reference/quantiletdigest.md)
|
||||
- [quantileTDigestWeighted](../reference/quantiletdigestweighted.md)
|
||||
- [quantileBFloat16](../reference/quantilebfloat16.md#quantilebfloat16)
|
||||
- [quantileBFloat16Weighted](../reference/quantilebfloat16.md#quantilebfloat16weighted)
|
||||
- [quantileDD](../reference/quantileddsketch.md#quantileddsketch)
|
||||
- [simpleLinearRegression](../reference/simplelinearregression.md)
|
||||
- [singleValueOrNull](../reference/singlevalueornull.md)
|
||||
- [stochasticLinearRegression](../reference/stochasticlinearregression.md)
|
||||
- [stochasticLogisticRegression](../reference/stochasticlogisticregression.md)
|
||||
- [categoricalInformationValue](../reference/categoricalinformationvalue.md)
|
||||
- [contingency](../reference/contingency.md)
|
||||
- [cramersV](../reference/cramersv.md)
|
||||
- [cramersVBiasCorrected](../reference/cramersvbiascorrected.md)
|
||||
- [theilsU](../reference/theilsu.md)
|
||||
- [maxIntersections](../reference/maxintersections.md)
|
||||
- [maxIntersectionsPosition](../reference/maxintersectionsposition.md)
|
||||
- [meanZTest](../reference/meanztest.md)
|
||||
- [quantileGK](../reference/quantileGK.md)
|
||||
- [quantileInterpolatedWeighted](../reference/quantileinterpolatedweighted.md)
|
||||
- [sparkBar](../reference/sparkbar.md)
|
||||
- [sumCount](../reference/sumcount.md)
|
||||
- [largestTriangleThreeBuckets](../reference/largestTriangleThreeBuckets.md)
|
||||
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
slug: /en/sql-reference/data-types/boolean
|
||||
sidebar_position: 22
|
||||
sidebar_label: Boolean
|
||||
sidebar_label: Bool
|
||||
---
|
||||
|
||||
# Bool
|
||||
|
@ -6,101 +6,106 @@ sidebar_label: Map(K, V)
|
||||
|
||||
# Map(K, V)
|
||||
|
||||
`Map(K, V)` data type stores `key:value` pairs.
|
||||
The Map datatype is implemented as `Array(Tuple(key T1, value T2))`, which means that the order of keys in each map does not change, i.e., this data type maintains insertion order.
|
||||
Data type `Map(K, V)` stores key-value pairs.
|
||||
|
||||
Unlike other databases, maps are not unique in ClickHouse, i.e. a map can contain two elements with the same key.
|
||||
(The reason for that is that maps are internally implemented as `Array(Tuple(K, V))`.)
|
||||
|
||||
You can use use syntax `m[k]` to obtain the value for key `k` in map `m`.
|
||||
Also, `m[k]` scans the map, i.e. the runtime of the operation is linear in the size of the map.
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `key` — The key part of the pair. Arbitrary type, except [Nullable](../../sql-reference/data-types/nullable.md) and [LowCardinality](../../sql-reference/data-types/lowcardinality.md) nested with [Nullable](../../sql-reference/data-types/nullable.md) types.
|
||||
- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md).
|
||||
|
||||
To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity.
|
||||
- `K` — The type of the Map keys. Arbitrary type except [Nullable](../../sql-reference/data-types/nullable.md) and [LowCardinality](../../sql-reference/data-types/lowcardinality.md) nested with [Nullable](../../sql-reference/data-types/nullable.md) types.
|
||||
- `V` — The type of the Map values. Arbitrary type.
|
||||
|
||||
**Examples**
|
||||
|
||||
Consider the table:
|
||||
Create a table with a column of type map:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
|
||||
INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});
|
||||
CREATE TABLE tab (m Map(String, UInt64)) ENGINE=Memory;
|
||||
INSERT INTO tab VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});
|
||||
```
|
||||
|
||||
Select all `key2` values:
|
||||
To select `key2` values:
|
||||
|
||||
```sql
|
||||
SELECT a['key2'] FROM table_map;
|
||||
SELECT m['key2'] FROM tab;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─arrayElement(a, 'key2')─┐
|
||||
┌─arrayElement(m, 'key2')─┐
|
||||
│ 10 │
|
||||
│ 20 │
|
||||
│ 30 │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
||||
If there's no such `key` in the `Map()` column, the query returns zeros for numerical values, empty strings or empty arrays.
|
||||
If the requested key `k` is not contained in the map, `m[k]` returns the value type's default value, e.g. `0` for integer types and `''` for string types.
|
||||
To check whether a key exists in a map, you can use function [mapContains](../../sql-reference/functions/tuple-map-functions#mapcontains).
|
||||
|
||||
```sql
|
||||
INSERT INTO table_map VALUES ({'key3':100}), ({});
|
||||
SELECT a['key3'] FROM table_map;
|
||||
CREATE TABLE tab (m Map(String, UInt64)) ENGINE=Memory;
|
||||
INSERT INTO tab VALUES ({'key1':100}), ({});
|
||||
SELECT m['key1'] FROM tab;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─arrayElement(a, 'key3')─┐
|
||||
┌─arrayElement(m, 'key1')─┐
|
||||
│ 100 │
|
||||
│ 0 │
|
||||
└─────────────────────────┘
|
||||
┌─arrayElement(a, 'key3')─┐
|
||||
│ 0 │
|
||||
│ 0 │
|
||||
│ 0 │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
||||
## Convert Tuple to Map Type
|
||||
## Converting Tuple to Map
|
||||
|
||||
You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:
|
||||
Values of type `Tuple()` can be casted to values of type `Map()` using function [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast):
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─map───────────────────────────┐
|
||||
│ {1:'Ready',2:'Steady',3:'Go'} │
|
||||
└───────────────────────────────┘
|
||||
```
|
||||
|
||||
## Map.keys and Map.values Subcolumns
|
||||
## Reading subcolumns of Map
|
||||
|
||||
To optimize `Map` column processing, in some cases you can use the `keys` and `values` subcolumns instead of reading the whole column.
|
||||
To avoid reading the entire map, you can use subcolumns `keys` and `values` in some cases.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory;
|
||||
CREATE TABLE tab (m Map(String, UInt64)) ENGINE = Memory;
|
||||
INSERT INTO tab VALUES (map('key1', 1, 'key2', 2, 'key3', 3));
|
||||
|
||||
INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3));
|
||||
|
||||
SELECT a.keys FROM t_map;
|
||||
|
||||
SELECT a.values FROM t_map;
|
||||
SELECT m.keys FROM tab; -- same as mapKeys(m)
|
||||
SELECT m.values FROM tab; -- same as mapValues(m)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─a.keys─────────────────┐
|
||||
┌─m.keys─────────────────┐
|
||||
│ ['key1','key2','key3'] │
|
||||
└────────────────────────┘
|
||||
|
||||
┌─a.values─┐
|
||||
┌─m.values─┐
|
||||
│ [1,2,3] │
|
||||
└──────────┘
|
||||
```
|
||||
|
@ -415,8 +415,8 @@ Alias: `power(x, y)`
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md)
|
||||
- `y` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md)
|
||||
- `x` - [(U)Int8/16/32/64](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md)
|
||||
- `y` - [(U)Int8/16/32/64](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -635,8 +635,8 @@ atan2(y, x)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -670,8 +670,8 @@ hypot(x, y)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -838,6 +838,7 @@ degrees(x)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — Input in radians. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
- `x` — Input in radians. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
@ -735,6 +735,8 @@ LIMIT 10
|
||||
|
||||
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string.
|
||||
|
||||
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
@ -766,6 +768,8 @@ Result:
|
||||
|
||||
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string.
|
||||
|
||||
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
@ -890,6 +894,122 @@ SELECT
|
||||
└────────────────────┴────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## parseReadableSize
|
||||
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
|
||||
If the function is unable to parse the input value, it throws an exception.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
formatReadableSize(x)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Number of bytes, rounded up to the nearest integer ([UInt64](../../sql-reference/data-types/int-uint.md)).
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB']) AS readable_sizes,
|
||||
parseReadableSize(readable_sizes) AS sizes;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─readable_sizes─┬───sizes─┐
|
||||
│ 1 B │ 1 │
|
||||
│ 1 KiB │ 1024 │
|
||||
│ 3 MB │ 3000000 │
|
||||
│ 5.314 KiB │ 5442 │
|
||||
└────────────────┴─────────┘
|
||||
```
|
||||
|
||||
## parseReadableSizeOrNull
|
||||
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
|
||||
If the function is unable to parse the input value, it returns `NULL`.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
parseReadableSizeOrNull(x)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Number of bytes, rounded up to the nearest integer, or NULL if unable to parse the input (Nullable([UInt64](../../sql-reference/data-types/int-uint.md))).
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB', 'invalid']) AS readable_sizes,
|
||||
parseReadableSizeOrNull(readable_sizes) AS sizes;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─readable_sizes─┬───sizes─┐
|
||||
│ 1 B │ 1 │
|
||||
│ 1 KiB │ 1024 │
|
||||
│ 3 MB │ 3000000 │
|
||||
│ 5.314 KiB │ 5442 │
|
||||
│ invalid │ ᴺᵁᴸᴸ │
|
||||
└────────────────┴─────────┘
|
||||
```
|
||||
|
||||
## parseReadableSizeOrZero
|
||||
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it returns `0`.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
parseReadableSizeOrZero(x)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Number of bytes, rounded up to the nearest integer, or 0 if unable to parse the input ([UInt64](../../sql-reference/data-types/int-uint.md)).
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB', 'invalid']) AS readable_sizes,
|
||||
parseReadableSizeOrZero(readable_sizes) AS sizes;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─readable_sizes─┬───sizes─┐
|
||||
│ 1 B │ 1 │
|
||||
│ 1 KiB │ 1024 │
|
||||
│ 3 MB │ 3000000 │
|
||||
│ 5.314 KiB │ 5442 │
|
||||
│ invalid │ 0 │
|
||||
└────────────────┴─────────┘
|
||||
```
|
||||
|
||||
## parseTimeDelta
|
||||
|
||||
Parse a sequence of numbers followed by something resembling a time unit.
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: Maps
|
||||
|
||||
## map
|
||||
|
||||
Arranges `key:value` pairs into [Map(key, value)](../data-types/map.md) data type.
|
||||
Creates a value of type [Map(key, value)](../data-types/map.md) from key-value pairs.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -16,12 +16,12 @@ map(key1, value1[, key2, value2, ...])
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `key` — The key part of the pair. Arbitrary type, except [Nullable](../data-types/nullable.md) and [LowCardinality](../data-types/lowcardinality.md) nested with [Nullable](../data-types/nullable.md).
|
||||
- `value` — The value part of the pair. Arbitrary type, including [Map](../data-types/map.md) and [Array](../data-types/array.md).
|
||||
- `key_n` — The keys of the map entries. Any type supported as key type of [Map](../data-types/map.md).
|
||||
- `value_n` — The values of the map entries. Any type supported as value type of [Map](../data-types/map.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Data structure as `key:value` pairs. [Map(key, value)](../data-types/map.md).
|
||||
- A map containing `key:value` pairs. [Map(key, value)](../data-types/map.md).
|
||||
|
||||
**Examples**
|
||||
|
||||
@ -41,35 +41,16 @@ Result:
|
||||
└──────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a;
|
||||
INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
|
||||
SELECT a['key2'] FROM table_map;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─arrayElement(a, 'key2')─┐
|
||||
│ 0 │
|
||||
│ 2 │
|
||||
│ 4 │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Map(key, value)](../data-types/map.md) data type
|
||||
|
||||
## mapFromArrays
|
||||
|
||||
Merges an [Array](../data-types/array.md) of keys and an [Array](../data-types/array.md) of values into a [Map(key, value)](../data-types/map.md). Notice that the second argument could also be a [Map](../data-types/map.md), thus it is casted to an Array when executing.
|
||||
Creates a map from an array of keys and an array of values.
|
||||
|
||||
The function is a convenient alternative to syntax `CAST([...], 'Map(key_type, value_type)')`.
|
||||
For example, instead of writing
|
||||
- `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, or
|
||||
- `CAST([('aa',4), ('bb',5)], 'Map(String, UInt32)')`
|
||||
|
||||
The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
|
||||
|
||||
you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -81,12 +62,12 @@ Alias: `MAP_FROM_ARRAYS(keys, values)`
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `keys` — Given key array to create a map from. The nested type of array must be: [String](../data-types/string.md), [Integer](../data-types/int-uint.md), [LowCardinality](../data-types/lowcardinality.md), [FixedString](../data-types/fixedstring.md), [UUID](../data-types/uuid.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [Date32](../data-types/date32.md), [Enum](../data-types/enum.md)
|
||||
- `values` - Given value array or map to create a map from.
|
||||
- `keys` — Array of keys to create the map from. [Array(T)](../data-types/array.md) where `T` can be any type supported by [Map](../data-types/map.md) as key type.
|
||||
- `values` - Array or map of values to create the map from. [Array](../data-types/array.md) or [Map](../data-types/map.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A map whose keys and values are constructed from the key array and value array/map.
|
||||
- A map with keys and values constructed from the key array and value array/map.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -94,14 +75,25 @@ Query:
|
||||
|
||||
```sql
|
||||
select mapFromArrays(['a', 'b', 'c'], [1, 2, 3])
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐
|
||||
│ {'a':1,'b':2,'c':3} │
|
||||
└───────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
`mapFromArrays` also accepts arguments of type [Map](../data-types/map.md). These are casted to array of tuples during execution.
|
||||
|
||||
```sql
|
||||
SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌─mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))─┐
|
||||
│ {1:('a',1),2:('b',2),3:('c',3)} │
|
||||
└───────────────────────────────────────────────────────┘
|
||||
@ -109,9 +101,11 @@ SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))
|
||||
|
||||
## extractKeyValuePairs
|
||||
|
||||
Extracts key-value pairs, i.e. a [Map(String, String)](../data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files).
|
||||
|
||||
A key-value pair consists of a key, followed by a `key_value_delimiter` and a value. Key value pairs must be separated by `pair_delimiter`. Quoted keys and values are also supported.
|
||||
Converts a string of key-value pairs to a [Map(String, String)](../data-types/map.md).
|
||||
Parsing is tolerant towards noise (e.g. log files).
|
||||
Key-value pairs in the input string consist of a key, followed by a key-value delimiter, and a value.
|
||||
Key value pairs are separated by a pair delimiter.
|
||||
Keys and values can be quoted.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -126,17 +120,17 @@ Alias:
|
||||
**Arguments**
|
||||
|
||||
- `data` - String to extract key-value pairs from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
|
||||
- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
|
||||
- `pair_delimiters` - Set of character to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
|
||||
- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
|
||||
- `key_value_delimiter` - Single character delimiting keys and values. Defaults to `:`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
|
||||
- `pair_delimiters` - Set of character delimiting pairs. Defaults to ` `, `,` and `;`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
|
||||
- `quoting_character` - Single character used as quoting character. Defaults to `"`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- A [Map(String, String)](../data-types/map.md) of key-value pairs.
|
||||
- A of key-value pairs. Type: [Map(String, String)](../data-types/map.md)
|
||||
|
||||
**Examples**
|
||||
|
||||
Simple case:
|
||||
Query
|
||||
|
||||
``` sql
|
||||
SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv
|
||||
@ -150,7 +144,7 @@ Result:
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Single quote as quoting character:
|
||||
With a single quote `'` as quoting character:
|
||||
|
||||
``` sql
|
||||
SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
|
||||
@ -178,9 +172,29 @@ Result:
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
To restore a map string key-value pairs serialized with `toString`:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
map('John', '33', 'Paula', '31') AS m,
|
||||
toString(m) as map_serialized,
|
||||
extractKeyValuePairs(map_serialized, ':', ',', '\'') AS map_restored
|
||||
FORMAT Vertical;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
Row 1:
|
||||
──────
|
||||
m: {'John':'33','Paula':'31'}
|
||||
map_serialized: {'John':'33','Paula':'31'}
|
||||
map_restored: {'John':'33','Paula':'31'}
|
||||
```
|
||||
|
||||
## extractKeyValuePairsWithEscaping
|
||||
|
||||
Same as `extractKeyValuePairs` but with escaping support.
|
||||
Same as `extractKeyValuePairs` but supports escaping.
|
||||
|
||||
Supported escape sequences: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`.
|
||||
Non standard escape sequences are returned as it is (including the backslash) unless they are one of the following:
|
||||
@ -229,20 +243,6 @@ Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tu
|
||||
|
||||
**Example**
|
||||
|
||||
Query with a tuple:
|
||||
|
||||
```sql
|
||||
SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─res───────────┬─type───────────────────────────────┐
|
||||
│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │
|
||||
└───────────────┴────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query with `Map` type:
|
||||
|
||||
```sql
|
||||
@ -257,6 +257,20 @@ Result:
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
Query with a tuple:
|
||||
|
||||
```sql
|
||||
SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─res───────────┬─type───────────────────────────────┐
|
||||
│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │
|
||||
└───────────────┴────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapSubtract
|
||||
|
||||
Collect all the keys and subtract corresponding values.
|
||||
@ -277,20 +291,6 @@ Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tu
|
||||
|
||||
**Example**
|
||||
|
||||
Query with a tuple map:
|
||||
|
||||
```sql
|
||||
SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─res────────────┬─type──────────────────────────────┐
|
||||
│ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │
|
||||
└────────────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query with `Map` type:
|
||||
|
||||
```sql
|
||||
@ -305,55 +305,57 @@ Result:
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapPopulateSeries
|
||||
|
||||
Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array.
|
||||
|
||||
**Syntax**
|
||||
Query with a tuple map:
|
||||
|
||||
```sql
|
||||
mapPopulateSeries(keys, values[, max])
|
||||
mapPopulateSeries(map[, max])
|
||||
```
|
||||
|
||||
Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from the map with a step size of one, and corresponding values. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key.
|
||||
|
||||
For array arguments the number of elements in `keys` and `values` must be the same for each row.
|
||||
|
||||
**Arguments**
|
||||
|
||||
Arguments are [maps](../data-types/map.md) or two [arrays](../data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key.
|
||||
|
||||
Mapped arrays:
|
||||
|
||||
- `keys` — Array of keys. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)).
|
||||
- `values` — Array of values. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)).
|
||||
- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../data-types/int-uint.md#int-ranges).
|
||||
|
||||
or
|
||||
|
||||
- `map` — Map with integer keys. [Map](../data-types/map.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Depending on the arguments returns a [map](../data-types/map.md) or a [tuple](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
|
||||
|
||||
**Example**
|
||||
|
||||
Query with mapped arrays:
|
||||
|
||||
```sql
|
||||
SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
|
||||
SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─res──────────────────────────┬─type──────────────────────────────┐
|
||||
│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │
|
||||
└──────────────────────────────┴───────────────────────────────────┘
|
||||
┌─res────────────┬─type──────────────────────────────┐
|
||||
│ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │
|
||||
└────────────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapPopulateSeries
|
||||
|
||||
Fills missing key-value pairs in a map with integer keys.
|
||||
To support extending the keys beyond the largest value, a maximum key can be specified.
|
||||
More specifically, the function returns a map in which the the keys form a series from the smallest to the largest key (or `max` argument if it specified) with step size of 1, and corresponding values.
|
||||
If no value is specified for a key, a default value is used as value.
|
||||
In case keys repeat, only the first value (in order of appearance) is associated with the key.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mapPopulateSeries(map[, max])
|
||||
mapPopulateSeries(keys, values[, max])
|
||||
```
|
||||
|
||||
For array arguments the number of elements in `keys` and `values` must be the same for each row.
|
||||
|
||||
**Arguments**
|
||||
|
||||
Arguments are [Maps](../data-types/map.md) or two [Arrays](../data-types/array.md#data-type-array), where the first and second array contains keys and values for the each key.
|
||||
|
||||
Mapped arrays:
|
||||
|
||||
- `map` — Map with integer keys. [Map](../data-types/map.md).
|
||||
|
||||
or
|
||||
|
||||
- `keys` — Array of keys. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)).
|
||||
- `values` — Array of values. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)).
|
||||
- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../data-types/int-uint.md#int-ranges).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Depending on the arguments a [Map](../data-types/map.md) or a [Tuple](../data-types/tuple.md#tuplet1-t2) of two [Arrays](../data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
|
||||
|
||||
**Example**
|
||||
|
||||
Query with `Map` type:
|
||||
|
||||
```sql
|
||||
@ -368,9 +370,23 @@ Result:
|
||||
└─────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query with mapped arrays:
|
||||
|
||||
```sql
|
||||
SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─res──────────────────────────┬─type──────────────────────────────┐
|
||||
│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │
|
||||
└──────────────────────────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapContains
|
||||
|
||||
Determines whether the `map` contains the `key` parameter.
|
||||
Returns if a given key is contained in a given map.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -381,7 +397,7 @@ mapContains(map, key)
|
||||
**Arguments**
|
||||
|
||||
- `map` — Map. [Map](../data-types/map.md).
|
||||
- `key` — Key. Type matches the type of keys of `map` parameter.
|
||||
- `key` — Key. Type must match the key type of `map`.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -392,11 +408,11 @@ mapContains(map, key)
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
|
||||
CREATE TABLE tab (a Map(String, String)) ENGINE = Memory;
|
||||
|
||||
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
|
||||
INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
|
||||
|
||||
SELECT mapContains(a, 'name') FROM test;
|
||||
SELECT mapContains(a, 'name') FROM tab;
|
||||
|
||||
```
|
||||
|
||||
@ -411,9 +427,11 @@ Result:
|
||||
|
||||
## mapKeys
|
||||
|
||||
Returns all keys from the `map` parameter.
|
||||
Returns the keys of a given map.
|
||||
|
||||
Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [keys](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapKeys(m) FROM table` transforms to `SELECT m.keys FROM table`.
|
||||
This function can be optimized by enabling setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns).
|
||||
With enabled setting, the function only reads the [keys](../data-types/map.md#map-subcolumns) subcolumn instead the whole map.
|
||||
The query `SELECT mapKeys(m) FROM table` is transformed to `SELECT m.keys FROM table`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -434,11 +452,11 @@ mapKeys(map)
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
|
||||
CREATE TABLE tab (a Map(String, String)) ENGINE = Memory;
|
||||
|
||||
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
|
||||
INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
|
||||
|
||||
SELECT mapKeys(a) FROM test;
|
||||
SELECT mapKeys(a) FROM tab;
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -452,9 +470,11 @@ Result:
|
||||
|
||||
## mapValues
|
||||
|
||||
Returns all values from the `map` parameter.
|
||||
Returns the values of a given map.
|
||||
|
||||
Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [values](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapValues(m) FROM table` transforms to `SELECT m.values FROM table`.
|
||||
This function can be optimized by enabling setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns).
|
||||
With enabled setting, the function only reads the [values](../data-types/map.md#map-subcolumns) subcolumn instead the whole map.
|
||||
The query `SELECT mapValues(m) FROM table` is transformed to `SELECT m.values FROM table`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -475,11 +495,11 @@ mapValues(map)
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
|
||||
CREATE TABLE tab (a Map(String, String)) ENGINE = Memory;
|
||||
|
||||
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
|
||||
INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
|
||||
|
||||
SELECT mapValues(a) FROM test;
|
||||
SELECT mapValues(a) FROM tab;
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -512,11 +532,11 @@ mapContainsKeyLike(map, pattern)
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
|
||||
CREATE TABLE tab (a Map(String, String)) ENGINE = Memory;
|
||||
|
||||
INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
|
||||
INSERT INTO tab VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
|
||||
|
||||
SELECT mapContainsKeyLike(a, 'a%') FROM test;
|
||||
SELECT mapContainsKeyLike(a, 'a%') FROM tab;
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -530,6 +550,8 @@ Result:
|
||||
|
||||
## mapExtractKeyLike
|
||||
|
||||
Give a map with string keys and a LIKE pattern, this function returns a map with elements where the key matches the pattern.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
@ -543,18 +565,18 @@ mapExtractKeyLike(map, pattern)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A map contained elements the key of which matches the specified pattern. If there are no elements matched the pattern, it will return an empty map.
|
||||
- A map containing elements the key matching the specified pattern. If no elements match the pattern, an empty map is returned.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
|
||||
CREATE TABLE tab (a Map(String, String)) ENGINE = Memory;
|
||||
|
||||
INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
|
||||
INSERT INTO tab VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
|
||||
|
||||
SELECT mapExtractKeyLike(a, 'a%') FROM test;
|
||||
SELECT mapExtractKeyLike(a, 'a%') FROM tab;
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -568,6 +590,8 @@ Result:
|
||||
|
||||
## mapApply
|
||||
|
||||
Applies a function to each element of a map.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
@ -608,6 +632,8 @@ Result:
|
||||
|
||||
## mapFilter
|
||||
|
||||
Filters a map by applying a function to each map element.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
@ -623,7 +649,6 @@ mapFilter(func, map)
|
||||
|
||||
- Returns a map containing only the elements in `map` for which `func(map1[i], ..., mapN[i])` returns something other than 0.
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
@ -647,7 +672,6 @@ Result:
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## mapUpdate
|
||||
|
||||
**Syntax**
|
||||
@ -683,6 +707,9 @@ Result:
|
||||
|
||||
## mapConcat
|
||||
|
||||
Concatenates multiple maps based on the equality of their keys.
|
||||
If elements with the same key exist in more than one input map, all elements are added to the result map, but only the first one is accessible via operator `[]`
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
@ -691,11 +718,11 @@ mapConcat(maps)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `maps` – Arbitrary number of arguments of [Map](../data-types/map.md) type.
|
||||
- `maps` – Arbitrarily many [Maps](../data-types/map.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a map with concatenated maps passed as arguments. If there are same keys in two or more maps, all of them are added to the result map, but only the first one is accessible via operator `[]`
|
||||
- Returns a map with concatenated maps passed as arguments.
|
||||
|
||||
**Examples**
|
||||
|
||||
@ -729,9 +756,12 @@ Result:
|
||||
|
||||
## mapExists(\[func,\], map)
|
||||
|
||||
Returns 1 if there is at least one key-value pair in `map` for which `func(key, value)` returns something other than 0. Otherwise, it returns 0.
|
||||
Returns 1 if at least one key-value pair in `map` exists for which `func(key, value)` returns something other than 0. Otherwise, it returns 0.
|
||||
|
||||
Note that the `mapExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
|
||||
:::note
|
||||
`mapExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions).
|
||||
You can pass a lambda function to it as the first argument.
|
||||
:::
|
||||
|
||||
**Example**
|
||||
|
||||
@ -743,7 +773,7 @@ SELECT mapExists((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```
|
||||
┌─res─┐
|
||||
│ 1 │
|
||||
└─────┘
|
||||
@ -753,7 +783,10 @@ Result:
|
||||
|
||||
Returns 1 if `func(key, value)` returns something other than 0 for all key-value pairs in `map`. Otherwise, it returns 0.
|
||||
|
||||
Note that the `mapAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
|
||||
:::note
|
||||
Note that the `mapAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions).
|
||||
You can pass a lambda function to it as the first argument.
|
||||
:::
|
||||
|
||||
**Example**
|
||||
|
||||
@ -765,7 +798,7 @@ SELECT mapAll((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```
|
||||
┌─res─┐
|
||||
│ 0 │
|
||||
└─────┘
|
||||
@ -773,7 +806,8 @@ Result:
|
||||
|
||||
## mapSort(\[func,\], map)
|
||||
|
||||
Sorts the elements of the `map` in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the keys and values of the map.
|
||||
Sorts the elements of a map in ascending order.
|
||||
If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map.
|
||||
|
||||
**Examples**
|
||||
|
||||
@ -801,8 +835,8 @@ For more details see the [reference](../../sql-reference/functions/array-functio
|
||||
|
||||
## mapReverseSort(\[func,\], map)
|
||||
|
||||
Sorts the elements of the `map` in descending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the keys and values of the map.
|
||||
|
||||
Sorts the elements of a map in descending order.
|
||||
If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map.
|
||||
|
||||
**Examples**
|
||||
|
||||
@ -826,4 +860,4 @@ SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map;
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort) for `arrayReverseSort` function.
|
||||
For more details see function [arrayReverseSort](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort).
|
||||
|
@ -16,7 +16,7 @@ Most `ALTER TABLE` queries modify table settings or data:
|
||||
- [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md)
|
||||
- [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md)
|
||||
- [TTL](/docs/en/sql-reference/statements/alter/ttl.md)
|
||||
- [STATISTIC](/docs/en/sql-reference/statements/alter/statistic.md)
|
||||
- [STATISTICS](/docs/en/sql-reference/statements/alter/statistics.md)
|
||||
- [APPLY DELETED MASK](/docs/en/sql-reference/statements/alter/apply-deleted-mask.md)
|
||||
|
||||
:::note
|
||||
|
@ -1,25 +0,0 @@
|
||||
---
|
||||
slug: /en/sql-reference/statements/alter/statistic
|
||||
sidebar_position: 45
|
||||
sidebar_label: STATISTIC
|
||||
---
|
||||
|
||||
# Manipulating Column Statistics
|
||||
|
||||
The following operations are available:
|
||||
|
||||
- `ALTER TABLE [db].table ADD STATISTIC (columns list) TYPE type` - Adds statistic description to tables metadata.
|
||||
|
||||
- `ALTER TABLE [db].table DROP STATISTIC (columns list) TYPE type` - Removes statistic description from tables metadata and deletes statistic files from disk.
|
||||
|
||||
- `ALTER TABLE [db].table CLEAR STATISTIC (columns list) TYPE type` - Deletes statistic files from disk.
|
||||
|
||||
- `ALTER TABLE [db.]table MATERIALIZE STATISTIC (columns list) TYPE type` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
||||
The first two commands are lightweight in a sense that they only change metadata or remove files.
|
||||
|
||||
Also, they are replicated, syncing statistics metadata via ZooKeeper.
|
||||
|
||||
:::note
|
||||
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
|
||||
:::
|
33
docs/en/sql-reference/statements/alter/statistics.md
Normal file
33
docs/en/sql-reference/statements/alter/statistics.md
Normal file
@ -0,0 +1,33 @@
|
||||
---
|
||||
slug: /en/sql-reference/statements/alter/statistics
|
||||
sidebar_position: 45
|
||||
sidebar_label: STATISTICS
|
||||
---
|
||||
|
||||
# Manipulating Column Statistics
|
||||
|
||||
The following operations are available:
|
||||
|
||||
- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata.
|
||||
|
||||
- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies statistic description to tables metadata.
|
||||
|
||||
- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.
|
||||
|
||||
- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`.
|
||||
|
||||
- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
||||
The first two commands are lightweight in a sense that they only change metadata or remove files.
|
||||
|
||||
Also, they are replicated, syncing statistics metadata via ZooKeeper.
|
||||
|
||||
There is an example adding two statistics types to two columns:
|
||||
|
||||
```
|
||||
ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq;
|
||||
```
|
||||
|
||||
:::note
|
||||
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
|
||||
:::
|
@ -337,7 +337,7 @@ Then, when executing the query `SELECT name FROM users_a WHERE length(name) < 5;
|
||||
|
||||
Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
|
||||
|
||||
## Column Compression Codecs
|
||||
## Column Compression Codecs {#column_compression_codec}
|
||||
|
||||
By default, ClickHouse applies `lz4` compression in the self-managed version, and `zstd` in ClickHouse Cloud.
|
||||
|
||||
|
@ -304,8 +304,8 @@ atan2(y, x)
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `y` — координата y точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `x` — координата х точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `y` — координата y точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `x` — координата х точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
@ -341,8 +341,8 @@ hypot(x, y)
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `x` — первый катет прямоугольного треугольника. [Float64](../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `y` — второй катет прямоугольного треугольника. [Float64](../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `x` — первый катет прямоугольного треугольника. [Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `y` — второй катет прямоугольного треугольника. [Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
|
@ -154,7 +154,8 @@ function _clickhouse_quote()
|
||||
# Extract every option (everything that starts with "-") from the --help dialog.
|
||||
function _clickhouse_get_options()
|
||||
{
|
||||
"$@" --help 2>&1 | awk -F '[ ,=<>.]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 1, 1) == "-" && length($i) > 1) print $i; } }' | sort -u
|
||||
# By default --help will not print all settings, this is done only under --verbose
|
||||
"$@" --help --verbose 2>&1 | awk -F '[ ,=<>.]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 1, 1) == "-" && length($i) > 1) print $i; } }' | sort -u
|
||||
}
|
||||
|
||||
function _complete_for_clickhouse_generic_bin_impl()
|
||||
|
@ -57,7 +57,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
|
||||
DB::KeeperSnapshotManager manager(1, keeper_context);
|
||||
auto snp = manager.serializeSnapshotToBuffer(snapshot);
|
||||
auto file_info = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID());
|
||||
std::cout << "Snapshot serialized to path:" << fs::path(file_info.disk->getPath()) / file_info.path << std::endl;
|
||||
std::cout << "Snapshot serialized to path:" << fs::path(file_info->disk->getPath()) / file_info->path << std::endl;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -9,8 +9,6 @@ set (CLICKHOUSE_KEEPER_LINK
|
||||
clickhouse_common_zookeeper
|
||||
daemon
|
||||
dbms
|
||||
|
||||
${LINK_RESOURCE_LIB}
|
||||
)
|
||||
|
||||
clickhouse_program_add(keeper)
|
||||
@ -210,8 +208,6 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
loggers_no_text_log
|
||||
clickhouse_common_io
|
||||
clickhouse_parsers # Otherwise compression will not built. FIXME.
|
||||
|
||||
${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
|
||||
)
|
||||
|
||||
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
|
||||
|
@ -14,8 +14,6 @@ set (CLICKHOUSE_SERVER_LINK
|
||||
clickhouse_storages_system
|
||||
clickhouse_table_functions
|
||||
|
||||
${LINK_RESOURCE_LIB}
|
||||
|
||||
PUBLIC
|
||||
daemon
|
||||
)
|
||||
|
@ -51,10 +51,11 @@ enum class AccessType : uint8_t
|
||||
M(ALTER_CLEAR_INDEX, "CLEAR INDEX", TABLE, ALTER_INDEX) \
|
||||
M(ALTER_INDEX, "INDEX", GROUP, ALTER_TABLE) /* allows to execute ALTER ORDER BY or ALTER {ADD|DROP...} INDEX */\
|
||||
\
|
||||
M(ALTER_ADD_STATISTIC, "ALTER ADD STATISTIC", TABLE, ALTER_STATISTIC) \
|
||||
M(ALTER_DROP_STATISTIC, "ALTER DROP STATISTIC", TABLE, ALTER_STATISTIC) \
|
||||
M(ALTER_MATERIALIZE_STATISTIC, "ALTER MATERIALIZE STATISTIC", TABLE, ALTER_STATISTIC) \
|
||||
M(ALTER_STATISTIC, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER STATISTIC */\
|
||||
M(ALTER_ADD_STATISTICS, "ALTER ADD STATISTIC", TABLE, ALTER_STATISTICS) \
|
||||
M(ALTER_DROP_STATISTICS, "ALTER DROP STATISTIC", TABLE, ALTER_STATISTICS) \
|
||||
M(ALTER_MODIFY_STATISTICS, "ALTER MODIFY STATISTIC", TABLE, ALTER_STATISTICS) \
|
||||
M(ALTER_MATERIALIZE_STATISTICS, "ALTER MATERIALIZE STATISTIC", TABLE, ALTER_STATISTICS) \
|
||||
M(ALTER_STATISTICS, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER STATISTIC */\
|
||||
\
|
||||
M(ALTER_ADD_PROJECTION, "ADD PROJECTION", TABLE, ALTER_PROJECTION) \
|
||||
M(ALTER_DROP_PROJECTION, "DROP PROJECTION", TABLE, ALTER_PROJECTION) \
|
||||
|
@ -334,6 +334,18 @@ public:
|
||||
compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params
|
||||
}
|
||||
|
||||
Float64 getCountEqual(Float64 value) const
|
||||
{
|
||||
Float64 result = 0;
|
||||
for (const auto & c : centroids)
|
||||
{
|
||||
/// std::cerr << "c "<< c.mean << " "<< c.count << std::endl;
|
||||
if (value == c.mean)
|
||||
result += c.count;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
Float64 getCountLessThan(Float64 value) const
|
||||
{
|
||||
bool first = true;
|
||||
|
@ -3777,7 +3777,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
|
||||
auto & table_node = node->as<TableNode &>();
|
||||
result_projection_names.push_back(table_node.getStorageID().getFullNameNotQuoted());
|
||||
if (result_projection_names.empty())
|
||||
result_projection_names.push_back(table_node.getStorageID().getFullNameNotQuoted());
|
||||
|
||||
break;
|
||||
}
|
||||
@ -5475,7 +5476,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
|
||||
|
||||
/// Add current alias to non cached set, because in case of cyclic alias identifier should not be substituted from cache.
|
||||
/// See 02896_cyclic_aliases_crash.
|
||||
resolveExpressionNode(node, scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/);
|
||||
resolveExpressionNode(node, scope, true /*allow_lambda_expression*/, true /*allow_table_expression*/);
|
||||
|
||||
bool has_node_in_alias_table = false;
|
||||
|
||||
@ -5484,7 +5485,16 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
|
||||
{
|
||||
has_node_in_alias_table = true;
|
||||
|
||||
if (!it->second->isEqual(*node))
|
||||
bool matched = it->second->isEqual(*node);
|
||||
if (!matched)
|
||||
/// Table expression could be resolved as scalar subquery,
|
||||
/// but for duplicating alias we allow table expression to be returned.
|
||||
/// So, check constant node source expression as well.
|
||||
if (const auto * constant_node = it->second->as<ConstantNode>())
|
||||
if (const auto & source_expression = constant_node->getSourceExpression())
|
||||
matched = source_expression->isEqual(*node);
|
||||
|
||||
if (!matched)
|
||||
throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS,
|
||||
"Multiple expressions {} and {} for alias {}. In scope {}",
|
||||
node->formatASTForErrorMessage(),
|
||||
|
@ -22,15 +22,6 @@ include (configure_config.cmake)
|
||||
configure_file (Common/config.h.in ${CONFIG_INCLUDE_PATH}/config.h)
|
||||
configure_file (Common/config_version.cpp.in ${CONFIG_INCLUDE_PATH}/config_version.cpp)
|
||||
|
||||
if (USE_DEBUG_HELPERS)
|
||||
get_target_property(MAGIC_ENUM_INCLUDE_DIR ch_contrib::magic_enum INTERFACE_INCLUDE_DIRECTORIES)
|
||||
# CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc.
|
||||
# Prefixing "SHELL:" will force it to use the original text.
|
||||
set (INCLUDE_DEBUG_HELPERS "SHELL:-I\"${ClickHouse_SOURCE_DIR}/base\" -I\"${MAGIC_ENUM_INCLUDE_DIR}\" -include \"${ClickHouse_SOURCE_DIR}/src/Core/iostream_debug_helpers.h\"")
|
||||
# Use generator expression as we don't want to pollute CMAKE_CXX_FLAGS, which will interfere with CMake check system.
|
||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${INCLUDE_DEBUG_HELPERS}>)
|
||||
endif ()
|
||||
|
||||
# ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`).
|
||||
# If turned ON, this option defines such macro.
|
||||
# See `src/Common/TargetSpecific.h`
|
||||
|
@ -598,7 +598,7 @@ DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type)
|
||||
{
|
||||
auto key_type = fuzzDataType(type_map->getKeyType());
|
||||
auto value_type = fuzzDataType(type_map->getValueType());
|
||||
if (!DataTypeMap::checkKeyType(key_type))
|
||||
if (!DataTypeMap::isValidKeyType(key_type))
|
||||
key_type = type_map->getKeyType();
|
||||
|
||||
return std::make_shared<DataTypeMap>(key_type, value_type);
|
||||
|
@ -828,7 +828,7 @@ ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint
|
||||
size_t tuple_size = tuple.tupleSize();
|
||||
|
||||
if (tuple_size == 0)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty tuple");
|
||||
return filterGeneric(filt, result_size_hint);
|
||||
|
||||
Columns temporary_arrays(tuple_size);
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
@ -1265,7 +1265,7 @@ ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const
|
||||
size_t tuple_size = tuple.tupleSize();
|
||||
|
||||
if (tuple_size == 0)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty tuple");
|
||||
return replicateGeneric(replicate_offsets);
|
||||
|
||||
Columns temporary_arrays(tuple_size);
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/RadixSort.h>
|
||||
#include <Common/SipHash.h>
|
||||
@ -264,6 +265,23 @@ void ColumnDecimal<T>::updatePermutation(IColumn::PermutationSortDirection direc
|
||||
}
|
||||
}
|
||||
|
||||
template <is_decimal T>
|
||||
size_t ColumnDecimal<T>::estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const
|
||||
{
|
||||
const size_t range_size = equal_range.size();
|
||||
if (range_size <= 1)
|
||||
return range_size;
|
||||
|
||||
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
|
||||
HashSet<T> elements;
|
||||
for (size_t i = equal_range.from; i < equal_range.to; ++i)
|
||||
{
|
||||
size_t permuted_i = permutation[i];
|
||||
elements.insert(data[permuted_i]);
|
||||
}
|
||||
return elements.size();
|
||||
}
|
||||
|
||||
template <is_decimal T>
|
||||
ColumnPtr ColumnDecimal<T>::permute(const IColumn::Permutation & perm, size_t limit) const
|
||||
{
|
||||
|
@ -97,6 +97,8 @@ public:
|
||||
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
|
||||
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int, IColumn::Permutation & res, EqualRanges& equal_ranges) const override;
|
||||
size_t estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
|
||||
|
||||
MutableColumnPtr cloneResized(size_t size) const override;
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/HashTable/StringHashSet.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
@ -200,6 +201,24 @@ void ColumnFixedString::updatePermutation(IColumn::PermutationSortDirection dire
|
||||
updatePermutationImpl(limit, res, equal_ranges, ComparatorDescendingStable(*this), comparator_equal, DefaultSort(), DefaultPartialSort());
|
||||
}
|
||||
|
||||
size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
|
||||
{
|
||||
const size_t range_size = equal_range.size();
|
||||
if (range_size <= 1)
|
||||
return range_size;
|
||||
|
||||
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
|
||||
StringHashSet elements;
|
||||
bool inserted = false;
|
||||
for (size_t i = equal_range.from; i < equal_range.to; ++i)
|
||||
{
|
||||
size_t permuted_i = permutation[i];
|
||||
StringRef value = getDataAt(permuted_i);
|
||||
elements.emplace(value, inserted);
|
||||
}
|
||||
return elements.size();
|
||||
}
|
||||
|
||||
void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
||||
{
|
||||
const ColumnFixedString & src_concrete = assert_cast<const ColumnFixedString &>(src);
|
||||
|
@ -142,6 +142,8 @@ public:
|
||||
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const override;
|
||||
|
||||
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
|
||||
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
|
||||
|
@ -3,9 +3,12 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include "Storages/IndicesDescription.h"
|
||||
#include "base/types.h"
|
||||
#include <base/sort.h>
|
||||
#include <base/scope_guard.h>
|
||||
|
||||
@ -486,6 +489,21 @@ void ColumnLowCardinality::updatePermutationWithCollation(const Collator & colla
|
||||
updatePermutationImpl(limit, res, equal_ranges, comparator, equal_comparator, DefaultSort(), DefaultPartialSort());
|
||||
}
|
||||
|
||||
size_t ColumnLowCardinality::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
|
||||
{
|
||||
const size_t range_size = equal_range.size();
|
||||
if (range_size <= 1)
|
||||
return range_size;
|
||||
|
||||
HashSet<UInt64> elements;
|
||||
for (size_t i = equal_range.from; i < equal_range.to; ++i)
|
||||
{
|
||||
UInt64 index = getIndexes().getUInt(permutation[i]);
|
||||
elements.insert(index);
|
||||
}
|
||||
return elements.size();
|
||||
}
|
||||
|
||||
std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const
|
||||
{
|
||||
auto columns = getIndexes().scatter(num_columns, selector);
|
||||
|
@ -145,6 +145,8 @@ public:
|
||||
void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
|
||||
|
||||
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
|
||||
ColumnPtr replicate(const Offsets & offsets) const override
|
||||
{
|
||||
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/StringHashSet.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/WeakHash.h>
|
||||
@ -621,7 +622,7 @@ void ColumnNullable::updatePermutationImpl(IColumn::PermutationSortDirection dir
|
||||
if (unlikely(stability == PermutationSortStability::Stable))
|
||||
{
|
||||
for (auto & null_range : null_ranges)
|
||||
::sort(res.begin() + null_range.first, res.begin() + null_range.second);
|
||||
::sort(std::ranges::next(res.begin(), null_range.from), std::ranges::next(res.begin(), null_range.to));
|
||||
}
|
||||
|
||||
if (is_nulls_last || null_ranges.empty())
|
||||
@ -660,6 +661,33 @@ void ColumnNullable::updatePermutationWithCollation(const Collator & collator, I
|
||||
updatePermutationImpl(direction, stability, limit, null_direction_hint, res, equal_ranges, &collator);
|
||||
}
|
||||
|
||||
|
||||
size_t ColumnNullable::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
|
||||
{
|
||||
const size_t range_size = equal_range.size();
|
||||
if (range_size <= 1)
|
||||
return range_size;
|
||||
|
||||
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
|
||||
StringHashSet elements;
|
||||
bool has_null = false;
|
||||
bool inserted = false;
|
||||
for (size_t i = equal_range.from; i < equal_range.to; ++i)
|
||||
{
|
||||
size_t permuted_i = permutation[i];
|
||||
if (isNullAt(permuted_i))
|
||||
{
|
||||
has_null = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
StringRef value = getDataAt(permuted_i);
|
||||
elements.emplace(value, inserted);
|
||||
}
|
||||
}
|
||||
return elements.size() + (has_null ? 1 : 0);
|
||||
}
|
||||
|
||||
void ColumnNullable::reserve(size_t n)
|
||||
{
|
||||
getNestedColumn().reserve(n);
|
||||
|
@ -109,6 +109,7 @@ public:
|
||||
size_t limit, int null_direction_hint, Permutation & res) const override;
|
||||
void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
|
||||
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
void reserve(size_t n) override;
|
||||
void shrinkToFit() override;
|
||||
void ensureOwnership() override;
|
||||
|
@ -820,6 +820,9 @@ ColumnPtr recursiveRemoveSparse(const ColumnPtr & column)
|
||||
if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()))
|
||||
{
|
||||
auto columns = column_tuple->getColumns();
|
||||
if (columns.empty())
|
||||
return column;
|
||||
|
||||
for (auto & element : columns)
|
||||
element = recursiveRemoveSparse(element);
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <Columns/MaskOperations.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/StringHashSet.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
@ -481,6 +482,23 @@ void ColumnString::updatePermutationWithCollation(const Collator & collator, Per
|
||||
DefaultPartialSort());
|
||||
}
|
||||
|
||||
size_t ColumnString::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
|
||||
{
|
||||
const size_t range_size = equal_range.size();
|
||||
if (range_size <= 1)
|
||||
return range_size;
|
||||
|
||||
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
|
||||
StringHashSet elements;
|
||||
bool inserted = false;
|
||||
for (size_t i = equal_range.from; i < equal_range.to; ++i)
|
||||
{
|
||||
size_t permuted_i = permutation[i];
|
||||
StringRef value = getDataAt(permuted_i);
|
||||
elements.emplace(value, inserted);
|
||||
}
|
||||
return elements.size();
|
||||
}
|
||||
|
||||
ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
|
||||
{
|
||||
|
@ -260,6 +260,8 @@ public:
|
||||
void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
|
||||
|
||||
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
|
||||
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
@ -3,14 +3,16 @@
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <Columns/IColumnImpl.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/iota.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <DataTypes/Serializations/SerializationInfoTuple.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||
#include <base/sort.h>
|
||||
|
||||
|
||||
@ -23,6 +25,7 @@ namespace ErrorCodes
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
@ -44,6 +47,9 @@ std::string ColumnTuple::getName() const
|
||||
|
||||
ColumnTuple::ColumnTuple(MutableColumns && mutable_columns)
|
||||
{
|
||||
if (mutable_columns.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "This function cannot be used to construct empty tuple. It is a bug");
|
||||
|
||||
columns.reserve(mutable_columns.size());
|
||||
for (auto & column : mutable_columns)
|
||||
{
|
||||
@ -52,15 +58,21 @@ ColumnTuple::ColumnTuple(MutableColumns && mutable_columns)
|
||||
|
||||
columns.push_back(std::move(column));
|
||||
}
|
||||
column_length = columns[0]->size();
|
||||
}
|
||||
|
||||
ColumnTuple::ColumnTuple(size_t len) : column_length(len) {}
|
||||
|
||||
ColumnTuple::Ptr ColumnTuple::create(const Columns & columns)
|
||||
{
|
||||
if (columns.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "This function cannot be used to construct empty tuple. It is a bug");
|
||||
|
||||
for (const auto & column : columns)
|
||||
if (isColumnConst(*column))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ColumnTuple cannot have ColumnConst as its element");
|
||||
|
||||
auto column_tuple = ColumnTuple::create(MutableColumns());
|
||||
auto column_tuple = ColumnTuple::create(columns[0]->size());
|
||||
column_tuple->columns.assign(columns.begin(), columns.end());
|
||||
|
||||
return column_tuple;
|
||||
@ -68,11 +80,14 @@ ColumnTuple::Ptr ColumnTuple::create(const Columns & columns)
|
||||
|
||||
ColumnTuple::Ptr ColumnTuple::create(const TupleColumns & columns)
|
||||
{
|
||||
if (columns.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "This function cannot be used to construct empty tuple. It is a bug");
|
||||
|
||||
for (const auto & column : columns)
|
||||
if (isColumnConst(*column))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ColumnTuple cannot have ColumnConst as its element");
|
||||
|
||||
auto column_tuple = ColumnTuple::create(MutableColumns());
|
||||
auto column_tuple = ColumnTuple::create(columns[0]->size());
|
||||
column_tuple->columns = columns;
|
||||
|
||||
return column_tuple;
|
||||
@ -80,6 +95,9 @@ ColumnTuple::Ptr ColumnTuple::create(const TupleColumns & columns)
|
||||
|
||||
MutableColumnPtr ColumnTuple::cloneEmpty() const
|
||||
{
|
||||
if (columns.empty())
|
||||
return ColumnTuple::create(0);
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
MutableColumns new_columns(tuple_size);
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
@ -90,6 +108,9 @@ MutableColumnPtr ColumnTuple::cloneEmpty() const
|
||||
|
||||
MutableColumnPtr ColumnTuple::cloneResized(size_t new_size) const
|
||||
{
|
||||
if (columns.empty())
|
||||
return ColumnTuple::create(new_size);
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
MutableColumns new_columns(tuple_size);
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
@ -98,6 +119,16 @@ MutableColumnPtr ColumnTuple::cloneResized(size_t new_size) const
|
||||
return ColumnTuple::create(std::move(new_columns));
|
||||
}
|
||||
|
||||
size_t ColumnTuple::size() const
|
||||
{
|
||||
if (columns.empty())
|
||||
return column_length;
|
||||
|
||||
/// It's difficult to maintain a consistent `column_length` because there
|
||||
/// are many places that manipulates sub-columns directly.
|
||||
return columns.at(0)->size();
|
||||
}
|
||||
|
||||
Field ColumnTuple::operator[](size_t n) const
|
||||
{
|
||||
Field res;
|
||||
@ -144,6 +175,7 @@ void ColumnTuple::insert(const Field & x)
|
||||
if (tuple.size() != tuple_size)
|
||||
throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple");
|
||||
|
||||
++column_length;
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
columns[i]->insert(tuple[i]);
|
||||
}
|
||||
@ -181,6 +213,7 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
|
||||
if (src.columns.size() != tuple_size)
|
||||
throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple");
|
||||
|
||||
++column_length;
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
columns[i]->insertFrom(*src.columns[i], n);
|
||||
}
|
||||
@ -199,18 +232,28 @@ void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t le
|
||||
|
||||
void ColumnTuple::insertDefault()
|
||||
{
|
||||
++column_length;
|
||||
for (auto & column : columns)
|
||||
column->insertDefault();
|
||||
}
|
||||
|
||||
void ColumnTuple::popBack(size_t n)
|
||||
{
|
||||
column_length -= n;
|
||||
for (auto & column : columns)
|
||||
column->popBack(n);
|
||||
}
|
||||
|
||||
StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
/// Has to put one useless byte into Arena, because serialization into zero number of bytes is ambiguous.
|
||||
char * res = arena.allocContinue(1, begin);
|
||||
*res = 0;
|
||||
return { res, 1 };
|
||||
}
|
||||
|
||||
StringRef res(begin, 0);
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
@ -232,6 +275,11 @@ char * ColumnTuple::serializeValueIntoMemory(size_t n, char * memory) const
|
||||
|
||||
const char * ColumnTuple::deserializeAndInsertFromArena(const char * pos)
|
||||
{
|
||||
++column_length;
|
||||
|
||||
if (columns.empty())
|
||||
return pos + 1;
|
||||
|
||||
for (auto & column : columns)
|
||||
pos = column->deserializeAndInsertFromArena(pos);
|
||||
|
||||
@ -272,6 +320,7 @@ void ColumnTuple::updateHashFast(SipHash & hash) const
|
||||
|
||||
void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
||||
{
|
||||
column_length += length;
|
||||
const size_t tuple_size = columns.size();
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
columns[i]->insertRangeFrom(
|
||||
@ -281,6 +330,12 @@ void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t leng
|
||||
|
||||
ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
size_t bytes = countBytesInFilter(filt);
|
||||
return cloneResized(bytes);
|
||||
}
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
Columns new_columns(tuple_size);
|
||||
|
||||
@ -292,12 +347,29 @@ ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint) con
|
||||
|
||||
void ColumnTuple::expand(const Filter & mask, bool inverted)
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
size_t bytes = countBytesInFilter(mask);
|
||||
if (inverted)
|
||||
bytes = mask.size() - bytes;
|
||||
column_length = bytes;
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto & column : columns)
|
||||
column->expand(mask, inverted);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
if (column_length != perm.size())
|
||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of permutation doesn't match size of column");
|
||||
|
||||
return cloneResized(limit ? std::min(column_length, limit) : column_length);
|
||||
}
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
Columns new_columns(tuple_size);
|
||||
|
||||
@ -309,6 +381,14 @@ ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const
|
||||
|
||||
ColumnPtr ColumnTuple::index(const IColumn & indexes, size_t limit) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
if (indexes.size() < limit)
|
||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of indexes is less than required");
|
||||
|
||||
return cloneResized(limit ? limit : column_length);
|
||||
}
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
Columns new_columns(tuple_size);
|
||||
|
||||
@ -320,6 +400,14 @@ ColumnPtr ColumnTuple::index(const IColumn & indexes, size_t limit) const
|
||||
|
||||
ColumnPtr ColumnTuple::replicate(const Offsets & offsets) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
if (column_length != offsets.size())
|
||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of offsets doesn't match size of column");
|
||||
|
||||
return cloneResized(offsets.back());
|
||||
}
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
Columns new_columns(tuple_size);
|
||||
|
||||
@ -331,6 +419,22 @@ ColumnPtr ColumnTuple::replicate(const Offsets & offsets) const
|
||||
|
||||
MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & selector) const
|
||||
{
|
||||
if (columns.empty())
|
||||
{
|
||||
if (column_length != selector.size())
|
||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of selector doesn't match size of column");
|
||||
|
||||
std::vector<size_t> counts(num_columns);
|
||||
for (auto idx : selector)
|
||||
++counts[idx];
|
||||
|
||||
MutableColumns res(num_columns);
|
||||
for (size_t i = 0; i < num_columns; ++i)
|
||||
res[i] = cloneResized(counts[i]);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
std::vector<MutableColumns> scattered_tuple_elements(tuple_size);
|
||||
|
||||
@ -413,6 +517,9 @@ void ColumnTuple::getPermutationImpl(IColumn::PermutationSortDirection direction
|
||||
res.resize(rows);
|
||||
iota(res.data(), rows, IColumn::Permutation::value_type(0));
|
||||
|
||||
if (columns.empty())
|
||||
return;
|
||||
|
||||
if (limit >= rows)
|
||||
limit = 0;
|
||||
|
||||
@ -429,7 +536,7 @@ void ColumnTuple::updatePermutationImpl(IColumn::PermutationSortDirection direct
|
||||
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
while (!equal_ranges.empty() && limit && limit <= equal_ranges.back().first)
|
||||
while (!equal_ranges.empty() && limit && limit <= equal_ranges.back().from)
|
||||
equal_ranges.pop_back();
|
||||
|
||||
if (collator && column->isCollationSupported())
|
||||
@ -603,6 +710,9 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c
|
||||
|
||||
ColumnPtr ColumnTuple::compress() const
|
||||
{
|
||||
if (columns.empty())
|
||||
return Ptr();
|
||||
|
||||
size_t byte_size = 0;
|
||||
Columns compressed;
|
||||
compressed.reserve(columns.size());
|
||||
|
@ -26,6 +26,13 @@ private:
|
||||
explicit ColumnTuple(MutableColumns && columns);
|
||||
ColumnTuple(const ColumnTuple &) = default;
|
||||
|
||||
/// Empty tuple needs a dedicated field to store its size.
|
||||
/// This field used *only* for zero-sized tuples.
|
||||
/// Otherwise `columns[0].size()` should be used to get a size of tuple column
|
||||
size_t column_length;
|
||||
|
||||
/// Dedicated constructor for empty tuples.
|
||||
explicit ColumnTuple(size_t len);
|
||||
public:
|
||||
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
|
||||
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
|
||||
@ -39,6 +46,8 @@ public:
|
||||
requires std::is_rvalue_reference_v<Arg &&>
|
||||
static MutablePtr create(Arg && arg) { return Base::create(std::forward<Arg>(arg)); }
|
||||
|
||||
static MutablePtr create(size_t len_) { return Base::create(len_); }
|
||||
|
||||
std::string getName() const override;
|
||||
const char * getFamilyName() const override { return "Tuple"; }
|
||||
TypeIndex getDataType() const override { return TypeIndex::Tuple; }
|
||||
@ -46,10 +55,7 @@ public:
|
||||
MutableColumnPtr cloneEmpty() const override;
|
||||
MutableColumnPtr cloneResized(size_t size) const override;
|
||||
|
||||
size_t size() const override
|
||||
{
|
||||
return columns.at(0)->size();
|
||||
}
|
||||
size_t size() const override;
|
||||
|
||||
Field operator[](size_t n) const override;
|
||||
void get(size_t n, Field & res) const override;
|
||||
@ -117,6 +123,9 @@ public:
|
||||
bool hasDynamicStructure() const override;
|
||||
void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override;
|
||||
|
||||
/// Empty tuple needs a public method to manage its size.
|
||||
void addSize(size_t delta) { column_length += delta; }
|
||||
|
||||
private:
|
||||
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/HashTable/StringHashSet.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
#include <Common/RadixSort.h>
|
||||
#include <Common/SipHash.h>
|
||||
@ -413,6 +414,25 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
size_t ColumnVector<T>::estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const
|
||||
{
|
||||
const size_t range_size = equal_range.size();
|
||||
if (range_size <= 1)
|
||||
return range_size;
|
||||
|
||||
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
|
||||
StringHashSet elements;
|
||||
bool inserted = false;
|
||||
for (size_t i = equal_range.from; i < equal_range.to; ++i)
|
||||
{
|
||||
size_t permuted_i = permutation[i];
|
||||
StringRef value = getDataAt(permuted_i);
|
||||
elements.emplace(value, inserted);
|
||||
}
|
||||
return elements.size();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
|
||||
{
|
||||
|
@ -161,6 +161,8 @@ public:
|
||||
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_ranges) const override;
|
||||
|
||||
size_t estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
|
||||
void reserve(size_t n) override
|
||||
{
|
||||
data.reserve_exact(n);
|
||||
|
@ -83,6 +83,11 @@ ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const ColumnConst
|
||||
return res;
|
||||
}
|
||||
|
||||
size_t IColumn::estimateCardinalityInPermutedRange(const IColumn::Permutation & /*permutation*/, const EqualRange & equal_range) const
|
||||
{
|
||||
return equal_range.size();
|
||||
}
|
||||
|
||||
void IColumn::forEachSubcolumn(ColumnCallback callback) const
|
||||
{
|
||||
const_cast<IColumn*>(this)->forEachSubcolumn([&callback](WrappedPtr & subcolumn)
|
||||
|
@ -36,11 +36,19 @@ class Field;
|
||||
class WeakHash32;
|
||||
class ColumnConst;
|
||||
|
||||
/*
|
||||
* Represents a set of equal ranges in previous column to perform sorting in current column.
|
||||
* Used in sorting by tuples.
|
||||
* */
|
||||
using EqualRanges = std::vector<std::pair<size_t, size_t> >;
|
||||
/// A range of column values between row indexes `from` and `to`. The name "equal range" is due to table sorting as its main use case: With
|
||||
/// a PRIMARY KEY (c_pk1, c_pk2, ...), the first PK column is fully sorted. The second PK column is sorted within equal-value runs of the
|
||||
/// first PK column, and so on. The number of runs (ranges) per column increases from one primary key column to the next. An "equal range"
|
||||
/// is a run in a previous column, within the values of the current column can be sorted.
|
||||
struct EqualRange
|
||||
{
|
||||
size_t from; /// inclusive
|
||||
size_t to; /// exclusive
|
||||
EqualRange(size_t from_, size_t to_) : from(from_), to(to_) { chassert(from <= to); }
|
||||
size_t size() const { return to - from; }
|
||||
};
|
||||
|
||||
using EqualRanges = std::vector<EqualRange>;
|
||||
|
||||
/// Declares interface to store columns in memory.
|
||||
class IColumn : public COW<IColumn>
|
||||
@ -399,6 +407,9 @@ public:
|
||||
"or for Array or Tuple, containing them.");
|
||||
}
|
||||
|
||||
/// Estimate the cardinality (number of unique values) of the values in 'equal_range' after permutation, formally: |{ column[permutation[r]] : r in equal_range }|.
|
||||
virtual size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const;
|
||||
|
||||
/** Copies each element according offsets parameter.
|
||||
* (i-th element should be copied offsets[i] - offsets[i - 1] times.)
|
||||
* It is necessary in ARRAY JOIN operation.
|
||||
|
@ -60,12 +60,9 @@ ColumnPtr IColumnDummy::filter(const Filter & filt, ssize_t /*result_size_hint*/
|
||||
return cloneDummy(bytes);
|
||||
}
|
||||
|
||||
void IColumnDummy::expand(const IColumn::Filter & mask, bool inverted)
|
||||
void IColumnDummy::expand(const IColumn::Filter & mask, bool)
|
||||
{
|
||||
size_t bytes = countBytesInFilter(mask);
|
||||
if (inverted)
|
||||
bytes = mask.size() - bytes;
|
||||
s = bytes;
|
||||
s = mask.size();
|
||||
}
|
||||
|
||||
ColumnPtr IColumnDummy::permute(const Permutation & perm, size_t limit) const
|
||||
|
@ -139,7 +139,7 @@ void IColumn::updatePermutationImpl(
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit > equal_ranges.back().second)
|
||||
if (limit >= size() || limit > equal_ranges.back().to)
|
||||
limit = 0;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
|
@ -77,7 +77,7 @@ INSTANTIATE(IPv6)
|
||||
|
||||
#undef INSTANTIATE
|
||||
|
||||
template <bool inverted, bool column_is_short, typename Container>
|
||||
template <bool inverted, typename Container>
|
||||
static size_t extractMaskNumericImpl(
|
||||
PaddedPODArray<UInt8> & mask,
|
||||
const Container & data,
|
||||
@ -85,42 +85,27 @@ static size_t extractMaskNumericImpl(
|
||||
const PaddedPODArray<UInt8> * null_bytemap,
|
||||
PaddedPODArray<UInt8> * nulls)
|
||||
{
|
||||
if constexpr (!column_is_short)
|
||||
{
|
||||
if (data.size() != mask.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
|
||||
}
|
||||
if (data.size() != mask.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
|
||||
|
||||
size_t ones_count = 0;
|
||||
size_t data_index = 0;
|
||||
|
||||
size_t mask_size = mask.size();
|
||||
size_t data_size = data.size();
|
||||
|
||||
for (size_t i = 0; i != mask_size && data_index != data_size; ++i)
|
||||
for (size_t i = 0; i != mask_size; ++i)
|
||||
{
|
||||
// Change mask only where value is 1.
|
||||
if (!mask[i])
|
||||
continue;
|
||||
|
||||
UInt8 value;
|
||||
size_t index;
|
||||
if constexpr (column_is_short)
|
||||
{
|
||||
index = data_index;
|
||||
++data_index;
|
||||
}
|
||||
else
|
||||
index = i;
|
||||
|
||||
if (null_bytemap && (*null_bytemap)[index])
|
||||
if (null_bytemap && (*null_bytemap)[i])
|
||||
{
|
||||
value = null_value;
|
||||
if (nulls)
|
||||
(*nulls)[i] = 1;
|
||||
}
|
||||
else
|
||||
value = static_cast<bool>(data[index]);
|
||||
value = static_cast<bool>(data[i]);
|
||||
|
||||
if constexpr (inverted)
|
||||
value = !value;
|
||||
@ -131,12 +116,6 @@ static size_t extractMaskNumericImpl(
|
||||
mask[i] = value;
|
||||
}
|
||||
|
||||
if constexpr (column_is_short)
|
||||
{
|
||||
if (data_index != data_size)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask");
|
||||
}
|
||||
|
||||
return ones_count;
|
||||
}
|
||||
|
||||
@ -155,10 +134,7 @@ static bool extractMaskNumeric(
|
||||
|
||||
const auto & data = numeric_column->getData();
|
||||
size_t ones_count;
|
||||
if (column->size() < mask.size())
|
||||
ones_count = extractMaskNumericImpl<inverted, true>(mask, data, null_value, null_bytemap, nulls);
|
||||
else
|
||||
ones_count = extractMaskNumericImpl<inverted, false>(mask, data, null_value, null_bytemap, nulls);
|
||||
ones_count = extractMaskNumericImpl<inverted>(mask, data, null_value, null_bytemap, nulls);
|
||||
|
||||
mask_info.has_ones = ones_count > 0;
|
||||
mask_info.has_zeros = ones_count != mask.size();
|
||||
@ -279,25 +255,32 @@ void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> &
|
||||
if (!column_function)
|
||||
return;
|
||||
|
||||
size_t original_size = column.column->size();
|
||||
|
||||
ColumnWithTypeAndName result;
|
||||
/// If mask contains only zeros, we can just create
|
||||
/// an empty column with the execution result type.
|
||||
if (!mask_info.has_ones)
|
||||
{
|
||||
/// If mask contains only zeros, we can just create a column with default values as it will be ignored
|
||||
auto result_type = column_function->getResultType();
|
||||
auto empty_column = result_type->createColumn();
|
||||
result = {std::move(empty_column), result_type, ""};
|
||||
auto default_column = result_type->createColumnConstWithDefaultValue(original_size)->convertToFullColumnIfConst();
|
||||
column = {default_column, result_type, ""};
|
||||
}
|
||||
/// Filter column only if mask contains zeros.
|
||||
else if (mask_info.has_zeros)
|
||||
{
|
||||
/// If it contains both zeros and ones, we need to execute the function only on the mask values
|
||||
/// First we filter the column, which creates a new column, then we apply the column, and finally we expand it
|
||||
/// Expanding is done to keep consistency in function calls (all columns the same size) and it's ok
|
||||
/// since the values won't be used by `if`
|
||||
auto filtered = column_function->filter(mask, -1);
|
||||
result = typeid_cast<const ColumnFunction *>(filtered.get())->reduce();
|
||||
auto filter_after_execution = typeid_cast<const ColumnFunction *>(filtered.get())->reduce();
|
||||
auto mut_column = IColumn::mutate(std::move(filter_after_execution.column));
|
||||
mut_column->expand(mask, false);
|
||||
column.column = std::move(mut_column);
|
||||
}
|
||||
else
|
||||
result = column_function->reduce();
|
||||
column = column_function->reduce();
|
||||
|
||||
column = std::move(result);
|
||||
chassert(column.column->size() == original_size);
|
||||
}
|
||||
|
||||
void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty)
|
||||
|
@ -13,8 +13,9 @@ struct CopyableAtomic
|
||||
: value(other.value.load())
|
||||
{}
|
||||
|
||||
explicit CopyableAtomic(T && value_)
|
||||
: value(std::forward<T>(value_))
|
||||
template <std::convertible_to<T> U>
|
||||
explicit CopyableAtomic(U && value_)
|
||||
: value(std::forward<U>(value_))
|
||||
{}
|
||||
|
||||
CopyableAtomic & operator=(const CopyableAtomic & other)
|
||||
@ -23,9 +24,10 @@ struct CopyableAtomic
|
||||
return *this;
|
||||
}
|
||||
|
||||
CopyableAtomic & operator=(bool value_)
|
||||
template <std::convertible_to<T> U>
|
||||
CopyableAtomic & operator=(U && value_)
|
||||
{
|
||||
value = value_;
|
||||
value = std::forward<U>(value_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -586,7 +586,7 @@
|
||||
M(705, TABLE_NOT_EMPTY) \
|
||||
M(706, LIBSSH_ERROR) \
|
||||
M(707, GCP_ERROR) \
|
||||
M(708, ILLEGAL_STATISTIC) \
|
||||
M(708, ILLEGAL_STATISTICS) \
|
||||
M(709, CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT) \
|
||||
M(710, FAULT_INJECTED) \
|
||||
M(711, FILECACHE_ACCESS_DENIED) \
|
||||
|
@ -43,12 +43,13 @@ namespace
|
||||
endpoint,
|
||||
proxy_scheme,
|
||||
proxy_port,
|
||||
cache_ttl
|
||||
std::chrono::seconds {cache_ttl}
|
||||
};
|
||||
|
||||
return std::make_shared<RemoteProxyConfigurationResolver>(
|
||||
server_configuration,
|
||||
request_protocol,
|
||||
std::make_shared<RemoteProxyHostFetcherImpl>(),
|
||||
isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration));
|
||||
}
|
||||
|
||||
|
@ -6,22 +6,47 @@
|
||||
#include <Poco/Net/HTTPRequest.h>
|
||||
#include <Poco/Net/HTTPResponse.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER;
|
||||
}
|
||||
|
||||
std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts)
|
||||
{
|
||||
auto request = Poco::Net::HTTPRequest(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1);
|
||||
auto session = makeHTTPSession(HTTPConnectionGroupType::HTTP, endpoint, timeouts);
|
||||
|
||||
session->sendRequest(request);
|
||||
|
||||
Poco::Net::HTTPResponse response;
|
||||
auto & response_body_stream = session->receiveResponse(response);
|
||||
|
||||
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
|
||||
throw HTTPException(
|
||||
ErrorCodes::RECEIVED_ERROR_FROM_REMOTE_IO_SERVER,
|
||||
endpoint.toString(),
|
||||
response.getStatus(),
|
||||
response.getReason(),
|
||||
"");
|
||||
|
||||
std::string proxy_host;
|
||||
Poco::StreamCopier::copyToString(response_body_stream, proxy_host);
|
||||
|
||||
return proxy_host;
|
||||
}
|
||||
|
||||
RemoteProxyConfigurationResolver::RemoteProxyConfigurationResolver(
|
||||
const RemoteServerConfiguration & remote_server_configuration_,
|
||||
Protocol request_protocol_,
|
||||
std::shared_ptr<RemoteProxyHostFetcher> fetcher_,
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy_
|
||||
)
|
||||
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_), remote_server_configuration(remote_server_configuration_)
|
||||
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_),
|
||||
remote_server_configuration(remote_server_configuration_), fetcher(fetcher_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -29,9 +54,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
|
||||
{
|
||||
auto logger = getLogger("RemoteProxyConfigurationResolver");
|
||||
|
||||
auto & [endpoint, proxy_protocol, proxy_port, cache_ttl_] = remote_server_configuration;
|
||||
|
||||
LOG_DEBUG(logger, "Obtain proxy using resolver: {}", endpoint.toString());
|
||||
auto & [endpoint, proxy_protocol_string, proxy_port, cache_ttl] = remote_server_configuration;
|
||||
|
||||
std::lock_guard lock(cache_mutex);
|
||||
|
||||
@ -55,66 +78,26 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
|
||||
.withSendTimeout(1)
|
||||
.withReceiveTimeout(1);
|
||||
|
||||
try
|
||||
{
|
||||
/// It should be just empty GET request.
|
||||
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1);
|
||||
const auto proxy_host = fetcher->fetch(endpoint, timeouts);
|
||||
|
||||
const auto & host = endpoint.getHost();
|
||||
auto resolved_hosts = DNSResolver::instance().resolveHostAll(host);
|
||||
LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol_string, proxy_host, proxy_port);
|
||||
|
||||
HTTPSessionPtr session;
|
||||
auto proxy_protocol = ProxyConfiguration::protocolFromString(proxy_protocol_string);
|
||||
|
||||
for (size_t i = 0; i < resolved_hosts.size(); ++i)
|
||||
{
|
||||
auto resolved_endpoint = endpoint;
|
||||
resolved_endpoint.setHost(resolved_hosts[i].toString());
|
||||
session = makeHTTPSession(HTTPConnectionGroupType::HTTP, resolved_endpoint, timeouts);
|
||||
bool use_tunneling_for_https_requests_over_http_proxy = useTunneling(
|
||||
request_protocol,
|
||||
proxy_protocol,
|
||||
disable_tunneling_for_https_requests_over_http_proxy);
|
||||
|
||||
try
|
||||
{
|
||||
session->sendRequest(request);
|
||||
break;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (i + 1 == resolved_hosts.size())
|
||||
throw;
|
||||
}
|
||||
}
|
||||
cached_config.protocol = proxy_protocol;
|
||||
cached_config.host = proxy_host;
|
||||
cached_config.port = proxy_port;
|
||||
cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy;
|
||||
cached_config.original_request_protocol = request_protocol;
|
||||
cache_timestamp = std::chrono::system_clock::now();
|
||||
cache_valid = true;
|
||||
|
||||
Poco::Net::HTTPResponse response;
|
||||
auto & response_body_stream = session->receiveResponse(response);
|
||||
|
||||
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Proxy resolver returned not OK status: {}", response.getReason());
|
||||
|
||||
String proxy_host;
|
||||
/// Read proxy host as string from response body.
|
||||
Poco::StreamCopier::copyToString(response_body_stream, proxy_host);
|
||||
|
||||
LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol, proxy_host, proxy_port);
|
||||
|
||||
bool use_tunneling_for_https_requests_over_http_proxy = useTunneling(
|
||||
request_protocol,
|
||||
cached_config.protocol,
|
||||
disable_tunneling_for_https_requests_over_http_proxy);
|
||||
|
||||
cached_config.protocol = ProxyConfiguration::protocolFromString(proxy_protocol);
|
||||
cached_config.host = proxy_host;
|
||||
cached_config.port = proxy_port;
|
||||
cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy;
|
||||
cached_config.original_request_protocol = request_protocol;
|
||||
cache_timestamp = std::chrono::system_clock::now();
|
||||
cache_valid = true;
|
||||
|
||||
return cached_config;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException("RemoteProxyConfigurationResolver", "Failed to obtain proxy");
|
||||
return {};
|
||||
}
|
||||
return cached_config;
|
||||
}
|
||||
|
||||
void RemoteProxyConfigurationResolver::errorReport(const ProxyConfiguration & config)
|
||||
@ -124,7 +107,7 @@ void RemoteProxyConfigurationResolver::errorReport(const ProxyConfiguration & co
|
||||
|
||||
std::lock_guard lock(cache_mutex);
|
||||
|
||||
if (!cache_ttl.count() || !cache_valid)
|
||||
if (!remote_server_configuration.cache_ttl_.count() || !cache_valid)
|
||||
return;
|
||||
|
||||
if (std::tie(cached_config.protocol, cached_config.host, cached_config.port)
|
||||
|
@ -10,6 +10,19 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct ConnectionTimeouts;
|
||||
|
||||
struct RemoteProxyHostFetcher
|
||||
{
|
||||
virtual ~RemoteProxyHostFetcher() = default;
|
||||
virtual std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) = 0;
|
||||
};
|
||||
|
||||
struct RemoteProxyHostFetcherImpl : public RemoteProxyHostFetcher
|
||||
{
|
||||
std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) override;
|
||||
};
|
||||
|
||||
/*
|
||||
* Makes an HTTP GET request to the specified endpoint to obtain a proxy host.
|
||||
* */
|
||||
@ -22,13 +35,14 @@ public:
|
||||
Poco::URI endpoint;
|
||||
String proxy_protocol;
|
||||
unsigned proxy_port;
|
||||
unsigned cache_ttl_;
|
||||
const std::chrono::seconds cache_ttl_;
|
||||
};
|
||||
|
||||
RemoteProxyConfigurationResolver(
|
||||
const RemoteServerConfiguration & remote_server_configuration_,
|
||||
Protocol request_protocol_,
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy_ = true);
|
||||
std::shared_ptr<RemoteProxyHostFetcher> fetcher_,
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy_ = false);
|
||||
|
||||
ProxyConfiguration resolve() override;
|
||||
|
||||
@ -36,11 +50,11 @@ public:
|
||||
|
||||
private:
|
||||
RemoteServerConfiguration remote_server_configuration;
|
||||
std::shared_ptr<RemoteProxyHostFetcher> fetcher;
|
||||
|
||||
std::mutex cache_mutex;
|
||||
bool cache_valid = false;
|
||||
std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
|
||||
const std::chrono::seconds cache_ttl{0};
|
||||
ProxyConfiguration cached_config;
|
||||
};
|
||||
|
||||
|
@ -280,6 +280,10 @@ public:
|
||||
if (!initialized())
|
||||
abort();
|
||||
|
||||
/// Thread cannot join itself.
|
||||
if (state->thread_id == std::this_thread::get_id())
|
||||
abort();
|
||||
|
||||
state->event.wait();
|
||||
state.reset();
|
||||
}
|
||||
@ -293,12 +297,7 @@ public:
|
||||
|
||||
bool joinable() const
|
||||
{
|
||||
if (!state)
|
||||
return false;
|
||||
/// Thread cannot join itself.
|
||||
if (state->thread_id == std::this_thread::get_id())
|
||||
return false;
|
||||
return true;
|
||||
return initialized();
|
||||
}
|
||||
|
||||
std::thread::id get_id() const
|
||||
|
@ -637,6 +637,9 @@ void TestKeeper::finalize(const String &)
|
||||
expired = true;
|
||||
}
|
||||
|
||||
/// Signal request_queue to wake up processing thread without waiting for timeout
|
||||
requests_queue.finish();
|
||||
|
||||
processing_thread.join();
|
||||
|
||||
try
|
||||
|
@ -1,5 +1,4 @@
|
||||
#include "ZooKeeper.h"
|
||||
#include "Coordination/KeeperConstants.h"
|
||||
#include "Coordination/KeeperFeatureFlags.h"
|
||||
#include "ZooKeeperImpl.h"
|
||||
#include "KeeperException.h"
|
||||
@ -376,11 +375,14 @@ void ZooKeeper::createAncestors(const std::string & path)
|
||||
}
|
||||
|
||||
Coordination::Responses responses;
|
||||
Coordination::Error code = multiImpl(create_ops, responses, /*check_session_valid*/ false);
|
||||
const auto & [code, failure_reason] = multiImpl(create_ops, responses, /*check_session_valid*/ false);
|
||||
|
||||
if (code == Coordination::Error::ZOK)
|
||||
return;
|
||||
|
||||
if (!failure_reason.empty())
|
||||
throw KeeperException::fromMessage(code, failure_reason);
|
||||
|
||||
throw KeeperException::fromPath(code, path);
|
||||
}
|
||||
|
||||
@ -676,17 +678,19 @@ Coordination::Error ZooKeeper::trySet(const std::string & path, const std::strin
|
||||
}
|
||||
|
||||
|
||||
Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid)
|
||||
std::pair<Coordination::Error, std::string>
|
||||
ZooKeeper::multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid)
|
||||
{
|
||||
if (requests.empty())
|
||||
return Coordination::Error::ZOK;
|
||||
return {Coordination::Error::ZOK, ""};
|
||||
|
||||
std::future<Coordination::MultiResponse> future_result;
|
||||
Coordination::Requests requests_with_check_session;
|
||||
if (check_session_valid)
|
||||
{
|
||||
Coordination::Requests new_requests = requests;
|
||||
addCheckSessionOp(new_requests);
|
||||
future_result = asyncTryMultiNoThrow(new_requests);
|
||||
requests_with_check_session = requests;
|
||||
addCheckSessionOp(requests_with_check_session);
|
||||
future_result = asyncTryMultiNoThrow(requests_with_check_session);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -696,7 +700,7 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests
|
||||
if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready)
|
||||
{
|
||||
impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::Multi, requests[0]->getPath()));
|
||||
return Coordination::Error::ZOPERATIONTIMEOUT;
|
||||
return {Coordination::Error::ZOPERATIONTIMEOUT, ""};
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -704,11 +708,14 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests
|
||||
Coordination::Error code = response.error;
|
||||
responses = response.responses;
|
||||
|
||||
std::string reason;
|
||||
|
||||
if (check_session_valid)
|
||||
{
|
||||
if (code != Coordination::Error::ZOK && !Coordination::isHardwareError(code) && getFailedOpIndex(code, responses) == requests.size())
|
||||
{
|
||||
impl->finalize(fmt::format("Session was killed: {}", requests.back()->getPath()));
|
||||
reason = fmt::format("Session was killed: {}", requests_with_check_session.back()->getPath());
|
||||
impl->finalize(reason);
|
||||
code = Coordination::Error::ZSESSIONMOVED;
|
||||
}
|
||||
responses.pop_back();
|
||||
@ -717,23 +724,33 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests
|
||||
chassert(code == Coordination::Error::ZOK || Coordination::isHardwareError(code) || responses.back()->error != Coordination::Error::ZOK);
|
||||
}
|
||||
|
||||
return code;
|
||||
return {code, std::move(reason)};
|
||||
}
|
||||
}
|
||||
|
||||
Coordination::Responses ZooKeeper::multi(const Coordination::Requests & requests, bool check_session_valid)
|
||||
{
|
||||
Coordination::Responses responses;
|
||||
Coordination::Error code = multiImpl(requests, responses, check_session_valid);
|
||||
const auto & [code, failure_reason] = multiImpl(requests, responses, check_session_valid);
|
||||
if (!failure_reason.empty())
|
||||
throw KeeperException::fromMessage(code, failure_reason);
|
||||
|
||||
KeeperMultiException::check(code, requests, responses);
|
||||
return responses;
|
||||
}
|
||||
|
||||
Coordination::Error ZooKeeper::tryMulti(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid)
|
||||
{
|
||||
Coordination::Error code = multiImpl(requests, responses, check_session_valid);
|
||||
const auto & [code, failure_reason] = multiImpl(requests, responses, check_session_valid);
|
||||
|
||||
if (code != Coordination::Error::ZOK && !Coordination::isUserError(code))
|
||||
{
|
||||
if (!failure_reason.empty())
|
||||
throw KeeperException::fromMessage(code, failure_reason);
|
||||
|
||||
throw KeeperException(code);
|
||||
}
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
@ -1346,7 +1363,7 @@ Coordination::Error ZooKeeper::tryMultiNoThrow(const Coordination::Requests & re
|
||||
{
|
||||
try
|
||||
{
|
||||
return multiImpl(requests, responses, check_session_valid);
|
||||
return multiImpl(requests, responses, check_session_valid).first;
|
||||
}
|
||||
catch (const Coordination::Exception & e)
|
||||
{
|
||||
|
@ -2,10 +2,8 @@
|
||||
|
||||
#include "Types.h"
|
||||
#include <Poco/Util/LayeredConfiguration.h>
|
||||
#include <unordered_set>
|
||||
#include <future>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
@ -18,7 +16,6 @@
|
||||
#include <Common/thread_local_rng.h>
|
||||
#include <Coordination/KeeperFeatureFlags.h>
|
||||
#include <unistd.h>
|
||||
#include <random>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
@ -644,7 +641,11 @@ private:
|
||||
Coordination::Stat * stat,
|
||||
Coordination::WatchCallbackPtr watch_callback,
|
||||
Coordination::ListRequestType list_request_type);
|
||||
Coordination::Error multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid);
|
||||
|
||||
/// returns error code with optional reason
|
||||
std::pair<Coordination::Error, std::string>
|
||||
multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid);
|
||||
|
||||
Coordination::Error existsImpl(const std::string & path, Coordination::Stat * stat_, Coordination::WatchCallback watch_callback);
|
||||
Coordination::Error syncImpl(const std::string & path, std::string & returned_path);
|
||||
|
||||
|
172
src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp
Normal file
172
src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp
Normal file
@ -0,0 +1,172 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Common/RemoteProxyConfigurationResolver.h>
|
||||
#include <Poco/URI.h>
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <base/sleep.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct RemoteProxyHostFetcherMock : public DB::RemoteProxyHostFetcher
|
||||
{
|
||||
explicit RemoteProxyHostFetcherMock(const std::string & return_mock_) : return_mock(return_mock_) {}
|
||||
|
||||
std::string fetch(const Poco::URI &, const DB::ConnectionTimeouts &) override
|
||||
{
|
||||
fetch_count++;
|
||||
return return_mock;
|
||||
}
|
||||
|
||||
std::string return_mock;
|
||||
std::size_t fetch_count {0};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
TEST(RemoteProxyConfigurationResolver, HTTPOverHTTP)
|
||||
{
|
||||
const char * proxy_server_mock = "proxy1";
|
||||
auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration
|
||||
{
|
||||
Poco::URI("not_important"),
|
||||
"http",
|
||||
80,
|
||||
std::chrono::seconds {10}
|
||||
};
|
||||
|
||||
RemoteProxyConfigurationResolver resolver(
|
||||
remote_server_configuration,
|
||||
ProxyConfiguration::Protocol::HTTP,
|
||||
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock)
|
||||
);
|
||||
|
||||
auto configuration = resolver.resolve();
|
||||
|
||||
ASSERT_EQ(configuration.host, proxy_server_mock);
|
||||
ASSERT_EQ(configuration.port, 80);
|
||||
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP);
|
||||
ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTP);
|
||||
ASSERT_EQ(configuration.tunneling, false);
|
||||
}
|
||||
|
||||
TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPS)
|
||||
{
|
||||
const char * proxy_server_mock = "proxy1";
|
||||
auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration
|
||||
{
|
||||
Poco::URI("not_important"),
|
||||
"https",
|
||||
443,
|
||||
std::chrono::seconds {10}
|
||||
};
|
||||
|
||||
RemoteProxyConfigurationResolver resolver(
|
||||
remote_server_configuration,
|
||||
ProxyConfiguration::Protocol::HTTPS,
|
||||
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock)
|
||||
);
|
||||
|
||||
auto configuration = resolver.resolve();
|
||||
|
||||
ASSERT_EQ(configuration.host, proxy_server_mock);
|
||||
ASSERT_EQ(configuration.port, 443);
|
||||
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTPS);
|
||||
ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTPS);
|
||||
// tunneling should not be used, https over https.
|
||||
ASSERT_EQ(configuration.tunneling, false);
|
||||
}
|
||||
|
||||
TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTP)
|
||||
{
|
||||
const char * proxy_server_mock = "proxy1";
|
||||
auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration
|
||||
{
|
||||
Poco::URI("not_important"),
|
||||
"http",
|
||||
80,
|
||||
std::chrono::seconds {10}
|
||||
};
|
||||
|
||||
RemoteProxyConfigurationResolver resolver(
|
||||
remote_server_configuration,
|
||||
ProxyConfiguration::Protocol::HTTPS,
|
||||
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock)
|
||||
);
|
||||
|
||||
auto configuration = resolver.resolve();
|
||||
|
||||
ASSERT_EQ(configuration.host, proxy_server_mock);
|
||||
ASSERT_EQ(configuration.port, 80);
|
||||
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP);
|
||||
ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTPS);
|
||||
// tunneling should be used, https over http.
|
||||
ASSERT_EQ(configuration.tunneling, true);
|
||||
}
|
||||
|
||||
TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPNoTunneling)
|
||||
{
|
||||
const char * proxy_server_mock = "proxy1";
|
||||
auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration
|
||||
{
|
||||
Poco::URI("not_important"),
|
||||
"http",
|
||||
80,
|
||||
std::chrono::seconds {10}
|
||||
};
|
||||
|
||||
RemoteProxyConfigurationResolver resolver(
|
||||
remote_server_configuration,
|
||||
ProxyConfiguration::Protocol::HTTPS,
|
||||
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock),
|
||||
true /* disable_tunneling_for_https_requests_over_http_proxy_ */
|
||||
);
|
||||
|
||||
auto configuration = resolver.resolve();
|
||||
|
||||
ASSERT_EQ(configuration.host, proxy_server_mock);
|
||||
ASSERT_EQ(configuration.port, 80);
|
||||
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP);
|
||||
ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTPS);
|
||||
// tunneling should be used, https over http.
|
||||
ASSERT_EQ(configuration.tunneling, false);
|
||||
}
|
||||
|
||||
TEST(RemoteProxyConfigurationResolver, SimpleCacheTest)
|
||||
{
|
||||
const char * proxy_server_mock = "proxy1";
|
||||
auto cache_ttl = 5u;
|
||||
auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration
|
||||
{
|
||||
Poco::URI("not_important"),
|
||||
"http",
|
||||
80,
|
||||
std::chrono::seconds {cache_ttl}
|
||||
};
|
||||
|
||||
auto fetcher_mock = std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock);
|
||||
|
||||
RemoteProxyConfigurationResolver resolver(
|
||||
remote_server_configuration,
|
||||
ProxyConfiguration::Protocol::HTTP,
|
||||
fetcher_mock
|
||||
);
|
||||
|
||||
resolver.resolve();
|
||||
resolver.resolve();
|
||||
resolver.resolve();
|
||||
|
||||
ASSERT_EQ(fetcher_mock->fetch_count, 1u);
|
||||
|
||||
sleepForSeconds(cache_ttl * 2);
|
||||
|
||||
resolver.resolve();
|
||||
|
||||
ASSERT_EQ(fetcher_mock->fetch_count, 2);
|
||||
}
|
||||
|
||||
}
|
@ -305,7 +305,7 @@ String MonitorCommand::run()
|
||||
print(ret, "ephemerals_count", state_machine.getTotalEphemeralNodesCount());
|
||||
print(ret, "approximate_data_size", state_machine.getApproximateDataSize());
|
||||
print(ret, "key_arena_size", state_machine.getKeyArenaSize());
|
||||
print(ret, "latest_snapshot_size", state_machine.getLatestSnapshotBufSize());
|
||||
print(ret, "latest_snapshot_size", state_machine.getLatestSnapshotSize());
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_DARWIN)
|
||||
print(ret, "open_file_descriptor_count", getCurrentProcessFDCount());
|
||||
|
@ -332,9 +332,10 @@ void KeeperDispatcher::snapshotThread()
|
||||
if (shutdown_called)
|
||||
break;
|
||||
|
||||
if (snapshot_file_info.path.empty())
|
||||
if (!snapshot_file_info)
|
||||
continue;
|
||||
|
||||
chassert(snapshot_file_info->disk != nullptr);
|
||||
if (isLeader())
|
||||
snapshot_s3.uploadSnapshot(snapshot_file_info);
|
||||
}
|
||||
|
@ -618,7 +618,7 @@ KeeperSnapshotManager::KeeperSnapshotManager(
|
||||
|
||||
LOG_TRACE(log, "Found {} on {}", snapshot_file, disk->getName());
|
||||
size_t snapshot_up_to = getSnapshotPathUpToLogIdx(snapshot_file);
|
||||
auto [_, inserted] = existing_snapshots.insert_or_assign(snapshot_up_to, SnapshotFileInfo{snapshot_file, disk});
|
||||
auto [_, inserted] = existing_snapshots.insert_or_assign(snapshot_up_to, std::make_shared<SnapshotFileInfo>(snapshot_file, disk));
|
||||
|
||||
if (!inserted)
|
||||
LOG_WARNING(
|
||||
@ -651,7 +651,7 @@ KeeperSnapshotManager::KeeperSnapshotManager(
|
||||
moveSnapshotsIfNeeded();
|
||||
}
|
||||
|
||||
SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx)
|
||||
SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx)
|
||||
{
|
||||
ReadBufferFromNuraftBuffer reader(buffer);
|
||||
|
||||
@ -672,11 +672,12 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::bu
|
||||
|
||||
disk->removeFile(tmp_snapshot_file_name);
|
||||
|
||||
existing_snapshots.emplace(up_to_log_idx, SnapshotFileInfo{snapshot_file_name, disk});
|
||||
auto snapshot_file_info = std::make_shared<SnapshotFileInfo>(snapshot_file_name, disk);
|
||||
existing_snapshots.emplace(up_to_log_idx, snapshot_file_info);
|
||||
removeOutdatedSnapshotsIfNeeded();
|
||||
moveSnapshotsIfNeeded();
|
||||
|
||||
return {snapshot_file_name, disk};
|
||||
return snapshot_file_info;
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk()
|
||||
@ -690,7 +691,7 @@ nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeLatestSnapshotBuff
|
||||
}
|
||||
catch (const DB::Exception &)
|
||||
{
|
||||
const auto & [path, disk] = latest_itr->second;
|
||||
const auto & [path, disk, size] = *latest_itr->second;
|
||||
disk->removeFile(path);
|
||||
existing_snapshots.erase(latest_itr->first);
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
@ -702,7 +703,7 @@ nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeLatestSnapshotBuff
|
||||
|
||||
nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const
|
||||
{
|
||||
const auto & [snapshot_path, snapshot_disk] = existing_snapshots.at(up_to_log_idx);
|
||||
const auto & [snapshot_path, snapshot_disk, size] = *existing_snapshots.at(up_to_log_idx);
|
||||
WriteBufferFromNuraftBuffer writer;
|
||||
auto reader = snapshot_disk->readFile(snapshot_path);
|
||||
copyData(*reader, writer);
|
||||
@ -794,18 +795,18 @@ void KeeperSnapshotManager::moveSnapshotsIfNeeded()
|
||||
{
|
||||
if (idx == latest_snapshot_idx)
|
||||
{
|
||||
if (file_info.disk != latest_snapshot_disk)
|
||||
if (file_info->disk != latest_snapshot_disk)
|
||||
{
|
||||
moveSnapshotBetweenDisks(file_info.disk, file_info.path, latest_snapshot_disk, file_info.path, keeper_context);
|
||||
file_info.disk = latest_snapshot_disk;
|
||||
moveSnapshotBetweenDisks(file_info->disk, file_info->path, latest_snapshot_disk, file_info->path, keeper_context);
|
||||
file_info->disk = latest_snapshot_disk;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (file_info.disk != disk)
|
||||
if (file_info->disk != disk)
|
||||
{
|
||||
moveSnapshotBetweenDisks(file_info.disk, file_info.path, disk, file_info.path, keeper_context);
|
||||
file_info.disk = disk;
|
||||
moveSnapshotBetweenDisks(file_info->disk, file_info->path, disk, file_info->path, keeper_context);
|
||||
file_info->disk = disk;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -817,12 +818,12 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx)
|
||||
auto itr = existing_snapshots.find(log_idx);
|
||||
if (itr == existing_snapshots.end())
|
||||
throw Exception(ErrorCodes::UNKNOWN_SNAPSHOT, "Unknown snapshot with log index {}", log_idx);
|
||||
const auto & [path, disk] = itr->second;
|
||||
const auto & [path, disk, size] = *itr->second;
|
||||
disk->removeFileIfExists(path);
|
||||
existing_snapshots.erase(itr);
|
||||
}
|
||||
|
||||
SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot)
|
||||
SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot)
|
||||
{
|
||||
auto up_to_log_idx = snapshot.snapshot_meta->get_last_log_idx();
|
||||
auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd);
|
||||
@ -847,7 +848,8 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStor
|
||||
|
||||
disk->removeFile(tmp_snapshot_file_name);
|
||||
|
||||
existing_snapshots.emplace(up_to_log_idx, SnapshotFileInfo{snapshot_file_name, disk});
|
||||
auto snapshot_file_info = std::make_shared<SnapshotFileInfo>(snapshot_file_name, disk);
|
||||
existing_snapshots.emplace(up_to_log_idx, snapshot_file_info);
|
||||
|
||||
try
|
||||
{
|
||||
@ -859,7 +861,7 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStor
|
||||
tryLogCurrentException(log, "Failed to cleanup and/or move older snapshots");
|
||||
}
|
||||
|
||||
return {snapshot_file_name, disk};
|
||||
return snapshot_file_info;
|
||||
}
|
||||
|
||||
size_t KeeperSnapshotManager::getLatestSnapshotIndex() const
|
||||
@ -869,23 +871,23 @@ size_t KeeperSnapshotManager::getLatestSnapshotIndex() const
|
||||
return 0;
|
||||
}
|
||||
|
||||
SnapshotFileInfo KeeperSnapshotManager::getLatestSnapshotInfo() const
|
||||
SnapshotFileInfoPtr KeeperSnapshotManager::getLatestSnapshotInfo() const
|
||||
{
|
||||
if (!existing_snapshots.empty())
|
||||
{
|
||||
const auto & [path, disk] = existing_snapshots.at(getLatestSnapshotIndex());
|
||||
const auto & [path, disk, size] = *existing_snapshots.at(getLatestSnapshotIndex());
|
||||
|
||||
try
|
||||
{
|
||||
if (disk->exists(path))
|
||||
return {path, disk};
|
||||
return std::make_shared<SnapshotFileInfo>(path, disk);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
return {"", nullptr};
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
#include <Coordination/KeeperStorage.h>
|
||||
#include <Common/CopyableAtomic.h>
|
||||
#include <libnuraft/nuraft.hxx>
|
||||
|
||||
namespace DB
|
||||
@ -93,12 +94,20 @@ public:
|
||||
|
||||
struct SnapshotFileInfo
|
||||
{
|
||||
SnapshotFileInfo(std::string path_, DiskPtr disk_)
|
||||
: path(std::move(path_))
|
||||
, disk(std::move(disk_))
|
||||
{}
|
||||
|
||||
std::string path;
|
||||
DiskPtr disk;
|
||||
mutable std::atomic<size_t> size{0};
|
||||
};
|
||||
|
||||
using SnapshotFileInfoPtr = std::shared_ptr<SnapshotFileInfo>;
|
||||
|
||||
using KeeperStorageSnapshotPtr = std::shared_ptr<KeeperStorageSnapshot>;
|
||||
using CreateSnapshotCallback = std::function<SnapshotFileInfo(KeeperStorageSnapshotPtr &&, bool)>;
|
||||
using CreateSnapshotCallback = std::function<std::shared_ptr<SnapshotFileInfo>(KeeperStorageSnapshotPtr &&, bool)>;
|
||||
|
||||
using SnapshotMetaAndStorage = std::pair<SnapshotMetadataPtr, KeeperStoragePtr>;
|
||||
|
||||
@ -121,10 +130,10 @@ public:
|
||||
nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const;
|
||||
|
||||
/// Serialize already compressed snapshot to disk (return path)
|
||||
SnapshotFileInfo serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx);
|
||||
SnapshotFileInfoPtr serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx);
|
||||
|
||||
/// Serialize snapshot directly to disk
|
||||
SnapshotFileInfo serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot);
|
||||
SnapshotFileInfoPtr serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot);
|
||||
|
||||
SnapshotDeserializationResult deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const;
|
||||
|
||||
@ -143,7 +152,7 @@ public:
|
||||
/// The most fresh snapshot log index we have
|
||||
size_t getLatestSnapshotIndex() const;
|
||||
|
||||
SnapshotFileInfo getLatestSnapshotInfo() const;
|
||||
SnapshotFileInfoPtr getLatestSnapshotInfo() const;
|
||||
|
||||
private:
|
||||
void removeOutdatedSnapshotsIfNeeded();
|
||||
@ -159,7 +168,7 @@ private:
|
||||
/// How many snapshots to keep before remove
|
||||
const size_t snapshots_to_keep;
|
||||
/// All existing snapshots in our path (log_index -> path)
|
||||
std::map<uint64_t, SnapshotFileInfo> existing_snapshots;
|
||||
std::map<uint64_t, SnapshotFileInfoPtr> existing_snapshots;
|
||||
/// Compress snapshots in common ZSTD format instead of custom ClickHouse block LZ4 format
|
||||
const bool compress_snapshots_zstd;
|
||||
/// Superdigest for deserialization of storage
|
||||
|
@ -147,7 +147,7 @@ std::shared_ptr<KeeperSnapshotManagerS3::S3Configuration> KeeperSnapshotManagerS
|
||||
|
||||
void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapshot_file_info)
|
||||
{
|
||||
const auto & [snapshot_path, snapshot_disk] = snapshot_file_info;
|
||||
const auto & [snapshot_path, snapshot_disk, snapshot_size] = snapshot_file_info;
|
||||
try
|
||||
{
|
||||
auto s3_client = getSnapshotS3Client();
|
||||
@ -169,9 +169,9 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
|
||||
);
|
||||
};
|
||||
|
||||
LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_file_info.path);
|
||||
LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path);
|
||||
|
||||
auto snapshot_file = snapshot_disk->readFile(snapshot_file_info.path);
|
||||
auto snapshot_file = snapshot_disk->readFile(snapshot_path);
|
||||
|
||||
auto snapshot_name = fs::path(snapshot_path).filename().string();
|
||||
auto lock_file = fmt::format(".{}_LOCK", snapshot_name);
|
||||
@ -261,31 +261,33 @@ void KeeperSnapshotManagerS3::snapshotS3Thread()
|
||||
|
||||
while (!shutdown_called)
|
||||
{
|
||||
SnapshotFileInfo snapshot_file_info;
|
||||
SnapshotFileInfoPtr snapshot_file_info;
|
||||
if (!snapshots_s3_queue.pop(snapshot_file_info))
|
||||
break;
|
||||
|
||||
if (shutdown_called)
|
||||
break;
|
||||
|
||||
uploadSnapshotImpl(snapshot_file_info);
|
||||
uploadSnapshotImpl(*snapshot_file_info);
|
||||
}
|
||||
}
|
||||
|
||||
void KeeperSnapshotManagerS3::uploadSnapshot(const SnapshotFileInfo & file_info, bool async_upload)
|
||||
void KeeperSnapshotManagerS3::uploadSnapshot(const SnapshotFileInfoPtr & file_info, bool async_upload)
|
||||
{
|
||||
chassert(file_info);
|
||||
|
||||
if (getSnapshotS3Client() == nullptr)
|
||||
return;
|
||||
|
||||
if (async_upload)
|
||||
{
|
||||
if (!snapshots_s3_queue.push(file_info))
|
||||
LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", file_info.path);
|
||||
LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", file_info->path);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
uploadSnapshotImpl(file_info);
|
||||
uploadSnapshotImpl(*file_info);
|
||||
}
|
||||
|
||||
void KeeperSnapshotManagerS3::startup(const Poco::Util::AbstractConfiguration & config, const MultiVersion<Macros>::Version & macros)
|
||||
|
@ -12,8 +12,6 @@
|
||||
#include <Common/ConcurrentBoundedQueue.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
|
||||
|
||||
#include <string>
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
@ -27,13 +25,13 @@ public:
|
||||
|
||||
/// 'macros' are used to substitute macros in endpoint of disks
|
||||
void updateS3Configuration(const Poco::Util::AbstractConfiguration & config, const MultiVersion<Macros>::Version & macros);
|
||||
void uploadSnapshot(const SnapshotFileInfo & file_info, bool async_upload = true);
|
||||
void uploadSnapshot(const SnapshotFileInfoPtr & file_info, bool async_upload = true);
|
||||
|
||||
/// 'macros' are used to substitute macros in endpoint of disks
|
||||
void startup(const Poco::Util::AbstractConfiguration & config, const MultiVersion<Macros>::Version & macros);
|
||||
void shutdown();
|
||||
private:
|
||||
using SnapshotS3Queue = ConcurrentBoundedQueue<SnapshotFileInfo>;
|
||||
using SnapshotS3Queue = ConcurrentBoundedQueue<SnapshotFileInfoPtr>;
|
||||
SnapshotS3Queue snapshots_s3_queue;
|
||||
|
||||
/// Upload new snapshots to S3
|
||||
@ -63,7 +61,7 @@ public:
|
||||
KeeperSnapshotManagerS3() = default;
|
||||
|
||||
void updateS3Configuration(const Poco::Util::AbstractConfiguration &, const MultiVersion<Macros>::Version &) {}
|
||||
void uploadSnapshot(const SnapshotFileInfo &, [[maybe_unused]] bool async_upload = true) {}
|
||||
void uploadSnapshot(const SnapshotFileInfoPtr &, [[maybe_unused]] bool async_upload = true) {}
|
||||
|
||||
void startup(const Poco::Util::AbstractConfiguration &, const MultiVersion<Macros>::Version &) {}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <atomic>
|
||||
#include <cerrno>
|
||||
#include <Coordination/KeeperSnapshotManager.h>
|
||||
#include <Coordination/KeeperStateMachine.h>
|
||||
@ -90,8 +91,9 @@ void KeeperStateMachine::init()
|
||||
latest_snapshot_buf = snapshot_manager.deserializeSnapshotBufferFromDisk(latest_log_index);
|
||||
auto snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_buf);
|
||||
latest_snapshot_info = snapshot_manager.getLatestSnapshotInfo();
|
||||
chassert(latest_snapshot_info);
|
||||
|
||||
if (isLocalDisk(*latest_snapshot_info.disk))
|
||||
if (isLocalDisk(*latest_snapshot_info->disk))
|
||||
latest_snapshot_buf = nullptr;
|
||||
|
||||
storage = std::move(snapshot_deserialization_result.storage);
|
||||
@ -603,7 +605,11 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreations);
|
||||
LOG_DEBUG(log, "Created persistent snapshot {} with path {}", latest_snapshot_meta->get_last_log_idx(), latest_snapshot_info.path);
|
||||
LOG_DEBUG(
|
||||
log,
|
||||
"Created persistent snapshot {} with path {}",
|
||||
latest_snapshot_meta->get_last_log_idx(),
|
||||
latest_snapshot_info->path);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -627,7 +633,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
|
||||
|
||||
when_done(ret, exception);
|
||||
|
||||
return ret ? latest_snapshot_info : SnapshotFileInfo{};
|
||||
return ret ? latest_snapshot_info : nullptr;
|
||||
};
|
||||
|
||||
if (keeper_context->getServerState() == KeeperContext::Phase::SHUTDOWN)
|
||||
@ -635,9 +641,9 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
|
||||
LOG_INFO(log, "Creating a snapshot during shutdown because 'create_snapshot_on_exit' is enabled.");
|
||||
auto snapshot_file_info = snapshot_task.create_snapshot(std::move(snapshot_task.snapshot), /*execute_only_cleanup=*/false);
|
||||
|
||||
if (!snapshot_file_info.path.empty() && snapshot_manager_s3)
|
||||
if (snapshot_file_info && snapshot_manager_s3)
|
||||
{
|
||||
LOG_INFO(log, "Uploading snapshot {} during shutdown because 'upload_snapshot_on_exit' is enabled.", snapshot_file_info.path);
|
||||
LOG_INFO(log, "Uploading snapshot {} during shutdown because 'upload_snapshot_on_exit' is enabled.", snapshot_file_info->path);
|
||||
snapshot_manager_s3->uploadSnapshot(snapshot_file_info, /* asnyc_upload */ false);
|
||||
}
|
||||
|
||||
@ -672,7 +678,7 @@ void KeeperStateMachine::save_logical_snp_obj(
|
||||
latest_snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(data, s.get_last_log_idx());
|
||||
latest_snapshot_meta = cloned_meta;
|
||||
latest_snapshot_buf = std::move(cloned_buffer);
|
||||
LOG_DEBUG(log, "Saved snapshot {} to path {}", s.get_last_log_idx(), latest_snapshot_info.path);
|
||||
LOG_DEBUG(log, "Saved snapshot {} to path {}", s.get_last_log_idx(), latest_snapshot_info->path);
|
||||
obj_id++;
|
||||
ProfileEvents::increment(ProfileEvents::KeeperSaveSnapshot);
|
||||
}
|
||||
@ -733,7 +739,7 @@ int KeeperStateMachine::read_logical_snp_obj(
|
||||
return -1;
|
||||
}
|
||||
|
||||
const auto & [path, disk] = latest_snapshot_info;
|
||||
const auto & [path, disk, size] = *latest_snapshot_info;
|
||||
if (isLocalDisk(*disk))
|
||||
{
|
||||
auto full_path = fs::path(disk->getPath()) / path;
|
||||
@ -862,12 +868,27 @@ uint64_t KeeperStateMachine::getKeyArenaSize() const
|
||||
return storage->getArenaDataSize();
|
||||
}
|
||||
|
||||
uint64_t KeeperStateMachine::getLatestSnapshotBufSize() const
|
||||
uint64_t KeeperStateMachine::getLatestSnapshotSize() const
|
||||
{
|
||||
std::lock_guard lock(snapshots_lock);
|
||||
if (latest_snapshot_buf)
|
||||
return latest_snapshot_buf->size();
|
||||
return 0;
|
||||
auto snapshot_info = [&]
|
||||
{
|
||||
std::lock_guard lock(snapshots_lock);
|
||||
return latest_snapshot_info;
|
||||
}();
|
||||
|
||||
if (snapshot_info == nullptr || snapshot_info->disk == nullptr)
|
||||
return 0;
|
||||
|
||||
/// there is a possibility multiple threads can try to get size
|
||||
/// this can happen in rare cases while it's not a heavy operation
|
||||
size_t size = snapshot_info->size.load(std::memory_order_relaxed);
|
||||
if (size == 0)
|
||||
{
|
||||
size = snapshot_info->disk->getFileSize(snapshot_info->path);
|
||||
snapshot_info->size.store(size, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
ClusterConfigPtr KeeperStateMachine::getClusterConfig() const
|
||||
|
@ -124,7 +124,7 @@ public:
|
||||
uint64_t getTotalEphemeralNodesCount() const;
|
||||
uint64_t getApproximateDataSize() const;
|
||||
uint64_t getKeyArenaSize() const;
|
||||
uint64_t getLatestSnapshotBufSize() const;
|
||||
uint64_t getLatestSnapshotSize() const;
|
||||
|
||||
void recalculateStorageStats();
|
||||
|
||||
@ -135,7 +135,7 @@ private:
|
||||
/// In our state machine we always have a single snapshot which is stored
|
||||
/// in memory in compressed (serialized) format.
|
||||
SnapshotMetadataPtr latest_snapshot_meta = nullptr;
|
||||
SnapshotFileInfo latest_snapshot_info;
|
||||
std::shared_ptr<SnapshotFileInfo> latest_snapshot_info;
|
||||
nuraft::ptr<nuraft::buffer> latest_snapshot_buf = nullptr;
|
||||
|
||||
/// Main state machine logic
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user