mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into fix-alter-query-cte
This commit is contained in:
commit
63a3555881
195
.clang-tidy
195
.clang-tidy
@ -5,128 +5,127 @@
|
||||
# a) the new check is not controversial (this includes many checks in readability-* and google-*) or
|
||||
# b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*).
|
||||
|
||||
# TODO: Once clang(-tidy) 17 is the minimum, we can convert this list to YAML
|
||||
# See https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/ReleaseNotes.html#improvements-to-clang-tidy
|
||||
|
||||
# TODO Let clang-tidy check headers in further directories
|
||||
# --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$'
|
||||
HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$'
|
||||
|
||||
Checks: '*,
|
||||
-abseil-*,
|
||||
Checks: [
|
||||
'*',
|
||||
|
||||
-altera-*,
|
||||
'-abseil-*',
|
||||
|
||||
-android-*,
|
||||
'-altera-*',
|
||||
|
||||
-bugprone-assignment-in-if-condition,
|
||||
-bugprone-branch-clone,
|
||||
-bugprone-easily-swappable-parameters,
|
||||
-bugprone-exception-escape,
|
||||
-bugprone-implicit-widening-of-multiplication-result,
|
||||
-bugprone-narrowing-conversions,
|
||||
-bugprone-not-null-terminated-result,
|
||||
-bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
|
||||
-bugprone-unchecked-optional-access,
|
||||
'-android-*',
|
||||
|
||||
-cert-dcl16-c,
|
||||
-cert-dcl37-c,
|
||||
-cert-dcl51-cpp,
|
||||
-cert-err58-cpp,
|
||||
-cert-msc32-c,
|
||||
-cert-msc51-cpp,
|
||||
-cert-oop54-cpp,
|
||||
-cert-oop57-cpp,
|
||||
'-bugprone-assignment-in-if-condition',
|
||||
'-bugprone-branch-clone',
|
||||
'-bugprone-easily-swappable-parameters',
|
||||
'-bugprone-exception-escape',
|
||||
'-bugprone-implicit-widening-of-multiplication-result',
|
||||
'-bugprone-narrowing-conversions',
|
||||
'-bugprone-not-null-terminated-result',
|
||||
'-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
|
||||
'-bugprone-unchecked-optional-access',
|
||||
|
||||
-clang-analyzer-unix.Malloc,
|
||||
'-cert-dcl16-c',
|
||||
'-cert-dcl37-c',
|
||||
'-cert-dcl51-cpp',
|
||||
'-cert-err58-cpp',
|
||||
'-cert-msc32-c',
|
||||
'-cert-msc51-cpp',
|
||||
'-cert-oop54-cpp',
|
||||
'-cert-oop57-cpp',
|
||||
|
||||
-cppcoreguidelines-*, # impractical in a codebase as large as ClickHouse, also slow
|
||||
'-clang-analyzer-unix.Malloc',
|
||||
|
||||
-darwin-*,
|
||||
'-cppcoreguidelines-*', # impractical in a codebase as large as ClickHouse, also slow
|
||||
|
||||
-fuchsia-*,
|
||||
'-darwin-*',
|
||||
|
||||
-google-build-using-namespace,
|
||||
-google-readability-braces-around-statements,
|
||||
-google-readability-casting,
|
||||
-google-readability-function-size,
|
||||
-google-readability-namespace-comments,
|
||||
-google-readability-todo,
|
||||
'-fuchsia-*',
|
||||
|
||||
-hicpp-avoid-c-arrays,
|
||||
-hicpp-avoid-goto,
|
||||
-hicpp-braces-around-statements,
|
||||
-hicpp-explicit-conversions,
|
||||
-hicpp-function-size,
|
||||
-hicpp-member-init,
|
||||
-hicpp-move-const-arg,
|
||||
-hicpp-multiway-paths-covered,
|
||||
-hicpp-named-parameter,
|
||||
-hicpp-no-array-decay,
|
||||
-hicpp-no-assembler,
|
||||
-hicpp-no-malloc,
|
||||
-hicpp-signed-bitwise,
|
||||
-hicpp-special-member-functions,
|
||||
-hicpp-uppercase-literal-suffix,
|
||||
-hicpp-use-auto,
|
||||
-hicpp-use-emplace,
|
||||
-hicpp-vararg,
|
||||
'-google-build-using-namespace',
|
||||
'-google-readability-braces-around-statements',
|
||||
'-google-readability-casting',
|
||||
'-google-readability-function-size',
|
||||
'-google-readability-namespace-comments',
|
||||
'-google-readability-todo',
|
||||
|
||||
-linuxkernel-*,
|
||||
'-hicpp-avoid-c-arrays',
|
||||
'-hicpp-avoid-goto',
|
||||
'-hicpp-braces-around-statements',
|
||||
'-hicpp-explicit-conversions',
|
||||
'-hicpp-function-size',
|
||||
'-hicpp-member-init',
|
||||
'-hicpp-move-const-arg',
|
||||
'-hicpp-multiway-paths-covered',
|
||||
'-hicpp-named-parameter',
|
||||
'-hicpp-no-array-decay',
|
||||
'-hicpp-no-assembler',
|
||||
'-hicpp-no-malloc',
|
||||
'-hicpp-signed-bitwise',
|
||||
'-hicpp-special-member-functions',
|
||||
'-hicpp-uppercase-literal-suffix',
|
||||
'-hicpp-use-auto',
|
||||
'-hicpp-use-emplace',
|
||||
'-hicpp-vararg',
|
||||
|
||||
-llvm-*,
|
||||
'-linuxkernel-*',
|
||||
|
||||
-llvmlibc-*,
|
||||
'-llvm-*',
|
||||
|
||||
-openmp-*,
|
||||
'-llvmlibc-*',
|
||||
|
||||
-misc-const-correctness,
|
||||
-misc-include-cleaner, # useful but far too many occurrences
|
||||
-misc-no-recursion,
|
||||
-misc-non-private-member-variables-in-classes,
|
||||
-misc-confusable-identifiers, # useful but slooow
|
||||
-misc-use-anonymous-namespace,
|
||||
'-openmp-*',
|
||||
|
||||
-modernize-avoid-c-arrays,
|
||||
-modernize-concat-nested-namespaces,
|
||||
-modernize-macro-to-enum,
|
||||
-modernize-pass-by-value,
|
||||
-modernize-return-braced-init-list,
|
||||
-modernize-use-auto,
|
||||
-modernize-use-default-member-init,
|
||||
-modernize-use-emplace,
|
||||
-modernize-use-nodiscard,
|
||||
-modernize-use-override,
|
||||
-modernize-use-trailing-return-type,
|
||||
'-misc-const-correctness',
|
||||
'-misc-include-cleaner', # useful but far too many occurrences
|
||||
'-misc-no-recursion',
|
||||
'-misc-non-private-member-variables-in-classes',
|
||||
'-misc-confusable-identifiers', # useful but slooo
|
||||
'-misc-use-anonymous-namespace',
|
||||
|
||||
-performance-inefficient-string-concatenation,
|
||||
-performance-no-int-to-ptr,
|
||||
-performance-avoid-endl,
|
||||
-performance-unnecessary-value-param,
|
||||
'-modernize-avoid-c-arrays',
|
||||
'-modernize-concat-nested-namespaces',
|
||||
'-modernize-macro-to-enum',
|
||||
'-modernize-pass-by-value',
|
||||
'-modernize-return-braced-init-list',
|
||||
'-modernize-use-auto',
|
||||
'-modernize-use-default-member-init',
|
||||
'-modernize-use-emplace',
|
||||
'-modernize-use-nodiscard',
|
||||
'-modernize-use-override',
|
||||
'-modernize-use-trailing-return-type',
|
||||
|
||||
-portability-simd-intrinsics,
|
||||
'-performance-inefficient-string-concatenation',
|
||||
'-performance-no-int-to-ptr',
|
||||
'-performance-avoid-endl',
|
||||
'-performance-unnecessary-value-param',
|
||||
|
||||
-readability-avoid-unconditional-preprocessor-if,
|
||||
-readability-braces-around-statements,
|
||||
-readability-convert-member-functions-to-static,
|
||||
-readability-else-after-return,
|
||||
-readability-function-cognitive-complexity,
|
||||
-readability-function-size,
|
||||
-readability-identifier-length,
|
||||
-readability-identifier-naming, # useful but too slow
|
||||
-readability-implicit-bool-conversion,
|
||||
-readability-isolate-declaration,
|
||||
-readability-magic-numbers,
|
||||
-readability-named-parameter,
|
||||
-readability-redundant-declaration,
|
||||
-readability-simplify-boolean-expr,
|
||||
-readability-static-accessed-through-instance,
|
||||
-readability-suspicious-call-argument,
|
||||
-readability-uppercase-literal-suffix,
|
||||
-readability-use-anyofallof,
|
||||
'-portability-simd-intrinsics',
|
||||
|
||||
-zircon-*,
|
||||
'
|
||||
'-readability-avoid-unconditional-preprocessor-if',
|
||||
'-readability-braces-around-statements',
|
||||
'-readability-convert-member-functions-to-static',
|
||||
'-readability-else-after-return',
|
||||
'-readability-function-cognitive-complexity',
|
||||
'-readability-function-size',
|
||||
'-readability-identifier-length',
|
||||
'-readability-identifier-naming', # useful but too slow
|
||||
'-readability-implicit-bool-conversion',
|
||||
'-readability-isolate-declaration',
|
||||
'-readability-magic-numbers',
|
||||
'-readability-named-parameter',
|
||||
'-readability-redundant-declaration',
|
||||
'-readability-simplify-boolean-expr',
|
||||
'-readability-static-accessed-through-instance',
|
||||
'-readability-suspicious-call-argument',
|
||||
'-readability-uppercase-literal-suffix',
|
||||
'-readability-use-anyofallof',
|
||||
|
||||
'-zircon-*'
|
||||
]
|
||||
|
||||
WarningsAsErrors: '*'
|
||||
|
||||
|
59
.github/workflows/nightly.yml
vendored
59
.github/workflows/nightly.yml
vendored
@ -45,62 +45,3 @@ jobs:
|
||||
with:
|
||||
data: "${{ needs.RunConfig.outputs.data }}"
|
||||
set_latest: true
|
||||
SonarCloud:
|
||||
runs-on: [self-hosted, builder]
|
||||
env:
|
||||
SONAR_SCANNER_VERSION: 4.8.0.2856
|
||||
SONAR_SERVER_URL: "https://sonarcloud.io"
|
||||
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
|
||||
CC: clang-17
|
||||
CXX: clang++-17
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
|
||||
filter: tree:0
|
||||
submodules: true
|
||||
- name: Set up JDK 11
|
||||
uses: actions/setup-java@v1
|
||||
with:
|
||||
java-version: 11
|
||||
- name: Download and set up sonar-scanner
|
||||
env:
|
||||
SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip
|
||||
run: |
|
||||
mkdir -p "$HOME/.sonar"
|
||||
curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}"
|
||||
unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/"
|
||||
echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH"
|
||||
- name: Download and set up build-wrapper
|
||||
env:
|
||||
BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip
|
||||
run: |
|
||||
curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}"
|
||||
unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/"
|
||||
echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH"
|
||||
- name: Set Up Build Tools
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm
|
||||
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
- name: Run build-wrapper
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
cd ..
|
||||
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/
|
||||
- name: Run sonar-scanner
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
||||
run: |
|
||||
sonar-scanner \
|
||||
--define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \
|
||||
--define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
|
||||
--define sonar.projectKey="ClickHouse_ClickHouse" \
|
||||
--define sonar.organization="clickhouse-java" \
|
||||
--define sonar.cfamily.cpp23.enabled=true \
|
||||
--define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql"
|
||||
|
1
.github/workflows/pull_request.yml
vendored
1
.github/workflows/pull_request.yml
vendored
@ -172,6 +172,7 @@ jobs:
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 finish_check.py
|
||||
python3 merge_pr.py --check-approved
|
||||
|
||||
|
||||
#############################################################################################
|
||||
|
@ -56,13 +56,13 @@ option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile t
|
||||
if (ENABLE_CHECK_HEAVY_BUILDS)
|
||||
# set DATA (since RSS does not work since 2.6.x+) to 5G
|
||||
set (RLIMIT_DATA 5000000000)
|
||||
# set VIRT (RLIMIT_AS) to 10G (DATA*10)
|
||||
# set VIRT (RLIMIT_AS) to 10G (DATA*2)
|
||||
set (RLIMIT_AS 10000000000)
|
||||
# set CPU time limit to 1000 seconds
|
||||
set (RLIMIT_CPU 1000)
|
||||
|
||||
# -fsanitize=memory and address are too heavy
|
||||
if (SANITIZE)
|
||||
if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE)
|
||||
set (RLIMIT_DATA 10000000000) # 10G
|
||||
endif()
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include "coverage.h"
|
||||
#include <sys/mman.h>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wreserved-identifier"
|
||||
#pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
|
||||
|
||||
/// WITH_COVERAGE enables the default implementation of code coverage,
|
||||
|
@ -108,16 +108,22 @@
|
||||
{
|
||||
[[noreturn]] void abortOnFailedAssertion(const String & description);
|
||||
}
|
||||
#define chassert(x) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
|
||||
#define chassert_1(x, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
|
||||
#define chassert_2(x, comment, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(comment); } while (0)
|
||||
#define UNREACHABLE() abort()
|
||||
// clang-format off
|
||||
#else
|
||||
/// Here sizeof() trick is used to suppress unused warning for result,
|
||||
/// since simple "(void)x" will evaluate the expression, while
|
||||
/// "sizeof(!(x))" will not.
|
||||
#define chassert(x) (void)sizeof(!(x))
|
||||
#define chassert_1(x, ...) (void)sizeof(!(x))
|
||||
#define chassert_2(x, comment, ...) (void)sizeof(!(x))
|
||||
#define UNREACHABLE() __builtin_unreachable()
|
||||
#endif
|
||||
#define CHASSERT_DISPATCH(_1,_2, N,...) N(_1, _2)
|
||||
#define CHASSERT_INVOKE(tuple) CHASSERT_DISPATCH tuple
|
||||
#define chassert(...) CHASSERT_INVOKE((__VA_ARGS__, chassert_2, chassert_1))
|
||||
|
||||
#endif
|
||||
|
||||
/// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
|
||||
|
@ -59,8 +59,8 @@ using ComparatorWrapper = Comparator;
|
||||
|
||||
#endif
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wold-style-cast"
|
||||
|
||||
#include <miniselect/floyd_rivest_select.h>
|
||||
|
||||
@ -115,7 +115,7 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
|
||||
::partial_sort(first, middle, last, comparator());
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
template <typename RandomIt, typename Compare>
|
||||
void sort(RandomIt first, RandomIt last, Compare compare)
|
||||
|
@ -93,7 +93,7 @@ void TCPServerDispatcher::release()
|
||||
|
||||
void TCPServerDispatcher::run()
|
||||
{
|
||||
AutoPtr<TCPServerDispatcher> guard(this, true); // ensure object stays alive
|
||||
AutoPtr<TCPServerDispatcher> guard(this); // ensure object stays alive
|
||||
|
||||
int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds();
|
||||
|
||||
@ -149,11 +149,13 @@ void TCPServerDispatcher::enqueue(const StreamSocket& socket)
|
||||
{
|
||||
try
|
||||
{
|
||||
this->duplicate();
|
||||
_threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName);
|
||||
++_currentThreads;
|
||||
}
|
||||
catch (Poco::Exception& exc)
|
||||
{
|
||||
this->release();
|
||||
++_refusedConnections;
|
||||
std::cerr << "Got exception while starting thread for connection. Error code: "
|
||||
<< exc.code() << ", message: '" << exc.displayText() << "'" << std::endl;
|
||||
|
2
contrib/curl
vendored
2
contrib/curl
vendored
@ -1 +1 @@
|
||||
Subproject commit 5ce164e0e9290c96eb7d502173426c0a135ec008
|
||||
Subproject commit 1a05e833f8f7140628b27882b10525fd9ec4b873
|
2
contrib/libhdfs3
vendored
2
contrib/libhdfs3
vendored
@ -1 +1 @@
|
||||
Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103
|
||||
Subproject commit 0d04201c45359f0d0701fb1e8297d25eff7cfecf
|
@ -17,6 +17,8 @@
|
||||
#ifndef METROHASH_METROHASH_128_H
|
||||
#define METROHASH_METROHASH_128_H
|
||||
|
||||
// NOLINTBEGIN(readability-avoid-const-params-in-decls)
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
class MetroHash128
|
||||
@ -68,5 +70,6 @@ private:
|
||||
void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);
|
||||
void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);
|
||||
|
||||
// NOLINTEND(readability-avoid-const-params-in-decls)
|
||||
|
||||
#endif // #ifndef METROHASH_METROHASH_128_H
|
||||
|
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.2.1.2248"
|
||||
ARG VERSION="24.2.2.71"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -4,6 +4,9 @@ FROM clickhouse/fasttest:$FROM_TAG
|
||||
ENV CC=clang-${LLVM_VERSION}
|
||||
ENV CXX=clang++-${LLVM_VERSION}
|
||||
|
||||
# If the cctools is updated, then first build it in the CI, then update here in a different commit
|
||||
COPY --from=clickhouse/cctools:d9e3596e706b /cctools /cctools
|
||||
|
||||
# Rust toolchain and libraries
|
||||
ENV RUSTUP_HOME=/rust/rustup
|
||||
ENV CARGO_HOME=/rust/cargo
|
||||
@ -73,9 +76,6 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \
|
||||
"https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \
|
||||
&& chmod +x /usr/bin/clang-tidy-cache
|
||||
|
||||
# If the cctools is updated, then first build it in the CI, then update here in a different commit
|
||||
COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools
|
||||
|
||||
RUN mkdir /workdir && chmod 777 /workdir
|
||||
WORKDIR /workdir
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
# It's based on the assumption that we don't care of the cctools version so much
|
||||
# It event does not depend on the clickhouse/fasttest in the `docker/images.json`
|
||||
ARG FROM_TAG=latest
|
||||
FROM clickhouse/fasttest:$FROM_TAG
|
||||
FROM clickhouse/fasttest:$FROM_TAG as builder
|
||||
|
||||
ENV CC=clang-${LLVM_VERSION}
|
||||
ENV CXX=clang++-${LLVM_VERSION}
|
||||
@ -29,3 +29,6 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
|
||||
&& make install -j$(nproc) \
|
||||
&& cd ../.. \
|
||||
&& rm -rf cctools-port
|
||||
|
||||
FROM scratch
|
||||
COPY --from=builder /cctools /cctools
|
||||
|
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.2.1.2248"
|
||||
ARG VERSION="24.2.2.71"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="24.2.1.2248"
|
||||
ARG VERSION="24.2.2.71"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
|
64
docs/changelogs/v23.12.5.81-stable.md
Normal file
64
docs/changelogs/v23.12.5.81-stable.md
Normal file
@ -0,0 +1,64 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v23.12.5.81-stable (a0fbe3ae813) FIXME as compared to v23.12.4.15-stable (4233d111d20)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#60290](https://github.com/ClickHouse/ClickHouse/issues/60290): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Backported in [#60830](https://github.com/ClickHouse/ClickHouse/issues/60830): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#59883](https://github.com/ClickHouse/ClickHouse/issues/59883): If you want to run initdb scripts every time when ClickHouse container is starting you shoud initialize environment varible CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### CI Fix or Improvement (changelog entry is not required)
|
||||
|
||||
* Backported in [#60767](https://github.com/ClickHouse/ClickHouse/issues/60767): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60582](https://github.com/ClickHouse/ClickHouse/issues/60582): Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61041](https://github.com/ClickHouse/ClickHouse/issues/61041): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#61030](https://github.com/ClickHouse/ClickHouse/issues/61030): ... [#61022](https://github.com/ClickHouse/ClickHouse/pull/61022) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61224](https://github.com/ClickHouse/ClickHouse/issues/61224): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Backported in [#61190](https://github.com/ClickHouse/ClickHouse/issues/61190): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
|
||||
|
||||
#### NO CL ENTRY
|
||||
|
||||
* NO CL ENTRY: 'Revert "Backport [#59798](https://github.com/ClickHouse/ClickHouse/issues/59798) to 23.12: CI: do not reuse builds on release branches"'. [#59979](https://github.com/ClickHouse/ClickHouse/pull/59979) ([Max K.](https://github.com/maxknv)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)).
|
||||
* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)).
|
||||
* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
24
docs/changelogs/v23.3.21.26-lts.md
Normal file
24
docs/changelogs/v23.3.21.26-lts.md
Normal file
@ -0,0 +1,24 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v23.3.21.26-lts (d9672a3731f) FIXME as compared to v23.3.20.27-lts (cc974ba4f81)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix reading from sparse columns after restart [#49660](https://github.com/ClickHouse/ClickHouse/pull/49660) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
30
docs/changelogs/v23.8.11.28-lts.md
Normal file
30
docs/changelogs/v23.8.11.28-lts.md
Normal file
@ -0,0 +1,30 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v23.8.11.28-lts (31879d2ab4c) FIXME as compared to v23.8.10.43-lts (a278225bba9)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#60828](https://github.com/ClickHouse/ClickHouse/issues/60828): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### NO CL ENTRY
|
||||
|
||||
* NO CL ENTRY: 'Use the current branch test-utils to build cctools'. [#61276](https://github.com/ClickHouse/ClickHouse/pull/61276) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
26
docs/changelogs/v24.1.7.18-stable.md
Normal file
26
docs/changelogs/v24.1.7.18-stable.md
Normal file
@ -0,0 +1,26 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.1.7.18-stable (90925babd78) FIXME as compared to v24.1.6.52-stable (fa09f677bc9)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### CI Fix or Improvement (changelog entry is not required)
|
||||
|
||||
* Backported in [#61043](https://github.com/ClickHouse/ClickHouse/issues/61043): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#61168](https://github.com/ClickHouse/ClickHouse/issues/61168): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61192](https://github.com/ClickHouse/ClickHouse/issues/61192): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
55
docs/changelogs/v24.2.2.71-stable.md
Normal file
55
docs/changelogs/v24.2.2.71-stable.md
Normal file
@ -0,0 +1,55 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.2.2.71-stable (9293d361e72) FIXME as compared to v24.2.1.2248-stable (891689a4150)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#60834](https://github.com/ClickHouse/ClickHouse/issues/60834): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* PartsSplitter invalid ranges for the same part [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Try to avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
|
||||
* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### CI Fix or Improvement (changelog entry is not required)
|
||||
|
||||
* Backported in [#60758](https://github.com/ClickHouse/ClickHouse/issues/60758): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60706](https://github.com/ClickHouse/ClickHouse/issues/60706): Eliminates the need to provide input args to docker server jobs to clean yml files. [#60602](https://github.com/ClickHouse/ClickHouse/pull/60602) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61045](https://github.com/ClickHouse/ClickHouse/issues/61045): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60721](https://github.com/ClickHouse/ClickHouse/issues/60721): Fix build_report job so that it's defined by ci_config only (not yml file). [#60613](https://github.com/ClickHouse/ClickHouse/pull/60613) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60668](https://github.com/ClickHouse/ClickHouse/issues/60668): Do not await ci pending jobs on release branches decrease wait timeout to fit into gh job timeout. [#60652](https://github.com/ClickHouse/ClickHouse/pull/60652) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60863](https://github.com/ClickHouse/ClickHouse/issues/60863): Set limited number of builds for "special build check" report in backports. [#60850](https://github.com/ClickHouse/ClickHouse/pull/60850) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60946](https://github.com/ClickHouse/ClickHouse/issues/60946): ... [#60935](https://github.com/ClickHouse/ClickHouse/pull/60935) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60972](https://github.com/ClickHouse/ClickHouse/issues/60972): ... [#60952](https://github.com/ClickHouse/ClickHouse/pull/60952) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60980](https://github.com/ClickHouse/ClickHouse/issues/60980): ... [#60958](https://github.com/ClickHouse/ClickHouse/pull/60958) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61170](https://github.com/ClickHouse/ClickHouse/issues/61170): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61181](https://github.com/ClickHouse/ClickHouse/issues/61181): ... [#61172](https://github.com/ClickHouse/ClickHouse/pull/61172) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61228](https://github.com/ClickHouse/ClickHouse/issues/61228): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Backported in [#61194](https://github.com/ClickHouse/ClickHouse/issues/61194): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61244](https://github.com/ClickHouse/ClickHouse/issues/61244): ... [#61214](https://github.com/ClickHouse/ClickHouse/pull/61214) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61388](https://github.com/ClickHouse/ClickHouse/issues/61388):. [#61373](https://github.com/ClickHouse/ClickHouse/pull/61373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* CI: make workflow yml abstract [#60421](https://github.com/ClickHouse/ClickHouse/pull/60421) ([Max K.](https://github.com/maxknv)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* General sanity in function `seriesOutliersDetectTukey` [#60535](https://github.com/ClickHouse/ClickHouse/pull/60535) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
@ -18,8 +18,8 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
|
||||
name1 [type1],
|
||||
name2 [type2],
|
||||
...
|
||||
) ENGINE = RabbitMQ SETTINGS
|
||||
rabbitmq_host_port = 'host:port' [or rabbitmq_address = 'amqp(s)://guest:guest@localhost/vhost'],
|
||||
@ -198,6 +198,10 @@ Additional virtual columns when `kafka_handle_error_mode='stream'`:
|
||||
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully.
|
||||
|
||||
## Caveats {#caveats}
|
||||
|
||||
Even though you may specify [default column expressions](/docs/en/sql-reference/statements/create/table.md/#default_values) (such as `DEFAULT`, `MATERIALIZED`, `ALIAS`) in the table definition, these will be ignored. Instead, the columns will be filled with their respective default values for their types.
|
||||
|
||||
## Data formats support {#data-formats-support}
|
||||
|
||||
RabbitMQ engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse.
|
||||
|
@ -946,96 +946,6 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF
|
||||
|
||||
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting.
|
||||
|
||||
### Dynamic Storage
|
||||
|
||||
This example query shows how to attach a table stored at a URL and configure the
|
||||
remote storage within the query. The web storage is not configured in the ClickHouse
|
||||
configuration files; all the settings are in the CREATE/ATTACH query.
|
||||
|
||||
:::note
|
||||
The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
|
||||
:::
|
||||
|
||||
#### Example dynamic web storage
|
||||
|
||||
:::tip
|
||||
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver)
|
||||
:::
|
||||
|
||||
In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.
|
||||
|
||||
```sql
|
||||
# highlight-next-line
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
### Nested Dynamic Storage
|
||||
|
||||
This example query builds on the above dynamic disk configuration and shows how to
|
||||
use a local disk to cache data from a table stored at a URL. Neither the cache disk
|
||||
nor the web storage is configured in the ClickHouse configuration files; both are
|
||||
configured in the CREATE/ATTACH query settings.
|
||||
|
||||
In the settings highlighted below notice that the disk of `type=web` is nested within
|
||||
the disk of `type=cache`.
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=cache,
|
||||
max_size='1Gi',
|
||||
path='/var/lib/clickhouse/custom_disk_cache/',
|
||||
disk=disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
)
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
### Details {#details}
|
||||
|
||||
In the case of `MergeTree` tables, data is getting to disk in different ways:
|
||||
@ -1064,13 +974,11 @@ During this time, they are not moved to other volumes or disks. Therefore, until
|
||||
|
||||
User can assign new big parts to different disks of a [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures) volume in a balanced way using the [min_bytes_to_rebalance_partition_over_jbod](/docs/en/operations/settings/merge-tree-settings.md/#min-bytes-to-rebalance-partition-over-jbod) setting.
|
||||
|
||||
## Using S3 for Data Storage {#table_engine-mergetree-s3}
|
||||
## Using External Storage for Data Storage {#table_engine-mergetree-s3}
|
||||
|
||||
:::note
|
||||
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
|
||||
:::
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. See [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage) for more details.
|
||||
|
||||
`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
|
||||
Example for [S3](https://aws.amazon.com/s3/) as external storage using a disk with type `s3`.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
@ -1112,253 +1020,12 @@ Configuration markup:
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Also see [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage).
|
||||
|
||||
:::note cache configuration
|
||||
ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions.
|
||||
:::
|
||||
|
||||
### Configuring the S3 disk
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
|
||||
- `access_key_id` — S3 access key id.
|
||||
- `secret_access_key` — S3 secret access key.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `region` — S3 region name.
|
||||
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
|
||||
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
|
||||
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
|
||||
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
|
||||
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
|
||||
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
|
||||
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
|
||||
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
|
||||
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
|
||||
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
|
||||
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
|
||||
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
|
||||
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
|
||||
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
|
||||
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
|
||||
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `key_template` — Define the format with which the object keys are generated. By default, Clickhouse takes `root path` from `endpoint` option and adds random generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have a full control how to the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`.
|
||||
- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the objects keys which were stored in the metadata files with the metadata version lower that `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.
|
||||
|
||||
### Configuring the cache
|
||||
|
||||
This is the cache configuration from above:
|
||||
```xml
|
||||
<s3_cache>
|
||||
<type>cache</type>
|
||||
<disk>s3</disk>
|
||||
<path>/var/lib/clickhouse/disks/s3_cache/</path>
|
||||
<max_size>10Gi</max_size>
|
||||
</s3_cache>
|
||||
```
|
||||
|
||||
These parameters define the cache layer:
|
||||
- `type` — If a disk is of type `cache` it caches mark and index files in memory.
|
||||
- `disk` — The name of the disk that will be cached.
|
||||
|
||||
Cache parameters:
|
||||
- `path` — The path where metadata for the cache is stored.
|
||||
- `max_size` — The size (amount of disk space) that the cache can grow to.
|
||||
|
||||
:::tip
|
||||
There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details.
|
||||
:::
|
||||
|
||||
S3 disk can be configured as `main` or `cold` storage:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
|
||||
<access_key_id>your_access_key_id</access_key_id>
|
||||
<secret_access_key>your_secret_access_key</secret_access_key>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3_main>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3_main>
|
||||
<s3_cold>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>default</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>s3</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
<move_factor>0.2</move_factor>
|
||||
</s3_cold>
|
||||
</policies>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule.
|
||||
|
||||
## Using Azure Blob Storage for Data Storage {#table_engine-mergetree-azure-blob-storage}
|
||||
|
||||
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
|
||||
|
||||
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<blob_storage_disk>
|
||||
<type>azure_blob_storage</type>
|
||||
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
|
||||
<container_name>container</container_name>
|
||||
<account_name>account</account_name>
|
||||
<account_key>pass123</account_key>
|
||||
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
|
||||
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</blob_storage_disk>
|
||||
</disks>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Connection parameters:
|
||||
* `endpoint` — AzureBlobStorage endpoint URL with container & prefix. Optionally can contain account_name if the authentication method used needs it. (`http://account.blob.core.windows.net:{port}/[account_name]{container_name}/{data_prefix}`) or these parameters can be provided separately using storage_account_url, account_name & container. For specifying prefix, endpoint should be used.
|
||||
* `endpoint_contains_account_name` - This flag is used to specify if endpoint contains account_name as it is only needed for certain authentication methods. (Default : true)
|
||||
* `storage_account_url` - Required if endpoint is not specified, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
|
||||
* `container_name` - Target container name, defaults to `default-container`.
|
||||
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
|
||||
|
||||
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
|
||||
* `connection_string` - For authentication using a connection string.
|
||||
* `account_name` and `account_key` - For authentication using Shared Key.
|
||||
|
||||
Limit parameters (mainly for internal usage):
|
||||
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
|
||||
* `min_bytes_for_seek` - Limits the size of a seekable region.
|
||||
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
|
||||
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
|
||||
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
|
||||
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
|
||||
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
|
||||
|
||||
:::note Zero-copy replication is not ready for production
|
||||
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
|
||||
:::
|
||||
|
||||
## HDFS storage {#hdfs-storage}
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `hdfs`
|
||||
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<hdfs>
|
||||
<type>hdfs</type>
|
||||
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
|
||||
<skip_access_check>true</skip_access_check>
|
||||
</hdfs>
|
||||
<hdd>
|
||||
<type>local</type>
|
||||
<path>/</path>
|
||||
</hdd>
|
||||
</disks>
|
||||
<policies>
|
||||
<hdfs>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>hdfs</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>hdd</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
</hdfs>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Web storage (read-only) {#web-storage}
|
||||
|
||||
Web storage can be used for read-only purposes. An example use is for hosting sample
|
||||
data, or for migrating data.
|
||||
|
||||
:::tip
|
||||
Storage can also be configured temporarily within a query, if a web dataset is not expected
|
||||
to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the
|
||||
configuration file.
|
||||
:::
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `web`
|
||||
- the data is hosted at `http://nginx:80/test1/`
|
||||
- a cache on local storage is used
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>http://nginx:80/test1/</endpoint>
|
||||
</web>
|
||||
<cached_web>
|
||||
<type>cache</type>
|
||||
<disk>web</disk>
|
||||
<path>cached_web_cache/</path>
|
||||
<max_size>100000000</max_size>
|
||||
</cached_web>
|
||||
</disks>
|
||||
<policies>
|
||||
<web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</web>
|
||||
<cached_web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cached_web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</cached_web>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_part` — Name of a part.
|
||||
|
@ -10,10 +10,14 @@ The embeddings and the metadata are stored in separate files in the raw data. A
|
||||
converts them to CSV and imports them into ClickHouse. You can use the following `download.sh` script for that:
|
||||
|
||||
```bash
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata
|
||||
python3 process.py ${1} # merge files and convert to CSV
|
||||
number=${1}
|
||||
if [[ $number == '' ]]; then
|
||||
number=1
|
||||
fi;
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${number}.npy # download image embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${number}.npy # download text embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${number}.parquet # download metadata
|
||||
python3 process.py $number # merge files and convert to CSV
|
||||
```
|
||||
Script `process.py` is defined as follows:
|
||||
|
||||
|
@ -248,6 +248,9 @@ Some of the files might not download fully. Check the file sizes and re-download
|
||||
|
||||
``` bash
|
||||
$ curl -O https://datasets.clickhouse.com/trips_mergetree/partitions/trips_mergetree.tar
|
||||
# Validate the checksum
|
||||
$ md5sum trips_mergetree.tar
|
||||
# Checksum should be equal to: f3b8d469b41d9a82da064ded7245d12c
|
||||
$ tar xvf trips_mergetree.tar -C /var/lib/clickhouse # path to ClickHouse data directory
|
||||
$ # check permissions of unpacked data, fix if required
|
||||
$ sudo service clickhouse-server restart
|
||||
|
@ -5,26 +5,416 @@ sidebar_label: "External Disks for Storing Data"
|
||||
title: "External Disks for Storing Data"
|
||||
---
|
||||
|
||||
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon S3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)).
|
||||
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely. Various storages are supported:
|
||||
1. [Amazon S3](https://aws.amazon.com/s3/) object storage.
|
||||
2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html))
|
||||
3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs).
|
||||
|
||||
To work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
|
||||
:::note ClickHouse also has support for external table engines, which are different from external storage option described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables.
|
||||
1. to work with data stored on `Amazon S3` disks, use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine.
|
||||
2. to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
|
||||
3. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine.
|
||||
:::
|
||||
|
||||
To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver).
|
||||
## Configuring external storage {#configuring-external-storage}
|
||||
|
||||
## Configuring HDFS {#configuring-hdfs}
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly.
|
||||
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
|
||||
Disk configuration requires:
|
||||
1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`.
|
||||
2. Configuration of a specific external storage type.
|
||||
|
||||
Configuration markup:
|
||||
Starting from 24.1 clickhouse version, it is possible to use a new configuration option.
|
||||
It requires to specify:
|
||||
1. `type` equal to `object_storage`
|
||||
2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`.
|
||||
Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web`.
|
||||
Usage of `plain` metadata type is described in [plain storage section](/docs/en/operations/storing-data.md/#storing-data-on-webserver), `web` metadata type can be used only with `web` object storage type, `local` metadata type stores metadata files locally (each metadata files contains mapping to files in object storage and some additional meta information about them).
|
||||
|
||||
E.g. configuration option
|
||||
``` xml
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3>
|
||||
```
|
||||
|
||||
is equal to configuration (from `24.1`):
|
||||
``` xml
|
||||
<s3>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>local</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3>
|
||||
```
|
||||
|
||||
Configuration
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>s3_plain</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
is equal to
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>plain</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
Example of full storage configuration will look like:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Starting with 24.1 clickhouse version, it can also look like:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<s3>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>local</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
In order to make a specific kind of storage a default option for all `MergeTree` tables add the following section to configuration file:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<merge_tree>
|
||||
<storage_policy>s3</storage_policy>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
If you want to configure a specific storage policy only to specific table, you can define it in settings while creating the table:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (a Int32, b String)
|
||||
ENGINE = MergeTree() ORDER BY a
|
||||
SETTINGS storage_policy = 's3';
|
||||
```
|
||||
|
||||
You can also use `disk` instead of `storage_policy`. In this case it is not requires to have `storage_policy` section in configuration file, only `disk` section would be enough.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (a Int32, b String)
|
||||
ENGINE = MergeTree() ORDER BY a
|
||||
SETTINGS disk = 's3';
|
||||
```
|
||||
|
||||
## Dynamic Configuration {#dynamic-configuration}
|
||||
|
||||
There is also a possibility to specify storage configuration without a predefined disk in configuration in a configuration file, but can be configured in the `CREATE`/`ATTACH` query settings.
|
||||
|
||||
The following example query builds on the above dynamic disk configuration and shows how to use a local disk to cache data from a table stored at a URL.
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
The example below adds cache to external storage.
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=cache,
|
||||
max_size='1Gi',
|
||||
path='/var/lib/clickhouse/custom_disk_cache/',
|
||||
disk=disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
)
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
In the settings highlighted below notice that the disk of `type=web` is nested within
|
||||
the disk of `type=cache`.
|
||||
|
||||
:::note
|
||||
The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
|
||||
:::
|
||||
|
||||
A combination of config-based configuration and sql-defined configuration is also possible:
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=cache,
|
||||
max_size='1Gi',
|
||||
path='/var/lib/clickhouse/custom_disk_cache/',
|
||||
disk=disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
)
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
where `web` is a from a server configuration file:
|
||||
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'</endpoint>
|
||||
</web>
|
||||
</disks>
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
### Using S3 Storage {#s3-storage}
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
|
||||
- `access_key_id` — S3 access key id.
|
||||
- `secret_access_key` — S3 secret access key.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `region` — S3 region name.
|
||||
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
|
||||
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
|
||||
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
|
||||
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
|
||||
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
|
||||
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
|
||||
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
|
||||
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
|
||||
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
|
||||
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
|
||||
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
|
||||
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
|
||||
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
|
||||
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
|
||||
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
|
||||
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `key_template` — Define the format with which the object keys are generated. By default, Clickhouse takes `root path` from `endpoint` option and adds random generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have a full control how to the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`.
|
||||
- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the objects keys which were stored in the metadata files with the metadata version lower that `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.
|
||||
|
||||
:::note
|
||||
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
|
||||
:::
|
||||
|
||||
### Using Plain Storage {#plain-storage}
|
||||
|
||||
In `22.10` a new disk type `s3_plain` was introduced, which provides a write-once storage. Configuration parameters are the same as for `s3` disk type.
|
||||
Unlike `s3` disk type, it stores data as is, e.g. instead of randomly-generated blob names, it uses normal file names (the same way as clickhouse stores files on local disk) and does not store any metadata locally, e.g. it is derived from data on `s3`.
|
||||
|
||||
This disk type allows to keep a static version of the table, as it does not allow executing merges on the existing data and does not allow inserting of new data.
|
||||
A use case for this disk type is to create backups on it, which can be done via `BACKUP TABLE data TO Disk('plain_disk_name', 'backup_name')`. Afterwards you can do `RESTORE TABLE data AS data_restored FROM Disk('plain_disk_name', 'backup_name')` or using `ATTACH TABLE data (...) ENGINE = MergeTree() SETTINGS disk = 'plain_disk_name'`.
|
||||
|
||||
Configuration:
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>s3_plain</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
Starting from `24.1` it is possible configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using `plain` metadata type.
|
||||
|
||||
Configuration:
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>azure</object_storage_type>
|
||||
<metadata_type>plain</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
### Using Azure Blob Storage {#azure-blob-storage}
|
||||
|
||||
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
|
||||
|
||||
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<blob_storage_disk>
|
||||
<type>azure_blob_storage</type>
|
||||
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
|
||||
<container_name>container</container_name>
|
||||
<account_name>account</account_name>
|
||||
<account_key>pass123</account_key>
|
||||
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
|
||||
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</blob_storage_disk>
|
||||
</disks>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Connection parameters:
|
||||
* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
|
||||
* `container_name` - Target container name, defaults to `default-container`.
|
||||
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
|
||||
|
||||
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
|
||||
* `connection_string` - For authentication using a connection string.
|
||||
* `account_name` and `account_key` - For authentication using Shared Key.
|
||||
|
||||
Limit parameters (mainly for internal usage):
|
||||
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
|
||||
* `min_bytes_for_seek` - Limits the size of a seekable region.
|
||||
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
|
||||
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
|
||||
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
|
||||
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
|
||||
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
|
||||
|
||||
:::note Zero-copy replication is not ready for production
|
||||
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
|
||||
:::
|
||||
|
||||
## Using HDFS storage {#hdfs-storage}
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `hdfs`
|
||||
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<hdfs>
|
||||
<type>hdfs</type>
|
||||
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
|
||||
<skip_access_check>true</skip_access_check>
|
||||
</hdfs>
|
||||
<hdd>
|
||||
<type>local</type>
|
||||
<path>/</path>
|
||||
</hdd>
|
||||
</disks>
|
||||
<policies>
|
||||
<hdfs>
|
||||
@ -32,26 +422,17 @@ Configuration markup:
|
||||
<main>
|
||||
<disk>hdfs</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>hdd</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
</hdfs>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
|
||||
<merge_tree>
|
||||
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.
|
||||
|
||||
## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system}
|
||||
### Using Data Encryption {#encrypted-virtual-file-system}
|
||||
|
||||
You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one.
|
||||
|
||||
@ -112,7 +493,7 @@ Example of disk configuration:
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Using local cache {#using-local-cache}
|
||||
### Using local cache {#using-local-cache}
|
||||
|
||||
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
|
||||
For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
|
||||
@ -275,23 +656,92 @@ Cache profile events:
|
||||
|
||||
- `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds`
|
||||
|
||||
## Using in-memory cache (userspace page cache) {#userspace-page-cache}
|
||||
|
||||
The File Cache described above stores cached data in local files. Alternatively, object-store-based disks can be configured to use "Userspace Page Cache", which is RAM-only. Userspace page cache is recommended only if file cache can't be used for some reason, e.g. if the machine doesn't have a local disk at all. Note that file cache effectively uses RAM for caching too, since the OS caches contents of local files.
|
||||
|
||||
To enable userspace page cache for disks that don't use file cache, use setting `use_page_cache_for_disks_without_file_cache`.
|
||||
|
||||
By default, on Linux, the userspace page cache will use all available memory, similar to the OS page cache. In tools like `top` and `ps`, the clickhouse server process will typically show resident set size near 100% of the machine's RAM - this is normal, and most of this memory is actually reclaimable by the OS on memory pressure (`MADV_FREE`). This behavior can be disabled with server setting `page_cache_use_madv_free = 0`, making the userspace page cache just use a fixed amount of memory `page_cache_size` with no special interaction with the OS. On Mac OS, `page_cache_use_madv_free` is always disabled as it doesn't have lazy `MADV_FREE`.
|
||||
|
||||
Unfortunately, `page_cache_use_madv_free` makes it difficult to tell if the server is close to running out of memory, since the RSS metric becomes useless. Async metric `UnreclaimableRSS` shows the amount of physical memory used by the server, excluding the memory reclaimable by the OS: `select value from system.asynchronous_metrics where metric = 'UnreclaimableRSS'`. Use it for monitoring instead of RSS. This metric is only available if `page_cache_use_madv_free` is enabled.
|
||||
|
||||
## Storing Data on Web Server {#storing-data-on-webserver}
|
||||
|
||||
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
|
||||
### Using static Web storage (read-only) {#web-storage}
|
||||
|
||||
This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md).
|
||||
Web storage can be used for read-only purposes. An example use is for hosting sample data, or for migrating data.
|
||||
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
|
||||
|
||||
Web server storage is supported only for the [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) engine families. To access the data stored on a `web` disk, use the [storage_policy](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#terms) setting when executing the query. For example, `ATTACH TABLE table_web UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'`.
|
||||
In this sample configuration:
|
||||
- the disk is of type `web`
|
||||
- the data is hosted at `http://nginx:80/test1/`
|
||||
- a cache on local storage is used
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>http://nginx:80/test1/</endpoint>
|
||||
</web>
|
||||
<cached_web>
|
||||
<type>cache</type>
|
||||
<disk>web</disk>
|
||||
<path>cached_web_cache/</path>
|
||||
<max_size>100000000</max_size>
|
||||
</cached_web>
|
||||
</disks>
|
||||
<policies>
|
||||
<web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</web>
|
||||
<cached_web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cached_web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</cached_web>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
:::tip
|
||||
Storage can also be configured temporarily within a query, if a web dataset is not expected
|
||||
to be used routinely, see [dynamic configuration](#dynamic-configuration) and skip editing the
|
||||
configuration file.
|
||||
:::
|
||||
|
||||
:::tip
|
||||
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver)
|
||||
:::
|
||||
|
||||
In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.
|
||||
|
||||
```sql
|
||||
# highlight-next-line
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
A ready test case. You need to add this configuration to config:
|
||||
|
||||
@ -487,7 +937,7 @@ If URL is not reachable on disk load when the server is starting up tables, then
|
||||
Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read.
|
||||
|
||||
|
||||
## Zero-copy Replication (not ready for production) {#zero-copy}
|
||||
### Zero-copy Replication (not ready for production) {#zero-copy}
|
||||
|
||||
Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.
|
||||
|
||||
|
@ -26,7 +26,9 @@ priority: 0
|
||||
is_active: 0
|
||||
active_children: 0
|
||||
dequeued_requests: 67
|
||||
canceled_requests: 0
|
||||
dequeued_cost: 4692272
|
||||
canceled_cost: 0
|
||||
busy_periods: 63
|
||||
vruntime: 938454.1999999989
|
||||
system_vruntime: ᴺᵁᴸᴸ
|
||||
@ -54,7 +56,9 @@ Columns:
|
||||
- `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied.
|
||||
- `active_children` (`UInt64`) - The number of children in active state.
|
||||
- `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node.
|
||||
- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node.
|
||||
- `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node.
|
||||
- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node.
|
||||
- `busy_periods` (`UInt64`) - The total number of deactivations of this node.
|
||||
- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner.
|
||||
- `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`.
|
||||
|
@ -16,7 +16,9 @@ ClickHouse also supports:
|
||||
|
||||
## NULL Processing
|
||||
|
||||
During aggregation, all `NULL`s are skipped. If the aggregation has several parameters it will ignore any row in which one or more of the parameters are NULL.
|
||||
During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL.
|
||||
|
||||
There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`.
|
||||
|
||||
**Examples:**
|
||||
|
||||
@ -85,3 +87,50 @@ FROM t_null_big;
|
||||
│ [2,2,3] │ [2,NULL,2,3,NULL] │
|
||||
└───────────────┴───────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Note that aggregations are skipped when the columns are used as arguments to an aggregated function. For example [`count`](../../sql-reference/aggregate-functions/reference/count.md) without parameters (`count()`) or with constant ones (`count(1)`) will count all rows in the block (independently of the value of the GROUP BY column as it's not an argument), while `count(column)` will only return the number of rows where column is not NULL.
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
v,
|
||||
count(1),
|
||||
count(v)
|
||||
FROM
|
||||
(
|
||||
SELECT if(number < 10, NULL, number % 3) AS v
|
||||
FROM numbers(15)
|
||||
)
|
||||
GROUP BY v
|
||||
|
||||
┌────v─┬─count()─┬─count(v)─┐
|
||||
│ ᴺᵁᴸᴸ │ 10 │ 0 │
|
||||
│ 0 │ 1 │ 1 │
|
||||
│ 1 │ 2 │ 2 │
|
||||
│ 2 │ 2 │ 2 │
|
||||
└──────┴─────────┴──────────┘
|
||||
```
|
||||
|
||||
And here is an example of of first_value with `RESPECT NULLS` where we can see that NULL inputs are respected and it will return the first value read, whether it's NULL or not:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
col || '_' || ((col + 1) * 5 - 1) as range,
|
||||
first_value(odd_or_null) as first,
|
||||
first_value(odd_or_null) IGNORE NULLS as first_ignore_null,
|
||||
first_value(odd_or_null) RESPECT NULLS as first_respect_nulls
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
intDiv(number, 5) AS col,
|
||||
if(number % 2 == 0, NULL, number) as odd_or_null
|
||||
FROM numbers(15)
|
||||
)
|
||||
GROUP BY col
|
||||
ORDER BY col
|
||||
|
||||
┌─range─┬─first─┬─first_ignore_null─┬─first_respect_nulls─┐
|
||||
│ 0_4 │ 1 │ 1 │ ᴺᵁᴸᴸ │
|
||||
│ 1_9 │ 5 │ 5 │ 5 │
|
||||
│ 2_14 │ 11 │ 11 │ ᴺᵁᴸᴸ │
|
||||
└───────┴───────┴───────────────────┴─────────────────────┘
|
||||
```
|
||||
|
@ -1,16 +1,99 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/varpop
|
||||
title: "varPop"
|
||||
slug: "/en/sql-reference/aggregate-functions/reference/varpop"
|
||||
sidebar_position: 32
|
||||
---
|
||||
|
||||
# varPop(x)
|
||||
This page covers the `varPop` and `varPopStable` functions available in ClickHouse.
|
||||
|
||||
Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`.
|
||||
## varPop
|
||||
|
||||
In other words, dispersion for a set of values. Returns `Float64`.
|
||||
Calculates the population covariance between two data columns. The population covariance measures the degree to which two variables vary together. Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`.
|
||||
|
||||
Alias: `VAR_POP`.
|
||||
**Syntax**
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
```sql
|
||||
covarPop(x, y)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x`: The first data column. [Numeric](../../../native-protocol/columns.md)
|
||||
- `y`: The second data column. [Numeric](../../../native-protocol/columns.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
Returns an integer of type `Float64`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable).
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS test_data;
|
||||
CREATE TABLE test_data
|
||||
(
|
||||
x Int32,
|
||||
y Int32
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO test_data VALUES (1, 2), (2, 3), (3, 5), (4, 6), (5, 8);
|
||||
|
||||
SELECT
|
||||
covarPop(x, y) AS covar_pop
|
||||
FROM test_data;
|
||||
```
|
||||
|
||||
```response
|
||||
3
|
||||
```
|
||||
|
||||
## varPopStable
|
||||
|
||||
Calculates population covariance between two data columns using a stable, numerically accurate method to calculate the variance. This function is designed to provide reliable results even with large datasets or values that might cause numerical instability in other implementations.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
covarPopStable(x, y)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x`: The first data column. [String literal](../../syntax#syntax-string-literal)
|
||||
- `y`: The second data column. [Expression](../../syntax#syntax-expressions)
|
||||
|
||||
**Returned value**
|
||||
|
||||
Returns an integer of type `Float64`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS test_data;
|
||||
CREATE TABLE test_data
|
||||
(
|
||||
x Int32,
|
||||
y Int32
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO test_data VALUES (1, 2), (2, 9), (9, 5), (4, 6), (5, 8);
|
||||
|
||||
SELECT
|
||||
covarPopStable(x, y) AS covar_pop_stable
|
||||
FROM test_data;
|
||||
```
|
||||
|
||||
```response
|
||||
0.5999999999999999
|
||||
```
|
||||
|
@ -1,18 +1,128 @@
|
||||
---
|
||||
title: "varSamp"
|
||||
slug: /en/sql-reference/aggregate-functions/reference/varsamp
|
||||
sidebar_position: 33
|
||||
---
|
||||
|
||||
# varSamp
|
||||
This page contains information on the `varSamp` and `varSampStable` ClickHouse functions.
|
||||
|
||||
Calculates the amount `Σ((x - x̅)^2) / (n - 1)`, where `n` is the sample size and `x̅`is the average value of `x`.
|
||||
## varSamp
|
||||
|
||||
It represents an unbiased estimate of the variance of a random variable if passed values from its sample.
|
||||
Calculate the sample variance of a data set.
|
||||
|
||||
Returns `Float64`. When `n <= 1`, returns `+∞`.
|
||||
**Syntax**
|
||||
|
||||
Alias: `VAR_SAMP`.
|
||||
```sql
|
||||
varSamp(expr)
|
||||
```
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
**Parameters**
|
||||
|
||||
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions)
|
||||
|
||||
**Returned value**
|
||||
|
||||
Returns a Float64 value representing the sample variance of the input data set.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
The `varSamp()` function calculates the sample variance using the following formula:
|
||||
|
||||
```plaintext
|
||||
∑(x - mean(x))^2 / (n - 1)
|
||||
```
|
||||
|
||||
Where:
|
||||
|
||||
- `x` is each individual data point in the data set.
|
||||
- `mean(x)` is the arithmetic mean of the data set.
|
||||
- `n` is the number of data points in the data set.
|
||||
|
||||
The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead.
|
||||
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE example_table
|
||||
(
|
||||
id UInt64,
|
||||
value Float64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
|
||||
INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7);
|
||||
|
||||
SELECT varSamp(value) FROM example_table;
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```response
|
||||
0.8650000000000091
|
||||
```
|
||||
|
||||
## varSampStable
|
||||
|
||||
Calculate the sample variance of a data set using a numerically stable algorithm.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
varSampStable(expr)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions)
|
||||
|
||||
**Returned value**
|
||||
|
||||
The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()`](#varSamp function):
|
||||
|
||||
```plaintext
|
||||
∑(x - mean(x))^2 / (n - 1)
|
||||
```
|
||||
|
||||
Where:
|
||||
- `x` is each individual data point in the data set.
|
||||
- `mean(x)` is the arithmetic mean of the data set.
|
||||
- `n` is the number of data points in the data set.
|
||||
|
||||
The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values.
|
||||
|
||||
Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE example_table
|
||||
(
|
||||
id UInt64,
|
||||
value Float64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
|
||||
INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7);
|
||||
|
||||
SELECT varSampStable(value) FROM example_table;
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```response
|
||||
0.865
|
||||
```
|
||||
|
||||
This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic.
|
||||
|
@ -10,6 +10,8 @@ sidebar_label: Nullable
|
||||
|
||||
Returns whether the argument is [NULL](../../sql-reference/syntax.md#null).
|
||||
|
||||
See also operator [`IS NULL`](../operators/index.md#is_null).
|
||||
|
||||
``` sql
|
||||
isNull(x)
|
||||
```
|
||||
@ -54,6 +56,8 @@ Result:
|
||||
|
||||
Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal).
|
||||
|
||||
See also operator [`IS NOT NULL`](../operators/index.md#is_not_null).
|
||||
|
||||
``` sql
|
||||
isNotNull(x)
|
||||
```
|
||||
|
@ -5,80 +5,372 @@ sidebar_label: JSON
|
||||
---
|
||||
|
||||
There are two sets of functions to parse JSON.
|
||||
- `visitParam*` (`simpleJSON*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast.
|
||||
- `simpleJSON*` (`visitParam*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast.
|
||||
- `JSONExtract*` is made to parse normal JSON.
|
||||
|
||||
# visitParam functions
|
||||
# simpleJSON/visitParam functions
|
||||
|
||||
ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done.
|
||||
|
||||
The following assumptions are made:
|
||||
|
||||
1. The field name (function argument) must be a constant.
|
||||
2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0`
|
||||
2. The field name is somehow canonically encoded in JSON. For example: `simpleJSONHas('{"abc":"def"}', 'abc') = 1`, but `simpleJSONHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0`
|
||||
3. Fields are searched for on any nesting level, indiscriminately. If there are multiple matching fields, the first occurrence is used.
|
||||
4. The JSON does not have space characters outside of string literals.
|
||||
|
||||
## visitParamHas(params, name)
|
||||
## simpleJSONHas
|
||||
|
||||
Checks whether there is a field with the `name` name.
|
||||
Checks whether there is a field named `field_name`. The result is `UInt8`.
|
||||
|
||||
Alias: `simpleJSONHas`.
|
||||
**Syntax**
|
||||
|
||||
## visitParamExtractUInt(params, name)
|
||||
|
||||
Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.
|
||||
|
||||
Alias: `simpleJSONExtractUInt`.
|
||||
|
||||
## visitParamExtractInt(params, name)
|
||||
|
||||
The same as for Int64.
|
||||
|
||||
Alias: `simpleJSONExtractInt`.
|
||||
|
||||
## visitParamExtractFloat(params, name)
|
||||
|
||||
The same as for Float64.
|
||||
|
||||
Alias: `simpleJSONExtractFloat`.
|
||||
|
||||
## visitParamExtractBool(params, name)
|
||||
|
||||
Parses a true/false value. The result is UInt8.
|
||||
|
||||
Alias: `simpleJSONExtractBool`.
|
||||
|
||||
## visitParamExtractRaw(params, name)
|
||||
|
||||
Returns the value of a field, including separators.
|
||||
|
||||
Alias: `simpleJSONExtractRaw`.
|
||||
|
||||
Examples:
|
||||
|
||||
``` sql
|
||||
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
|
||||
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
|
||||
```sql
|
||||
simpleJSONHas(json, field_name)
|
||||
```
|
||||
|
||||
## visitParamExtractString(params, name)
|
||||
**Parameters**
|
||||
|
||||
Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string.
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
Alias: `simpleJSONExtractString`.
|
||||
**Returned value**
|
||||
|
||||
Examples:
|
||||
It returns `1` if the field exists, `0` otherwise.
|
||||
|
||||
``` sql
|
||||
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
|
||||
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
|
||||
visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
|
||||
visitParamExtractString('{"abc":"hello}', 'abc') = '';
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
|
||||
|
||||
SELECT simpleJSONHas(json, 'foo') FROM jsons;
|
||||
SELECT simpleJSONHas(json, 'bar') FROM jsons;
|
||||
```
|
||||
|
||||
```response
|
||||
1
|
||||
0
|
||||
```
|
||||
## simpleJSONExtractUInt
|
||||
|
||||
Parses `UInt64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractUInt(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"4e3"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":3.4}');
|
||||
INSERT INTO jsons VALUES ('{"foo":5}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
|
||||
INSERT INTO jsons VALUES ('{"baz":2}');
|
||||
|
||||
SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
0
|
||||
4
|
||||
0
|
||||
3
|
||||
5
|
||||
```
|
||||
|
||||
## simpleJSONExtractInt
|
||||
|
||||
Parses `Int64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractInt(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":-3.4}');
|
||||
INSERT INTO jsons VALUES ('{"foo":5}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
|
||||
INSERT INTO jsons VALUES ('{"baz":2}');
|
||||
|
||||
SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
0
|
||||
-4
|
||||
0
|
||||
-3
|
||||
5
|
||||
```
|
||||
|
||||
## simpleJSONExtractFloat
|
||||
|
||||
Parses `Float64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractFloat(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":-3.4}');
|
||||
INSERT INTO jsons VALUES ('{"foo":5}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
|
||||
INSERT INTO jsons VALUES ('{"baz":2}');
|
||||
|
||||
SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
0
|
||||
-4000
|
||||
0
|
||||
-3.4
|
||||
5
|
||||
```
|
||||
|
||||
## simpleJSONExtractBool
|
||||
|
||||
Parses a true/false value from the value of the field named `field_name`. The result is `UInt8`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractBool(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns `1` if the value of the field is `true`, `0` otherwise. This means this function will return `0` including (and not only) in the following cases:
|
||||
- If the field doesn't exists.
|
||||
- If the field contains `true` as a string, e.g.: `{"field":"true"}`.
|
||||
- If the field contains `1` as a numerical value.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":false,"bar":true}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
|
||||
|
||||
SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json;
|
||||
SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
0
|
||||
1
|
||||
0
|
||||
0
|
||||
```
|
||||
|
||||
## simpleJSONExtractRaw
|
||||
|
||||
Returns the value of the field named `field_name` as a `String`, including separators.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractRaw(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":-3.4}');
|
||||
INSERT INTO jsons VALUES ('{"foo":5}');
|
||||
INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}');
|
||||
INSERT INTO jsons VALUES ('{"baz":2}');
|
||||
|
||||
SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
|
||||
"-4e3"
|
||||
-3.4
|
||||
5
|
||||
{"def":[1,2,3]}
|
||||
```
|
||||
|
||||
## simpleJSONExtractString
|
||||
|
||||
Parses `String` in double quotes from the value of the field named `field_name`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractString(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"\\u263"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"\\u263a"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"hello}');
|
||||
|
||||
SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
\n\0
|
||||
|
||||
☺
|
||||
|
||||
```
|
||||
|
||||
## visitParamHas
|
||||
|
||||
This function is [an alias of `simpleJSONHas`](./json-functions#simplejsonhas).
|
||||
|
||||
## visitParamExtractUInt
|
||||
|
||||
This function is [an alias of `simpleJSONExtractUInt`](./json-functions#simplejsonextractuint).
|
||||
|
||||
## visitParamExtractInt
|
||||
|
||||
This function is [an alias of `simpleJSONExtractInt`](./json-functions#simplejsonextractint).
|
||||
|
||||
## visitParamExtractFloat
|
||||
|
||||
This function is [an alias of `simpleJSONExtractFloat`](./json-functions#simplejsonextractfloat).
|
||||
|
||||
## visitParamExtractBool
|
||||
|
||||
This function is [an alias of `simpleJSONExtractBool`](./json-functions#simplejsonextractbool).
|
||||
|
||||
## visitParamExtractRaw
|
||||
|
||||
This function is [an alias of `simpleJSONExtractRaw`](./json-functions#simplejsonextractraw).
|
||||
|
||||
## visitParamExtractString
|
||||
|
||||
This function is [an alias of `simpleJSONExtractString`](./json-functions#simplejsonextractstring).
|
||||
|
||||
# JSONExtract functions
|
||||
|
||||
The following functions are based on [simdjson](https://github.com/lemire/simdjson) designed for more complex JSON parsing requirements.
|
||||
|
@ -299,6 +299,18 @@ sin(x)
|
||||
|
||||
Type: [Float*](../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT sin(1.23);
|
||||
```
|
||||
|
||||
```response
|
||||
0.9424888019316975
|
||||
```
|
||||
|
||||
## cos
|
||||
|
||||
Returns the cosine of the argument.
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -588,8 +588,41 @@ Result:
|
||||
|
||||
## substringUTF8
|
||||
|
||||
Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
Returns the substring of a string `s` which starts at the specified byte index `offset` for Unicode code points. Byte counting starts from `1`. If `offset` is `0`, an empty string is returned. If `offset` is negative, the substring starts `pos` characters from the end of the string, rather than from the beginning. An optional argument `length` specifies the maximum number of bytes the returned substring may have.
|
||||
|
||||
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
substringUTF8(s, offset[, length])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md)
|
||||
- `offset`: The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md).
|
||||
- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A substring of `s` with `length` many bytes, starting at index `offset`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT 'Täglich grüßt das Murmeltier.' AS str,
|
||||
substringUTF8(str, 9),
|
||||
substringUTF8(str, 9, 5)
|
||||
```
|
||||
|
||||
```response
|
||||
Täglich grüßt das Murmeltier. grüßt das Murmeltier. grüßt
|
||||
```
|
||||
|
||||
## substringIndex
|
||||
|
||||
@ -624,7 +657,39 @@ Result:
|
||||
|
||||
## substringIndexUTF8
|
||||
|
||||
Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
Returns the substring of `s` before `count` occurrences of the delimiter `delim`, specifically for Unicode code points.
|
||||
|
||||
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
substringIndexUTF8(s, delim, count)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `s`: The string to extract substring from. [String](../../sql-reference/data-types/string.md).
|
||||
- `delim`: The character to split. [String](../../sql-reference/data-types/string.md).
|
||||
- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT substringIndexUTF8('www.straßen-in-europa.de', '.', 2)
|
||||
```
|
||||
|
||||
```response
|
||||
www.straßen-in-europa
|
||||
```
|
||||
|
||||
## appendTrailingCharIfAbsent
|
||||
|
||||
|
@ -353,7 +353,7 @@ For efficiency, the `and` and `or` functions accept any number of arguments. The
|
||||
|
||||
ClickHouse supports the `IS NULL` and `IS NOT NULL` operators.
|
||||
|
||||
### IS NULL
|
||||
### IS NULL {#is_null}
|
||||
|
||||
- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns:
|
||||
- `1`, if the value is `NULL`.
|
||||
@ -374,7 +374,7 @@ SELECT x+100 FROM t_null WHERE y IS NULL
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
### IS NOT NULL
|
||||
### IS NOT NULL {#is_not_null}
|
||||
|
||||
- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns:
|
||||
- `0`, if the value is `NULL`.
|
||||
|
@ -50,6 +50,7 @@
|
||||
#include <Functions/registerFunctions.h>
|
||||
#include <AggregateFunctions/registerAggregateFunctions.h>
|
||||
#include <Formats/registerFormats.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
using namespace std::literals;
|
||||
@ -1137,6 +1138,13 @@ void Client::processOptions(const OptionsDescription & options_description,
|
||||
}
|
||||
|
||||
|
||||
static bool checkIfStdoutIsRegularFile()
|
||||
{
|
||||
struct stat file_stat;
|
||||
return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
|
||||
}
|
||||
|
||||
|
||||
void Client::processConfig()
|
||||
{
|
||||
if (!queries.empty() && config().has("queries-file"))
|
||||
@ -1173,7 +1181,14 @@ void Client::processConfig()
|
||||
pager = config().getString("pager", "");
|
||||
|
||||
is_default_format = !config().has("vertical") && !config().has("format");
|
||||
if (config().has("vertical"))
|
||||
if (is_default_format && checkIfStdoutIsRegularFile())
|
||||
{
|
||||
is_default_format = false;
|
||||
std::optional<String> format_from_file_name;
|
||||
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
|
||||
format = format_from_file_name ? *format_from_file_name : "TabSeparated";
|
||||
}
|
||||
else if (config().has("vertical"))
|
||||
format = config().getString("format", "Vertical");
|
||||
else
|
||||
format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated");
|
||||
@ -1377,8 +1392,8 @@ void Client::readArguments(
|
||||
}
|
||||
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseClient(int argc, char ** argv)
|
||||
{
|
||||
|
@ -109,8 +109,8 @@ static std::vector<std::string> extractFromConfig(
|
||||
return {configuration->getString(key)};
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseExtractFromConfig(int argc, char ** argv)
|
||||
{
|
||||
|
@ -70,8 +70,8 @@ void skipSpacesAndComments(const char*& pos, const char* end, bool print_comment
|
||||
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
extern const char * auto_time_zones[];
|
||||
|
||||
|
@ -327,6 +327,14 @@ static bool checkIfStdinIsRegularFile()
|
||||
return fstat(STDIN_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
|
||||
}
|
||||
|
||||
|
||||
static bool checkIfStdoutIsRegularFile()
|
||||
{
|
||||
struct stat file_stat;
|
||||
return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
|
||||
}
|
||||
|
||||
|
||||
std::string LocalServer::getInitialCreateTableQuery()
|
||||
{
|
||||
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || queries.empty()))
|
||||
@ -638,7 +646,14 @@ void LocalServer::processConfig()
|
||||
if (config().has("macros"))
|
||||
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
|
||||
|
||||
format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
|
||||
if (!config().has("output-format") && !config().has("format") && checkIfStdoutIsRegularFile())
|
||||
{
|
||||
std::optional<String> format_from_file_name;
|
||||
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
|
||||
format = format_from_file_name ? *format_from_file_name : "TSV";
|
||||
}
|
||||
else
|
||||
format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
|
||||
insert_format = "Values";
|
||||
|
||||
/// Setting value from cmd arg overrides one from config
|
||||
@ -944,8 +959,8 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
|
||||
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseLocal(int argc, char ** argv)
|
||||
{
|
||||
|
@ -1204,8 +1204,8 @@ public:
|
||||
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseObfuscator(int argc, char ** argv)
|
||||
try
|
||||
|
@ -133,20 +133,20 @@ public:
|
||||
/// This function also enables custom prefixes to be used.
|
||||
void setCustomSettingsPrefixes(const Strings & prefixes);
|
||||
void setCustomSettingsPrefixes(const String & comma_separated_prefixes);
|
||||
bool isSettingNameAllowed(const std::string_view name) const;
|
||||
void checkSettingNameIsAllowed(const std::string_view name) const;
|
||||
bool isSettingNameAllowed(std::string_view name) const;
|
||||
void checkSettingNameIsAllowed(std::string_view name) const;
|
||||
|
||||
/// Allows implicit user creation without password (by default it's allowed).
|
||||
/// In other words, allow 'CREATE USER' queries without 'IDENTIFIED WITH' clause.
|
||||
void setImplicitNoPasswordAllowed(const bool allow_implicit_no_password_);
|
||||
void setImplicitNoPasswordAllowed(bool allow_implicit_no_password_);
|
||||
bool isImplicitNoPasswordAllowed() const;
|
||||
|
||||
/// Allows users without password (by default it's allowed).
|
||||
void setNoPasswordAllowed(const bool allow_no_password_);
|
||||
void setNoPasswordAllowed(bool allow_no_password_);
|
||||
bool isNoPasswordAllowed() const;
|
||||
|
||||
/// Allows users with plaintext password (by default it's allowed).
|
||||
void setPlaintextPasswordAllowed(const bool allow_plaintext_password_);
|
||||
void setPlaintextPasswordAllowed(bool allow_plaintext_password_);
|
||||
bool isPlaintextPasswordAllowed() const;
|
||||
|
||||
/// Default password type when the user does not specify it.
|
||||
|
@ -616,7 +616,7 @@ UUID IAccessStorage::generateRandomID()
|
||||
}
|
||||
|
||||
|
||||
void IAccessStorage::clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const LoggerPtr log_)
|
||||
void IAccessStorage::clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_)
|
||||
{
|
||||
std::unordered_map<UUID, size_t> positions_by_id;
|
||||
std::unordered_map<std::string_view, size_t> positions_by_type_and_name[static_cast<size_t>(AccessEntityType::MAX)];
|
||||
|
@ -228,7 +228,7 @@ protected:
|
||||
static UUID generateRandomID();
|
||||
LoggerPtr getLogger() const;
|
||||
static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); }
|
||||
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const LoggerPtr log_);
|
||||
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_);
|
||||
[[noreturn]] void throwNotFound(const UUID & id) const;
|
||||
[[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const;
|
||||
[[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type);
|
||||
|
@ -182,11 +182,14 @@ public:
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::NONE)
|
||||
{
|
||||
if (limit_num_elems && cur_elems.value.size() >= max_elems)
|
||||
if constexpr (limit_num_elems)
|
||||
{
|
||||
if constexpr (Trait::last)
|
||||
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value;
|
||||
return;
|
||||
if (cur_elems.value.size() >= max_elems)
|
||||
{
|
||||
if constexpr (Trait::last)
|
||||
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
cur_elems.value.push_back(row_value, arena);
|
||||
@ -236,7 +239,7 @@ public:
|
||||
|
||||
void mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
if constexpr (!limit_num_elems)
|
||||
{
|
||||
if (rhs_elems.value.size())
|
||||
cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);
|
||||
|
@ -310,9 +310,11 @@ public:
|
||||
{
|
||||
for (Field & element : values)
|
||||
{
|
||||
UInt8 is_null = 0;
|
||||
readBinary(is_null, buf);
|
||||
if (!is_null)
|
||||
/// We must initialize the Field type since some internal functions (like operator=) use them
|
||||
new (&element) Field;
|
||||
bool has_value = false;
|
||||
readBinary(has_value, buf);
|
||||
if (has_value)
|
||||
serialization->deserializeBinary(element, buf, {});
|
||||
}
|
||||
}
|
||||
|
@ -144,7 +144,7 @@ public:
|
||||
count = other.count;
|
||||
compressed = other.compressed;
|
||||
|
||||
sampled.resize(other.sampled.size());
|
||||
sampled.resize_exact(other.sampled.size());
|
||||
memcpy(sampled.data(), other.sampled.data(), sizeof(Stats) * other.sampled.size());
|
||||
return;
|
||||
}
|
||||
@ -180,7 +180,7 @@ public:
|
||||
compress();
|
||||
|
||||
backup_sampled.clear();
|
||||
backup_sampled.reserve(sampled.size() + other.sampled.size());
|
||||
backup_sampled.reserve_exact(sampled.size() + other.sampled.size());
|
||||
double merged_relative_error = std::max(relative_error, other.relative_error);
|
||||
size_t merged_count = count + other.count;
|
||||
Int64 additional_self_delta = static_cast<Int64>(std::floor(2 * other.relative_error * other.count));
|
||||
@ -268,11 +268,7 @@ public:
|
||||
|
||||
size_t sampled_len = 0;
|
||||
readBinaryLittleEndian(sampled_len, buf);
|
||||
if (sampled_len > compress_threshold)
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA, "The number of elements {} for quantileGK exceeds {}", sampled_len, compress_threshold);
|
||||
|
||||
sampled.resize(sampled_len);
|
||||
sampled.resize_exact(sampled_len);
|
||||
|
||||
for (size_t i = 0; i < sampled_len; ++i)
|
||||
{
|
||||
@ -317,7 +313,7 @@ private:
|
||||
::sort(head_sampled.begin(), head_sampled.end());
|
||||
|
||||
backup_sampled.clear();
|
||||
backup_sampled.reserve(sampled.size() + head_sampled.size());
|
||||
backup_sampled.reserve_exact(sampled.size() + head_sampled.size());
|
||||
|
||||
size_t sample_idx = 0;
|
||||
size_t ops_idx = 0;
|
||||
|
@ -483,6 +483,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; }
|
||||
bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override
|
||||
{
|
||||
@ -576,6 +577,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; }
|
||||
bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override
|
||||
{
|
||||
|
@ -142,6 +142,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -165,6 +165,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -111,6 +111,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -152,6 +152,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_function->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_function->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -92,6 +92,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -162,6 +162,10 @@ public:
|
||||
/// Tells if merge() with thread pool parameter could be used.
|
||||
virtual bool isAbleToParallelizeMerge() const { return false; }
|
||||
|
||||
/// Return true if it is allowed to replace call of `addBatch`
|
||||
/// to `addBatchSinglePlace` for ranges of consecutive equal keys.
|
||||
virtual bool canOptimizeEqualKeysRanges() const { return true; }
|
||||
|
||||
/// Should be used only if isAbleToParallelizeMerge() returned true.
|
||||
virtual void
|
||||
merge(AggregateDataPtr __restrict /*place*/, ConstAggregateDataPtr /*rhs*/, ThreadPool & /*thread_pool*/, Arena * /*arena*/) const
|
||||
|
@ -54,10 +54,10 @@ public:
|
||||
if (!constant_node)
|
||||
return;
|
||||
|
||||
const auto & constant_value_literal = constant_node->getValue();
|
||||
if (!isInt64OrUInt64FieldType(constant_value_literal.getType()))
|
||||
if (auto constant_type = constant_node->getResultType(); !isNativeInteger(constant_type))
|
||||
return;
|
||||
|
||||
const auto & constant_value_literal = constant_node->getValue();
|
||||
if (getSettings().aggregate_functions_null_for_empty)
|
||||
return;
|
||||
|
||||
|
@ -14,6 +14,8 @@
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -914,6 +914,38 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child)
|
||||
child = makeASTFunction(
|
||||
"toFixedString", std::make_shared<ASTLiteral>(value), std::make_shared<ASTLiteral>(static_cast<UInt64>(value.size())));
|
||||
}
|
||||
else if (type == Field::Types::Which::UInt64 && fuzz_rand() % 7 == 0)
|
||||
{
|
||||
child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toUInt128" : "toUInt256", std::make_shared<ASTLiteral>(l->value.get<UInt64>()));
|
||||
}
|
||||
else if (type == Field::Types::Which::Int64 && fuzz_rand() % 7 == 0)
|
||||
{
|
||||
child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toInt128" : "toInt256", std::make_shared<ASTLiteral>(l->value.get<Int64>()));
|
||||
}
|
||||
else if (type == Field::Types::Which::Float64 && fuzz_rand() % 7 == 0)
|
||||
{
|
||||
int decimal = fuzz_rand() % 4;
|
||||
if (decimal == 0)
|
||||
child = makeASTFunction(
|
||||
"toDecimal32",
|
||||
std::make_shared<ASTLiteral>(l->value.get<Float64>()),
|
||||
std::make_shared<ASTLiteral>(static_cast<UInt64>(fuzz_rand() % 9)));
|
||||
else if (decimal == 1)
|
||||
child = makeASTFunction(
|
||||
"toDecimal64",
|
||||
std::make_shared<ASTLiteral>(l->value.get<Float64>()),
|
||||
std::make_shared<ASTLiteral>(static_cast<UInt64>(fuzz_rand() % 18)));
|
||||
else if (decimal == 2)
|
||||
child = makeASTFunction(
|
||||
"toDecimal128",
|
||||
std::make_shared<ASTLiteral>(l->value.get<Float64>()),
|
||||
std::make_shared<ASTLiteral>(static_cast<UInt64>(fuzz_rand() % 38)));
|
||||
else
|
||||
child = makeASTFunction(
|
||||
"toDecimal256",
|
||||
std::make_shared<ASTLiteral>(l->value.get<Float64>()),
|
||||
std::make_shared<ASTLiteral>(static_cast<UInt64>(fuzz_rand() % 76)));
|
||||
}
|
||||
|
||||
if (fuzz_rand() % 7 == 0)
|
||||
child = makeASTFunction("toNullable", child);
|
||||
@ -933,7 +965,19 @@ ASTPtr QueryFuzzer::reverseLiteralFuzzing(ASTPtr child)
|
||||
{
|
||||
if (auto * function = child.get()->as<ASTFunction>())
|
||||
{
|
||||
std::unordered_set<String> can_be_reverted{"toNullable", "toLowCardinality", "materialize"};
|
||||
const std::unordered_set<String> can_be_reverted{
|
||||
"materialize",
|
||||
"toDecimal32", /// Keeping the first parameter only should be ok (valid query most of the time)
|
||||
"toDecimal64",
|
||||
"toDecimal128",
|
||||
"toDecimal256",
|
||||
"toFixedString", /// Same as toDecimal
|
||||
"toInt128",
|
||||
"toInt256",
|
||||
"toLowCardinality",
|
||||
"toNullable",
|
||||
"toUInt128",
|
||||
"toUInt256"};
|
||||
if (can_be_reverted.contains(function->name) && function->children.size() == 1)
|
||||
{
|
||||
if (fuzz_rand() % 7 == 0)
|
||||
|
@ -47,9 +47,7 @@ private:
|
||||
|
||||
std::unique_ptr<MemoryChunk> prev;
|
||||
|
||||
MemoryChunk()
|
||||
{
|
||||
}
|
||||
MemoryChunk() = default;
|
||||
|
||||
void swap(MemoryChunk & other)
|
||||
{
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <IO/MMappedFileCache.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <base/errnoToString.h>
|
||||
#include <base/find_symbols.h>
|
||||
#include <base/getPageSize.h>
|
||||
#include <sys/resource.h>
|
||||
#include <chrono>
|
||||
@ -90,6 +91,9 @@ AsynchronousMetrics::AsynchronousMetrics(
|
||||
openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", cgroupcpu_cfs_quota);
|
||||
}
|
||||
|
||||
openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count);
|
||||
openFileIfExists("/proc/self/maps", vm_maps);
|
||||
|
||||
openSensors();
|
||||
openBlockDevices();
|
||||
openEDAC();
|
||||
@ -1423,6 +1427,55 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
|
||||
}
|
||||
}
|
||||
|
||||
if (vm_max_map_count)
|
||||
{
|
||||
try
|
||||
{
|
||||
vm_max_map_count->rewind();
|
||||
|
||||
uint64_t max_map_count = 0;
|
||||
readText(max_map_count, *vm_max_map_count);
|
||||
new_values["VMMaxMapCount"] = { max_map_count, "The maximum number of memory mappings a process may have (/proc/sys/vm/max_map_count)."};
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count);
|
||||
}
|
||||
}
|
||||
|
||||
if (vm_maps)
|
||||
{
|
||||
try
|
||||
{
|
||||
vm_maps->rewind();
|
||||
|
||||
uint64_t num_maps = 0;
|
||||
while (!vm_maps->eof())
|
||||
{
|
||||
char * next_pos = find_first_symbols<'\n'>(vm_maps->position(), vm_maps->buffer().end());
|
||||
vm_maps->position() = next_pos;
|
||||
|
||||
if (!vm_maps->hasPendingData())
|
||||
continue;
|
||||
|
||||
if (*vm_maps->position() == '\n')
|
||||
{
|
||||
++num_maps;
|
||||
++vm_maps->position();
|
||||
}
|
||||
}
|
||||
new_values["VMNumMaps"] = { num_maps,
|
||||
"The current number of memory mappings of the process (/proc/self/maps)."
|
||||
" If it is close to the maximum (VMMaxMapCount), you should increase the limit for vm.max_map_count in /etc/sysctl.conf"};
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
openFileIfExists("/proc/self/maps", vm_maps);
|
||||
}
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
for (size_t i = 0, size = thermal.size(); i < size; ++i)
|
||||
|
@ -123,6 +123,9 @@ private:
|
||||
std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_quota TSA_GUARDED_BY(data_mutex);
|
||||
std::optional<ReadBufferFromFilePRead> cgroupcpu_max TSA_GUARDED_BY(data_mutex);
|
||||
|
||||
std::optional<ReadBufferFromFilePRead> vm_max_map_count TSA_GUARDED_BY(data_mutex);
|
||||
std::optional<ReadBufferFromFilePRead> vm_maps TSA_GUARDED_BY(data_mutex);
|
||||
|
||||
std::vector<std::unique_ptr<ReadBufferFromFilePRead>> thermal TSA_GUARDED_BY(data_mutex);
|
||||
|
||||
std::unordered_map<String /* device name */,
|
||||
|
@ -62,7 +62,6 @@ struct LastElementCache
|
||||
bool check(const Key & key) const { return value.first == key; }
|
||||
|
||||
bool hasOnlyOneValue() const { return found && misses == 1; }
|
||||
UInt64 getMisses() const { return misses; }
|
||||
};
|
||||
|
||||
template <typename Data>
|
||||
@ -232,7 +231,7 @@ public:
|
||||
ALWAYS_INLINE UInt64 getCacheMissesSinceLastReset() const
|
||||
{
|
||||
if constexpr (consecutive_keys_optimization)
|
||||
return cache.getMisses();
|
||||
return cache.misses;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -297,7 +297,7 @@ void DNSResolver::setDisableCacheFlag(bool is_disabled)
|
||||
impl->disable_cache = is_disabled;
|
||||
}
|
||||
|
||||
void DNSResolver::setCacheMaxEntries(const UInt64 cache_max_entries)
|
||||
void DNSResolver::setCacheMaxEntries(UInt64 cache_max_entries)
|
||||
{
|
||||
impl->cache_address.setMaxSizeInBytes(cache_max_entries);
|
||||
impl->cache_host.setMaxSizeInBytes(cache_max_entries);
|
||||
|
@ -56,7 +56,7 @@ public:
|
||||
void setDisableCacheFlag(bool is_disabled = true);
|
||||
|
||||
/// Set a limit of entries in cache
|
||||
void setCacheMaxEntries(const UInt64 cache_max_entries);
|
||||
void setCacheMaxEntries(UInt64 cache_max_entries);
|
||||
|
||||
/// Drops all caches
|
||||
void dropCache();
|
||||
|
@ -255,7 +255,7 @@ private:
|
||||
|
||||
static LUTIndex toLUTIndex(ExtendedDayNum d)
|
||||
{
|
||||
return normalizeLUTIndex(static_cast<Int64>(d + daynum_offset_epoch));
|
||||
return normalizeLUTIndex(static_cast<Int64>(d + daynum_offset_epoch)); /// NOLINT
|
||||
}
|
||||
|
||||
LUTIndex toLUTIndex(Time t) const
|
||||
|
@ -1,5 +1,7 @@
|
||||
#include <Common/FunctionDocumentation.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -31,14 +33,7 @@ std::string FunctionDocumentation::examplesAsString() const
|
||||
|
||||
std::string FunctionDocumentation::categoriesAsString() const
|
||||
{
|
||||
if (categories.empty())
|
||||
return "";
|
||||
|
||||
auto it = categories.begin();
|
||||
std::string res = *it;
|
||||
for (; it != categories.end(); ++it)
|
||||
res += ", " + *it;
|
||||
return res;
|
||||
return boost::algorithm::join(categories, ", ");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -41,9 +41,9 @@ public:
|
||||
}
|
||||
|
||||
/// There is no copy constructor because only one MultiVersion should own the same object.
|
||||
MultiVersion(MultiVersion && src) { *this = std::move(src); }
|
||||
MultiVersion(MultiVersion && src) { *this = std::move(src); } /// NOLINT
|
||||
|
||||
MultiVersion & operator=(MultiVersion && src)
|
||||
MultiVersion & operator=(MultiVersion && src) /// NOLINT
|
||||
{
|
||||
if (this != &src)
|
||||
{
|
||||
|
@ -25,7 +25,7 @@
|
||||
*/
|
||||
template <typename T, typename U>
|
||||
constexpr bool memcpy_can_be_used_for_assignment = std::is_same_v<T, U>
|
||||
|| (std::is_integral_v<T> && std::is_integral_v<U> && sizeof(T) == sizeof(U));
|
||||
|| (std::is_integral_v<T> && std::is_integral_v<U> && sizeof(T) == sizeof(U)); /// NOLINT(misc-redundant-expression)
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -558,7 +558,7 @@ public:
|
||||
}
|
||||
|
||||
template <typename... TAllocatorParams>
|
||||
void swap(PODArray & rhs, TAllocatorParams &&... allocator_params)
|
||||
void swap(PODArray & rhs, TAllocatorParams &&... allocator_params) /// NOLINT(performance-noexcept-swap)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
this->unprotect();
|
||||
@ -756,7 +756,7 @@ public:
|
||||
};
|
||||
|
||||
template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
|
||||
void swap(PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & rhs)
|
||||
void swap(PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & rhs) /// NOLINT
|
||||
{
|
||||
lhs.swap(rhs);
|
||||
}
|
||||
|
@ -476,6 +476,7 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(FileSegmentRemoveMicroseconds, "File segment remove() time") \
|
||||
M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \
|
||||
M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \
|
||||
M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \
|
||||
M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \
|
||||
M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \
|
||||
\
|
||||
@ -533,6 +534,7 @@ The server successfully detected this situation and will download merged part fr
|
||||
\
|
||||
M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \
|
||||
M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \
|
||||
M(AggregationOptimizedEqualRangesOfKeys, "For how many blocks optimization of equal ranges of keys was applied") \
|
||||
\
|
||||
M(MetadataFromKeeperCacheHit, "Number of times an object storage metadata request was answered from cache without making request to Keeper") \
|
||||
M(MetadataFromKeeperCacheMiss, "Number of times an object storage metadata request had to be answered from Keeper") \
|
||||
|
@ -2,13 +2,13 @@
|
||||
# if USE_SSH
|
||||
# include <stdexcept>
|
||||
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wreserved-macro-identifier"
|
||||
# pragma GCC diagnostic ignored "-Wreserved-identifier"
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wreserved-macro-identifier"
|
||||
# pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
|
||||
# include <libssh/libssh.h>
|
||||
|
||||
# pragma GCC diagnostic pop
|
||||
# pragma clang diagnostic pop
|
||||
|
||||
namespace
|
||||
{
|
||||
|
@ -387,7 +387,9 @@ public:
|
||||
|
||||
/// Introspection
|
||||
std::atomic<UInt64> dequeued_requests{0};
|
||||
std::atomic<UInt64> canceled_requests{0};
|
||||
std::atomic<ResourceCost> dequeued_cost{0};
|
||||
std::atomic<ResourceCost> canceled_cost{0};
|
||||
std::atomic<UInt64> busy_periods{0};
|
||||
};
|
||||
|
||||
|
@ -50,6 +50,12 @@ public:
|
||||
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
|
||||
virtual void enqueueRequest(ResourceRequest * request) = 0;
|
||||
|
||||
/// Cancel previously enqueued request.
|
||||
/// Returns `false` and does nothing given unknown or already executed request.
|
||||
/// Returns `true` if requests has been found and canceled.
|
||||
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
|
||||
virtual bool cancelRequest(ResourceRequest * request) = 0;
|
||||
|
||||
/// For introspection
|
||||
ResourceCost getBudget() const
|
||||
{
|
||||
|
@ -134,56 +134,65 @@ public:
|
||||
|
||||
std::pair<ResourceRequest *, bool> dequeueRequest() override
|
||||
{
|
||||
if (heap_size == 0)
|
||||
return {nullptr, false};
|
||||
|
||||
// Recursively pull request from child
|
||||
auto [request, child_active] = items.front().child->dequeueRequest();
|
||||
assert(request != nullptr);
|
||||
std::pop_heap(items.begin(), items.begin() + heap_size);
|
||||
Item & current = items[heap_size - 1];
|
||||
|
||||
// SFQ fairness invariant: system vruntime equals last served request start-time
|
||||
assert(current.vruntime >= system_vruntime);
|
||||
system_vruntime = current.vruntime;
|
||||
|
||||
// By definition vruntime is amount of consumed resource (cost) divided by weight
|
||||
current.vruntime += double(request->cost) / current.child->info.weight;
|
||||
max_vruntime = std::max(max_vruntime, current.vruntime);
|
||||
|
||||
if (child_active) // Put active child back in heap after vruntime update
|
||||
// Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr`
|
||||
while (true)
|
||||
{
|
||||
std::push_heap(items.begin(), items.begin() + heap_size);
|
||||
}
|
||||
else // Deactivate child if it is empty, but remember it's vruntime for latter activations
|
||||
{
|
||||
heap_size--;
|
||||
if (heap_size == 0)
|
||||
return {nullptr, false};
|
||||
|
||||
// Store index of this inactive child in `parent.idx`
|
||||
// This enables O(1) search of inactive children instead of O(n)
|
||||
current.child->info.parent.idx = heap_size;
|
||||
}
|
||||
// Recursively pull request from child
|
||||
auto [request, child_active] = items.front().child->dequeueRequest();
|
||||
std::pop_heap(items.begin(), items.begin() + heap_size);
|
||||
Item & current = items[heap_size - 1];
|
||||
|
||||
// Reset any difference between children on busy period end
|
||||
if (heap_size == 0)
|
||||
{
|
||||
// Reset vtime to zero to avoid floating-point error accumulation,
|
||||
// but do not reset too often, because it's O(N)
|
||||
UInt64 ns = clock_gettime_ns();
|
||||
if (last_reset_ns + 1000000000 < ns)
|
||||
if (request)
|
||||
{
|
||||
last_reset_ns = ns;
|
||||
for (Item & item : items)
|
||||
item.vruntime = 0;
|
||||
max_vruntime = 0;
|
||||
}
|
||||
system_vruntime = max_vruntime;
|
||||
busy_periods++;
|
||||
}
|
||||
// SFQ fairness invariant: system vruntime equals last served request start-time
|
||||
assert(current.vruntime >= system_vruntime);
|
||||
system_vruntime = current.vruntime;
|
||||
|
||||
dequeued_requests++;
|
||||
dequeued_cost += request->cost;
|
||||
return {request, heap_size > 0};
|
||||
// By definition vruntime is amount of consumed resource (cost) divided by weight
|
||||
current.vruntime += double(request->cost) / current.child->info.weight;
|
||||
max_vruntime = std::max(max_vruntime, current.vruntime);
|
||||
}
|
||||
|
||||
if (child_active) // Put active child back in heap after vruntime update
|
||||
{
|
||||
std::push_heap(items.begin(), items.begin() + heap_size);
|
||||
}
|
||||
else // Deactivate child if it is empty, but remember it's vruntime for latter activations
|
||||
{
|
||||
heap_size--;
|
||||
|
||||
// Store index of this inactive child in `parent.idx`
|
||||
// This enables O(1) search of inactive children instead of O(n)
|
||||
current.child->info.parent.idx = heap_size;
|
||||
}
|
||||
|
||||
// Reset any difference between children on busy period end
|
||||
if (heap_size == 0)
|
||||
{
|
||||
// Reset vtime to zero to avoid floating-point error accumulation,
|
||||
// but do not reset too often, because it's O(N)
|
||||
UInt64 ns = clock_gettime_ns();
|
||||
if (last_reset_ns + 1000000000 < ns)
|
||||
{
|
||||
last_reset_ns = ns;
|
||||
for (Item & item : items)
|
||||
item.vruntime = 0;
|
||||
max_vruntime = 0;
|
||||
}
|
||||
system_vruntime = max_vruntime;
|
||||
busy_periods++;
|
||||
}
|
||||
|
||||
if (request)
|
||||
{
|
||||
dequeued_requests++;
|
||||
dequeued_cost += request->cost;
|
||||
return {request, heap_size > 0};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool isActive() override
|
||||
|
@ -39,8 +39,7 @@ public:
|
||||
|
||||
void enqueueRequest(ResourceRequest * request) override
|
||||
{
|
||||
std::unique_lock lock(mutex);
|
||||
request->enqueue_ns = clock_gettime_ns();
|
||||
std::lock_guard lock(mutex);
|
||||
queue_cost += request->cost;
|
||||
bool was_empty = requests.empty();
|
||||
requests.push_back(request);
|
||||
@ -50,7 +49,7 @@ public:
|
||||
|
||||
std::pair<ResourceRequest *, bool> dequeueRequest() override
|
||||
{
|
||||
std::unique_lock lock(mutex);
|
||||
std::lock_guard lock(mutex);
|
||||
if (requests.empty())
|
||||
return {nullptr, false};
|
||||
ResourceRequest * result = requests.front();
|
||||
@ -63,9 +62,29 @@ public:
|
||||
return {result, !requests.empty()};
|
||||
}
|
||||
|
||||
bool cancelRequest(ResourceRequest * request) override
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
// TODO(serxa): reimplement queue as intrusive list of ResourceRequest to make this O(1) instead of O(N)
|
||||
for (auto i = requests.begin(), e = requests.end(); i != e; ++i)
|
||||
{
|
||||
if (*i == request)
|
||||
{
|
||||
requests.erase(i);
|
||||
if (requests.empty())
|
||||
busy_periods++;
|
||||
queue_cost -= request->cost;
|
||||
canceled_requests++;
|
||||
canceled_cost += request->cost;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isActive() override
|
||||
{
|
||||
std::unique_lock lock(mutex);
|
||||
std::lock_guard lock(mutex);
|
||||
return !requests.empty();
|
||||
}
|
||||
|
||||
@ -98,14 +117,14 @@ public:
|
||||
|
||||
std::pair<UInt64, Int64> getQueueLengthAndCost()
|
||||
{
|
||||
std::unique_lock lock(mutex);
|
||||
std::lock_guard lock(mutex);
|
||||
return {requests.size(), queue_cost};
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex mutex;
|
||||
Int64 queue_cost = 0;
|
||||
std::deque<ResourceRequest *> requests;
|
||||
std::deque<ResourceRequest *> requests; // TODO(serxa): reimplement it using intrusive list to avoid allocations/deallocations and O(N) during cancel
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -102,25 +102,31 @@ public:
|
||||
|
||||
std::pair<ResourceRequest *, bool> dequeueRequest() override
|
||||
{
|
||||
if (items.empty())
|
||||
return {nullptr, false};
|
||||
|
||||
// Recursively pull request from child
|
||||
auto [request, child_active] = items.front().child->dequeueRequest();
|
||||
assert(request != nullptr);
|
||||
|
||||
// Deactivate child if it is empty
|
||||
if (!child_active)
|
||||
// Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr`
|
||||
while (true)
|
||||
{
|
||||
std::pop_heap(items.begin(), items.end());
|
||||
items.pop_back();
|
||||
if (items.empty())
|
||||
busy_periods++;
|
||||
}
|
||||
return {nullptr, false};
|
||||
|
||||
dequeued_requests++;
|
||||
dequeued_cost += request->cost;
|
||||
return {request, !items.empty()};
|
||||
// Recursively pull request from child
|
||||
auto [request, child_active] = items.front().child->dequeueRequest();
|
||||
|
||||
// Deactivate child if it is empty
|
||||
if (!child_active)
|
||||
{
|
||||
std::pop_heap(items.begin(), items.end());
|
||||
items.pop_back();
|
||||
if (items.empty())
|
||||
busy_periods++;
|
||||
}
|
||||
|
||||
if (request)
|
||||
{
|
||||
dequeued_requests++;
|
||||
dequeued_cost += request->cost;
|
||||
return {request, !items.empty()};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool isActive() override
|
||||
|
@ -38,7 +38,6 @@ TEST(SchedulerDynamicResourceManager, Smoke)
|
||||
{
|
||||
ResourceGuard gA(cA->get("res1"), ResourceGuard::PostponeLocking);
|
||||
gA.lock();
|
||||
gA.setFailure();
|
||||
gA.unlock();
|
||||
|
||||
ResourceGuard gB(cB->get("res1"));
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include <Common/Scheduler/Nodes/tests/ResourceTest.h>
|
||||
|
||||
#include <barrier>
|
||||
#include <future>
|
||||
|
||||
using namespace DB;
|
||||
@ -73,6 +74,22 @@ struct ResourceHolder
|
||||
}
|
||||
};
|
||||
|
||||
struct MyRequest : public ResourceRequest
|
||||
{
|
||||
std::function<void()> on_execute;
|
||||
|
||||
explicit MyRequest(ResourceCost cost_, std::function<void()> on_execute_)
|
||||
: ResourceRequest(cost_)
|
||||
, on_execute(on_execute_)
|
||||
{}
|
||||
|
||||
void execute() override
|
||||
{
|
||||
if (on_execute)
|
||||
on_execute();
|
||||
}
|
||||
};
|
||||
|
||||
TEST(SchedulerRoot, Smoke)
|
||||
{
|
||||
ResourceTest t;
|
||||
@ -111,3 +128,49 @@ TEST(SchedulerRoot, Smoke)
|
||||
EXPECT_TRUE(fc2->requests.contains(&rg.request));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SchedulerRoot, Cancel)
|
||||
{
|
||||
ResourceTest t;
|
||||
|
||||
ResourceHolder r1(t);
|
||||
auto * fc1 = r1.add<ConstraintTest>("/", "<max_requests>1</max_requests>");
|
||||
r1.add<PriorityPolicy>("/prio");
|
||||
auto a = r1.addQueue("/prio/A", "<priority>1</priority>");
|
||||
auto b = r1.addQueue("/prio/B", "<priority>2</priority>");
|
||||
r1.registerResource();
|
||||
|
||||
std::barrier destruct_sync(2);
|
||||
std::barrier sync(2);
|
||||
std::thread consumer1([&]
|
||||
{
|
||||
MyRequest request(1,[&]
|
||||
{
|
||||
sync.arrive_and_wait(); // (A)
|
||||
EXPECT_TRUE(fc1->requests.contains(&request));
|
||||
sync.arrive_and_wait(); // (B)
|
||||
request.finish();
|
||||
destruct_sync.arrive_and_wait(); // (C)
|
||||
});
|
||||
a.queue->enqueueRequest(&request);
|
||||
destruct_sync.arrive_and_wait(); // (C)
|
||||
});
|
||||
|
||||
std::thread consumer2([&]
|
||||
{
|
||||
MyRequest request(1,[&]
|
||||
{
|
||||
FAIL() << "This request must be canceled, but instead executes";
|
||||
});
|
||||
sync.arrive_and_wait(); // (A) wait for request of consumer1 to be inside execute, so that constraint is in violated state and our request will not be executed immediately
|
||||
b.queue->enqueueRequest(&request);
|
||||
bool canceled = b.queue->cancelRequest(&request);
|
||||
EXPECT_TRUE(canceled);
|
||||
sync.arrive_and_wait(); // (B) release request of consumer1 to be finished
|
||||
});
|
||||
|
||||
consumer1.join();
|
||||
consumer2.join();
|
||||
|
||||
EXPECT_TRUE(fc1->requests.empty());
|
||||
}
|
||||
|
@ -71,8 +71,7 @@ public:
|
||||
// lock(mutex) is not required because `Dequeued` request cannot be used by the scheduler thread
|
||||
chassert(state == Dequeued);
|
||||
state = Finished;
|
||||
if (constraint)
|
||||
constraint->finishRequest(this);
|
||||
ResourceRequest::finish();
|
||||
}
|
||||
|
||||
static Request & local()
|
||||
@ -126,12 +125,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/// Mark request as unsuccessful; by default request is considered to be successful
|
||||
void setFailure()
|
||||
{
|
||||
request.successful = false;
|
||||
}
|
||||
|
||||
ResourceLink link;
|
||||
Request & request;
|
||||
};
|
||||
|
13
src/Common/Scheduler/ResourceRequest.cpp
Normal file
13
src/Common/Scheduler/ResourceRequest.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
#include <Common/Scheduler/ResourceRequest.h>
|
||||
#include <Common/Scheduler/ISchedulerConstraint.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void ResourceRequest::finish()
|
||||
{
|
||||
if (constraint)
|
||||
constraint->finishRequest(this);
|
||||
}
|
||||
|
||||
}
|
@ -14,9 +14,6 @@ class ISchedulerConstraint;
|
||||
using ResourceCost = Int64;
|
||||
constexpr ResourceCost ResourceCostMax = std::numeric_limits<int>::max();
|
||||
|
||||
/// Timestamps (nanoseconds since epoch)
|
||||
using ResourceNs = UInt64;
|
||||
|
||||
/*
|
||||
* Request for a resource consumption. The main moving part of the scheduling subsystem.
|
||||
* Resource requests processing workflow:
|
||||
@ -31,7 +28,7 @@ using ResourceNs = UInt64;
|
||||
* 3) Scheduler calls ISchedulerNode::dequeueRequest() that returns the request.
|
||||
* 4) Callback ResourceRequest::execute() is called to provide access to the resource.
|
||||
* 5) The resource consumption is happening outside of the scheduling subsystem.
|
||||
* 6) request->constraint->finishRequest() is called when consumption is finished.
|
||||
* 6) ResourceRequest::finish() is called when consumption is finished.
|
||||
*
|
||||
* Steps (5) and (6) can be omitted if constraint is not used by the resource.
|
||||
*
|
||||
@ -39,7 +36,10 @@ using ResourceNs = UInt64;
|
||||
* Request ownership is done outside of the scheduling subsystem.
|
||||
* After (6) request can be destructed safely.
|
||||
*
|
||||
* Request cancelling is not supported yet.
|
||||
* Request can also be canceled before (3) using ISchedulerQueue::cancelRequest().
|
||||
* Returning false means it is too late for request to be canceled. It should be processed in a regular way.
|
||||
* Returning true means successful cancel and therefore steps (4) and (5) are not going to happen
|
||||
* and step (6) MUST be omitted.
|
||||
*/
|
||||
class ResourceRequest
|
||||
{
|
||||
@ -48,32 +48,20 @@ public:
|
||||
/// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it)
|
||||
ResourceCost cost;
|
||||
|
||||
/// Request outcome
|
||||
/// Should be filled during resource consumption
|
||||
bool successful;
|
||||
|
||||
/// Scheduler node to be notified on consumption finish
|
||||
/// Auto-filled during request enqueue/dequeue
|
||||
ISchedulerConstraint * constraint;
|
||||
|
||||
/// Timestamps for introspection
|
||||
ResourceNs enqueue_ns;
|
||||
ResourceNs execute_ns;
|
||||
ResourceNs finish_ns;
|
||||
|
||||
explicit ResourceRequest(ResourceCost cost_ = 1)
|
||||
{
|
||||
reset(cost_);
|
||||
}
|
||||
|
||||
/// ResourceRequest object may be reused again after reset()
|
||||
void reset(ResourceCost cost_)
|
||||
{
|
||||
cost = cost_;
|
||||
successful = true;
|
||||
constraint = nullptr;
|
||||
enqueue_ns = 0;
|
||||
execute_ns = 0;
|
||||
finish_ns = 0;
|
||||
}
|
||||
|
||||
virtual ~ResourceRequest() = default;
|
||||
@ -83,6 +71,12 @@ public:
|
||||
/// just triggering start of a consumption, not doing the consumption itself
|
||||
/// (e.g. setting an std::promise or creating a job in a thread pool)
|
||||
virtual void execute() = 0;
|
||||
|
||||
/// Stop resource consumption and notify resource scheduler.
|
||||
/// Should be called when resource consumption is finished by consumer.
|
||||
/// ResourceRequest should not be destructed or reset before calling to `finish()`.
|
||||
/// WARNING: this function MUST not be called if request was canceled.
|
||||
void finish();
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -145,22 +145,27 @@ public:
|
||||
|
||||
std::pair<ResourceRequest *, bool> dequeueRequest() override
|
||||
{
|
||||
if (current == nullptr) // No active resources
|
||||
return {nullptr, false};
|
||||
while (true)
|
||||
{
|
||||
if (current == nullptr) // No active resources
|
||||
return {nullptr, false};
|
||||
|
||||
// Dequeue request from current resource
|
||||
auto [request, resource_active] = current->root->dequeueRequest();
|
||||
assert(request != nullptr);
|
||||
// Dequeue request from current resource
|
||||
auto [request, resource_active] = current->root->dequeueRequest();
|
||||
|
||||
// Deactivate resource if required
|
||||
if (!resource_active)
|
||||
deactivate(current);
|
||||
else
|
||||
current = current->next; // Just move round-robin pointer
|
||||
// Deactivate resource if required
|
||||
if (!resource_active)
|
||||
deactivate(current);
|
||||
else
|
||||
current = current->next; // Just move round-robin pointer
|
||||
|
||||
dequeued_requests++;
|
||||
dequeued_cost += request->cost;
|
||||
return {request, current != nullptr};
|
||||
if (request == nullptr) // Possible in case of request cancel, just retry
|
||||
continue;
|
||||
|
||||
dequeued_requests++;
|
||||
dequeued_cost += request->cost;
|
||||
return {request, current != nullptr};
|
||||
}
|
||||
}
|
||||
|
||||
bool isActive() override
|
||||
@ -245,7 +250,6 @@ private:
|
||||
|
||||
void execute(ResourceRequest * request)
|
||||
{
|
||||
request->execute_ns = clock_gettime_ns();
|
||||
request->execute();
|
||||
}
|
||||
|
||||
|
@ -149,7 +149,7 @@ public:
|
||||
|
||||
/// Pad the remainder, which is missing up to an 8-byte word.
|
||||
current_word = 0;
|
||||
switch (end - data)
|
||||
switch (end - data) /// NOLINT(bugprone-switch-missing-default-case)
|
||||
{
|
||||
case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]];
|
||||
case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]];
|
||||
|
@ -16,7 +16,7 @@ class MergeTreeTransaction;
|
||||
/// or transaction object is not needed and not passed intentionally.
|
||||
#ifndef NO_TRANSACTION_PTR
|
||||
#define NO_TRANSACTION_PTR std::shared_ptr<MergeTreeTransaction>(nullptr)
|
||||
#define NO_TRANSACTION_RAW static_cast<MergeTreeTransaction *>(nullptr)
|
||||
#define NO_TRANSACTION_RAW static_cast<MergeTreeTransaction *>(nullptr) /// NOLINT(bugprone-macro-parentheses)
|
||||
#endif
|
||||
|
||||
/// Commit Sequence Number
|
||||
|
@ -23,7 +23,7 @@ namespace ProfileEvents
|
||||
namespace Coordination
|
||||
{
|
||||
|
||||
void Exception::incrementErrorMetrics(const Error code_)
|
||||
void Exception::incrementErrorMetrics(Error code_)
|
||||
{
|
||||
if (Coordination::isUserError(code_))
|
||||
ProfileEvents::increment(ProfileEvents::ZooKeeperUserExceptions);
|
||||
@ -33,14 +33,14 @@ void Exception::incrementErrorMetrics(const Error code_)
|
||||
ProfileEvents::increment(ProfileEvents::ZooKeeperOtherExceptions);
|
||||
}
|
||||
|
||||
Exception::Exception(const std::string & msg, const Error code_, int)
|
||||
Exception::Exception(const std::string & msg, Error code_, int)
|
||||
: DB::Exception(msg, DB::ErrorCodes::KEEPER_EXCEPTION)
|
||||
, code(code_)
|
||||
{
|
||||
incrementErrorMetrics(code);
|
||||
}
|
||||
|
||||
Exception::Exception(PreformattedMessage && msg, const Error code_)
|
||||
Exception::Exception(PreformattedMessage && msg, Error code_)
|
||||
: DB::Exception(std::move(msg), DB::ErrorCodes::KEEPER_EXCEPTION)
|
||||
, code(code_)
|
||||
{
|
||||
@ -48,7 +48,7 @@ Exception::Exception(PreformattedMessage && msg, const Error code_)
|
||||
incrementErrorMetrics(code);
|
||||
}
|
||||
|
||||
Exception::Exception(const Error code_)
|
||||
Exception::Exception(Error code_)
|
||||
: Exception(code_, "Coordination error: {}", errorMessage(code_))
|
||||
{
|
||||
}
|
||||
|
@ -466,13 +466,13 @@ class Exception : public DB::Exception
|
||||
{
|
||||
private:
|
||||
/// Delegate constructor, used to minimize repetition; last parameter used for overload resolution.
|
||||
Exception(const std::string & msg, const Error code_, int); /// NOLINT
|
||||
Exception(PreformattedMessage && msg, const Error code_);
|
||||
Exception(const std::string & msg, Error code_, int); /// NOLINT
|
||||
Exception(PreformattedMessage && msg, Error code_);
|
||||
|
||||
/// Message must be a compile-time constant
|
||||
template <typename T>
|
||||
requires std::is_convertible_v<T, String>
|
||||
Exception(T && message, const Error code_) : DB::Exception(std::forward<T>(message), DB::ErrorCodes::KEEPER_EXCEPTION, /* remote_= */ false), code(code_)
|
||||
Exception(T && message, Error code_) : DB::Exception(std::forward<T>(message), DB::ErrorCodes::KEEPER_EXCEPTION, /* remote_= */ false), code(code_)
|
||||
{
|
||||
incrementErrorMetrics(code);
|
||||
}
|
||||
@ -480,23 +480,23 @@ private:
|
||||
static void incrementErrorMetrics(Error code_);
|
||||
|
||||
public:
|
||||
explicit Exception(const Error code_); /// NOLINT
|
||||
explicit Exception(Error code_); /// NOLINT
|
||||
Exception(const Exception & exc);
|
||||
|
||||
template <typename... Args>
|
||||
Exception(const Error code_, FormatStringHelper<Args...> fmt, Args &&... args)
|
||||
Exception(Error code_, FormatStringHelper<Args...> fmt, Args &&... args)
|
||||
: DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, std::move(fmt), std::forward<Args>(args)...)
|
||||
, code(code_)
|
||||
{
|
||||
incrementErrorMetrics(code);
|
||||
}
|
||||
|
||||
inline static Exception createDeprecated(const std::string & msg, const Error code_)
|
||||
inline static Exception createDeprecated(const std::string & msg, Error code_)
|
||||
{
|
||||
return Exception(msg, code_, 0);
|
||||
}
|
||||
|
||||
inline static Exception fromPath(const Error code_, const std::string & path)
|
||||
inline static Exception fromPath(Error code_, const std::string & path)
|
||||
{
|
||||
return Exception(code_, "Coordination error: {}, path {}", errorMessage(code_), path);
|
||||
}
|
||||
@ -504,7 +504,7 @@ public:
|
||||
/// Message must be a compile-time constant
|
||||
template <typename T>
|
||||
requires std::is_convertible_v<T, String>
|
||||
inline static Exception fromMessage(const Error code_, T && message)
|
||||
inline static Exception fromMessage(Error code_, T && message)
|
||||
{
|
||||
return Exception(std::forward<T>(message), code_);
|
||||
}
|
||||
|
@ -19,14 +19,14 @@ namespace Poco { class Logger; }
|
||||
|
||||
using LogSeriesLimiterPtr = std::shared_ptr<LogSeriesLimiter>;
|
||||
|
||||
namespace
|
||||
namespace impl
|
||||
{
|
||||
[[maybe_unused]] LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; }
|
||||
[[maybe_unused]] LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); }
|
||||
[[maybe_unused]] const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; }
|
||||
[[maybe_unused]] std::unique_ptr<LogToStrImpl> getLoggerHelper(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
|
||||
[[maybe_unused]] std::unique_ptr<LogFrequencyLimiterIml> getLoggerHelper(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }
|
||||
[[maybe_unused]] LogSeriesLimiterPtr getLoggerHelper(LogSeriesLimiterPtr & logger) { return logger; }
|
||||
[[maybe_unused]] inline LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; }
|
||||
[[maybe_unused]] inline LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); }
|
||||
[[maybe_unused]] inline const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; }
|
||||
[[maybe_unused]] inline std::unique_ptr<LogToStrImpl> getLoggerHelper(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
|
||||
[[maybe_unused]] inline std::unique_ptr<LogFrequencyLimiterIml> getLoggerHelper(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }
|
||||
[[maybe_unused]] inline LogSeriesLimiterPtr getLoggerHelper(LogSeriesLimiterPtr & logger) { return logger; }
|
||||
}
|
||||
|
||||
#define LOG_IMPL_FIRST_ARG(X, ...) X
|
||||
@ -65,7 +65,7 @@ namespace
|
||||
|
||||
#define LOG_IMPL(logger, priority, PRIORITY, ...) do \
|
||||
{ \
|
||||
auto _logger = ::getLoggerHelper(logger); \
|
||||
auto _logger = ::impl::getLoggerHelper(logger); \
|
||||
const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
|
||||
(DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \
|
||||
if (!_is_clients_log && !_logger->is((PRIORITY))) \
|
||||
|
@ -123,17 +123,15 @@ protected:
|
||||
|
||||
std::string getServerUrl() const
|
||||
{
|
||||
return "http://" + server_data.socket->address().toString();
|
||||
return "http://" + server_data.server->socket().address().toString();
|
||||
}
|
||||
|
||||
void startServer()
|
||||
{
|
||||
server_data.reset();
|
||||
server_data.params = new Poco::Net::HTTPServerParams();
|
||||
server_data.socket = std::make_unique<Poco::Net::ServerSocket>(server_data.port);
|
||||
server_data.handler_factory = new HTTPRequestHandlerFactory(slowdown_receive);
|
||||
server_data.server = std::make_unique<Poco::Net::HTTPServer>(
|
||||
server_data.handler_factory, *server_data.socket, server_data.params);
|
||||
server_data.handler_factory, server_data.port);
|
||||
|
||||
server_data.server->start();
|
||||
}
|
||||
@ -155,8 +153,7 @@ protected:
|
||||
{
|
||||
// just some port to avoid collisions with others tests
|
||||
UInt16 port = 9871;
|
||||
Poco::Net::HTTPServerParams::Ptr params;
|
||||
std::unique_ptr<Poco::Net::ServerSocket> socket;
|
||||
|
||||
HTTPRequestHandlerFactory::Ptr handler_factory;
|
||||
std::unique_ptr<Poco::Net::HTTPServer> server;
|
||||
|
||||
@ -171,8 +168,6 @@ protected:
|
||||
|
||||
server = nullptr;
|
||||
handler_factory = nullptr;
|
||||
socket = nullptr;
|
||||
params = nullptr;
|
||||
}
|
||||
|
||||
~ServerData() {
|
||||
|
@ -121,7 +121,8 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
|
||||
auth_settings.use_insecure_imds_request.value_or(false),
|
||||
auth_settings.expiration_window_seconds.value_or(S3::DEFAULT_EXPIRATION_WINDOW_SECONDS),
|
||||
auth_settings.no_sign_request.value_or(false),
|
||||
});
|
||||
},
|
||||
credentials.GetSessionToken());
|
||||
|
||||
auto new_client = std::make_shared<KeeperSnapshotManagerS3::S3Configuration>(std::move(new_uri), std::move(auth_settings), std::move(client));
|
||||
|
||||
|
@ -374,7 +374,7 @@ void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::Abstr
|
||||
if (!shared->keeper_dispatcher)
|
||||
return;
|
||||
|
||||
shared->keeper_dispatcher->updateConfiguration(getConfigRef(), getMacros());
|
||||
shared->keeper_dispatcher->updateConfiguration(config_, getMacros());
|
||||
}
|
||||
|
||||
std::shared_ptr<zkutil::ZooKeeper> Context::getZooKeeper() const
|
||||
|
@ -36,7 +36,7 @@ void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_colum
|
||||
|
||||
void insertPostgreSQLValue(
|
||||
IColumn & column, std::string_view value,
|
||||
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
|
||||
ExternalResultDescription::ValueType type, DataTypePtr data_type,
|
||||
const std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx)
|
||||
{
|
||||
switch (type)
|
||||
@ -170,7 +170,7 @@ void insertPostgreSQLValue(
|
||||
|
||||
|
||||
void preparePostgreSQLArrayInfo(
|
||||
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type)
|
||||
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, DataTypePtr data_type)
|
||||
{
|
||||
const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get());
|
||||
auto nested = array_type->getNestedType();
|
||||
|
@ -22,11 +22,11 @@ struct PostgreSQLArrayInfo
|
||||
|
||||
void insertPostgreSQLValue(
|
||||
IColumn & column, std::string_view value,
|
||||
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
|
||||
ExternalResultDescription::ValueType type, DataTypePtr data_type,
|
||||
const std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx);
|
||||
|
||||
void preparePostgreSQLArrayInfo(
|
||||
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type);
|
||||
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, DataTypePtr data_type);
|
||||
|
||||
void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_column);
|
||||
|
||||
|
@ -15,6 +15,7 @@ namespace ErrorCodes
|
||||
extern const int THERE_IS_NO_PROFILE;
|
||||
extern const int NO_ELEMENTS_IN_CONFIG;
|
||||
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS)
|
||||
@ -114,7 +115,11 @@ std::vector<String> Settings::getAllRegisteredNames() const
|
||||
void Settings::set(std::string_view name, const Field & value)
|
||||
{
|
||||
if (name == "compatibility")
|
||||
{
|
||||
if (value.getType() != Field::Types::Which::String)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type of value for setting 'compatibility'. Expected String, got {}", value.getTypeName());
|
||||
applyCompatibilitySetting(value.get<String>());
|
||||
}
|
||||
/// If we change setting that was changed by compatibility setting before
|
||||
/// we should remove it from settings_changed_by_compatibility_setting,
|
||||
/// otherwise the next time we will change compatibility setting
|
||||
|
@ -567,7 +567,7 @@ class IColumn;
|
||||
M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
|
||||
\
|
||||
M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \
|
||||
M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \
|
||||
M(DefaultTableEngine, default_table_engine, DefaultTableEngine::MergeTree, "Default table engine used when ENGINE is not set in CREATE statement.",0) \
|
||||
M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \
|
||||
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
|
||||
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
|
||||
@ -778,6 +778,7 @@ class IColumn;
|
||||
M(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be downloaded by a single query", 0) \
|
||||
M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \
|
||||
M(UInt64, filesystem_cache_segments_batch_size, 20, "Limit on size of a single batch of file segments that a read buffer can request from cache. Too low value will lead to excessive requests to cache, too large may slow down eviction from cache", 0) \
|
||||
M(UInt64, filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, 1000, "Wait time to lock cache for sapce reservation in filesystem cache", 0) \
|
||||
\
|
||||
M(Bool, use_page_cache_for_disks_without_file_cache, false, "Use userspace page cache for remote disks that don't have filesystem cache enabled.", 0) \
|
||||
M(Bool, read_from_page_cache_if_exists_otherwise_bypass_cache, false, "Use userspace page cache in passive mode, similar to read_from_filesystem_cache_if_exists_otherwise_bypass_cache.", 0) \
|
||||
@ -1192,6 +1193,7 @@ class IColumn;
|
||||
FORMAT_FACTORY_SETTINGS(M, ALIAS) \
|
||||
OBSOLETE_FORMAT_SETTINGS(M, ALIAS) \
|
||||
|
||||
/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
|
||||
DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS)
|
||||
|
||||
|
||||
@ -1236,6 +1238,7 @@ private:
|
||||
/*
|
||||
* User-specified file format settings for File and URL engines.
|
||||
*/
|
||||
/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
|
||||
DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, LIST_OF_ALL_FORMAT_SETTINGS)
|
||||
|
||||
struct FormatFactorySettings : public BaseSettings<FormatFactorySettingsTraits>
|
||||
|
@ -90,9 +90,11 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"},
|
||||
{"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"},
|
||||
{"page_cache_inject_eviction", false, false, "Added userspace page cache"},
|
||||
{"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"},
|
||||
{"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"},
|
||||
{"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."},
|
||||
{"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"},
|
||||
{"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"},
|
||||
}},
|
||||
{"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"},
|
||||
{"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"},
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <DataTypes/Serializations/SerializationNullable.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
@ -56,6 +57,30 @@ bool DataTypeNullable::equals(const IDataType & rhs) const
|
||||
return rhs.isNullable() && nested_data_type->equals(*static_cast<const DataTypeNullable &>(rhs).nested_data_type);
|
||||
}
|
||||
|
||||
ColumnPtr DataTypeNullable::createColumnConst(size_t size, const Field & field) const
|
||||
{
|
||||
if (onlyNull())
|
||||
{
|
||||
auto column = createColumn();
|
||||
column->insert(field);
|
||||
return ColumnConst::create(std::move(column), size);
|
||||
}
|
||||
|
||||
auto column = nested_data_type->createColumn();
|
||||
bool is_null = field.isNull();
|
||||
|
||||
if (is_null)
|
||||
nested_data_type->insertDefaultInto(*column);
|
||||
else
|
||||
column->insert(field);
|
||||
|
||||
auto null_mask = ColumnUInt8::create();
|
||||
null_mask->getData().push_back(is_null ? 1 : 0);
|
||||
|
||||
auto res = ColumnNullable::create(std::move(column), std::move(null_mask));
|
||||
return ColumnConst::create(std::move(res), size);
|
||||
}
|
||||
|
||||
SerializationPtr DataTypeNullable::doGetDefaultSerialization() const
|
||||
{
|
||||
return std::make_shared<SerializationNullable>(nested_data_type->getDefaultSerialization());
|
||||
|
@ -41,6 +41,7 @@ public:
|
||||
bool onlyNull() const override;
|
||||
bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); }
|
||||
bool canBePromoted() const override { return nested_data_type->canBePromoted(); }
|
||||
ColumnPtr createColumnConst(size_t size, const Field & field) const override;
|
||||
|
||||
const DataTypePtr & getNestedType() const { return nested_data_type; }
|
||||
|
||||
|
@ -423,7 +423,7 @@ MutableColumns CacheDictionary<dictionary_key_type>::aggregateColumnsInOrderOfKe
|
||||
const DictionaryStorageFetchRequest & request,
|
||||
const MutableColumns & fetched_columns,
|
||||
const PaddedPODArray<KeyState> & key_index_to_state,
|
||||
IColumn::Filter * const default_mask) const
|
||||
IColumn::Filter * default_mask) const
|
||||
{
|
||||
MutableColumns aggregated_columns = request.makeAttributesResultColumns();
|
||||
|
||||
@ -473,7 +473,7 @@ MutableColumns CacheDictionary<dictionary_key_type>::aggregateColumns(
|
||||
const PaddedPODArray<KeyState> & key_index_to_fetched_columns_from_storage_result,
|
||||
const MutableColumns & fetched_columns_during_update,
|
||||
const HashMap<KeyType, size_t> & found_keys_to_fetched_columns_during_update_index,
|
||||
IColumn::Filter * const default_mask) const
|
||||
IColumn::Filter * default_mask) const
|
||||
{
|
||||
/**
|
||||
* Aggregation of columns fetched from storage and from source during update.
|
||||
|
@ -162,7 +162,7 @@ private:
|
||||
const DictionaryStorageFetchRequest & request,
|
||||
const MutableColumns & fetched_columns,
|
||||
const PaddedPODArray<KeyState> & key_index_to_state,
|
||||
IColumn::Filter * const default_mask = nullptr) const;
|
||||
IColumn::Filter * default_mask = nullptr) const;
|
||||
|
||||
MutableColumns aggregateColumns(
|
||||
const PaddedPODArray<KeyType> & keys,
|
||||
|
@ -14,7 +14,7 @@ class IRegionsHierarchyReader
|
||||
public:
|
||||
virtual bool readNext(RegionEntry & entry) = 0;
|
||||
|
||||
virtual ~IRegionsHierarchyReader() {}
|
||||
virtual ~IRegionsHierarchyReader() = default;
|
||||
};
|
||||
|
||||
using IRegionsHierarchyReaderPtr = std::unique_ptr<IRegionsHierarchyReader>;
|
||||
|
@ -568,7 +568,7 @@ bool RegExpTreeDictionary::setAttributesShortCircuit(
|
||||
const String & data,
|
||||
std::unordered_set<UInt64> & visited_nodes,
|
||||
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
|
||||
std::unordered_set<String> * const defaults) const
|
||||
std::unordered_set<String> * defaults) const
|
||||
{
|
||||
if (visited_nodes.contains(id))
|
||||
return attributes_to_set.attributesFull() == attributes.size();
|
||||
|
@ -210,7 +210,7 @@ private:
|
||||
const String & data,
|
||||
std::unordered_set<UInt64> & visited_nodes,
|
||||
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
|
||||
std::unordered_set<String> * const defaults) const;
|
||||
std::unordered_set<String> * defaults) const;
|
||||
|
||||
struct RegexTreeNode;
|
||||
using RegexTreeNodePtr = std::shared_ptr<RegexTreeNode>;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user