Mirror of https://github.com/ClickHouse/ClickHouse.git — synced 2024-11-21 15:12:02 +00:00

Merge remote-tracking branch 'blessed/master' into experiment_int128

This commit is contained in: commit 4626a0b3a1

.clang-tidy — 195 changed lines
@@ -5,128 +5,127 @@
# a) the new check is not controversial (this includes many checks in readability-* and google-*) or
# b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*).

# TODO: Once clang(-tidy) 17 is the minimum, we can convert this list to YAML
# See https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/ReleaseNotes.html#improvements-to-clang-tidy

# TODO Let clang-tidy check headers in further directories
# --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$'
HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$'

Checks: '*,
-abseil-*,

-altera-*,

-android-*,

-bugprone-assignment-in-if-condition,
-bugprone-branch-clone,
-bugprone-easily-swappable-parameters,
-bugprone-exception-escape,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions,
-bugprone-not-null-terminated-result,
-bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
-bugprone-unchecked-optional-access,

-cert-dcl16-c,
-cert-dcl37-c,
-cert-dcl51-cpp,
-cert-err58-cpp,
-cert-msc32-c,
-cert-msc51-cpp,
-cert-oop54-cpp,
-cert-oop57-cpp,

-clang-analyzer-unix.Malloc,

-cppcoreguidelines-*, # impractical in a codebase as large as ClickHouse, also slow

-darwin-*,

-fuchsia-*,

-google-build-using-namespace,
-google-readability-braces-around-statements,
-google-readability-casting,
-google-readability-function-size,
-google-readability-namespace-comments,
-google-readability-todo,

-hicpp-avoid-c-arrays,
-hicpp-avoid-goto,
-hicpp-braces-around-statements,
-hicpp-explicit-conversions,
-hicpp-function-size,
-hicpp-member-init,
-hicpp-move-const-arg,
-hicpp-multiway-paths-covered,
-hicpp-named-parameter,
-hicpp-no-array-decay,
-hicpp-no-assembler,
-hicpp-no-malloc,
-hicpp-signed-bitwise,
-hicpp-special-member-functions,
-hicpp-uppercase-literal-suffix,
-hicpp-use-auto,
-hicpp-use-emplace,
-hicpp-vararg,

-linuxkernel-*,

-llvm-*,

-llvmlibc-*,

-openmp-*,

-misc-const-correctness,
-misc-include-cleaner, # useful but far too many occurrences
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-confusable-identifiers, # useful but slooow
-misc-use-anonymous-namespace,

-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
-modernize-macro-to-enum,
-modernize-pass-by-value,
-modernize-return-braced-init-list,
-modernize-use-auto,
-modernize-use-default-member-init,
-modernize-use-emplace,
-modernize-use-nodiscard,
-modernize-use-override,
-modernize-use-trailing-return-type,

-performance-inefficient-string-concatenation,
-performance-no-int-to-ptr,
-performance-avoid-endl,
-performance-unnecessary-value-param,

-portability-simd-intrinsics,

-readability-avoid-unconditional-preprocessor-if,
-readability-braces-around-statements,
-readability-convert-member-functions-to-static,
-readability-else-after-return,
-readability-function-cognitive-complexity,
-readability-function-size,
-readability-identifier-length,
-readability-identifier-naming, # useful but too slow
-readability-implicit-bool-conversion,
-readability-isolate-declaration,
-readability-magic-numbers,
-readability-named-parameter,
-readability-redundant-declaration,
-readability-simplify-boolean-expr,
-readability-static-accessed-through-instance,
-readability-suspicious-call-argument,
-readability-uppercase-literal-suffix,
-readability-use-anyofallof,

-zircon-*,
'

Checks: [
'*',

'-abseil-*',

'-altera-*',

'-android-*',

'-bugprone-assignment-in-if-condition',
'-bugprone-branch-clone',
'-bugprone-easily-swappable-parameters',
'-bugprone-exception-escape',
'-bugprone-implicit-widening-of-multiplication-result',
'-bugprone-narrowing-conversions',
'-bugprone-not-null-terminated-result',
'-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
'-bugprone-unchecked-optional-access',

'-cert-dcl16-c',
'-cert-dcl37-c',
'-cert-dcl51-cpp',
'-cert-err58-cpp',
'-cert-msc32-c',
'-cert-msc51-cpp',
'-cert-oop54-cpp',
'-cert-oop57-cpp',

'-clang-analyzer-unix.Malloc',

'-cppcoreguidelines-*', # impractical in a codebase as large as ClickHouse, also slow

'-darwin-*',

'-fuchsia-*',

'-google-build-using-namespace',
'-google-readability-braces-around-statements',
'-google-readability-casting',
'-google-readability-function-size',
'-google-readability-namespace-comments',
'-google-readability-todo',

'-hicpp-avoid-c-arrays',
'-hicpp-avoid-goto',
'-hicpp-braces-around-statements',
'-hicpp-explicit-conversions',
'-hicpp-function-size',
'-hicpp-member-init',
'-hicpp-move-const-arg',
'-hicpp-multiway-paths-covered',
'-hicpp-named-parameter',
'-hicpp-no-array-decay',
'-hicpp-no-assembler',
'-hicpp-no-malloc',
'-hicpp-signed-bitwise',
'-hicpp-special-member-functions',
'-hicpp-uppercase-literal-suffix',
'-hicpp-use-auto',
'-hicpp-use-emplace',
'-hicpp-vararg',

'-linuxkernel-*',

'-llvm-*',

'-llvmlibc-*',

'-openmp-*',

'-misc-const-correctness',
'-misc-include-cleaner', # useful but far too many occurrences
'-misc-no-recursion',
'-misc-non-private-member-variables-in-classes',
'-misc-confusable-identifiers', # useful but slooow
'-misc-use-anonymous-namespace',

'-modernize-avoid-c-arrays',
'-modernize-concat-nested-namespaces',
'-modernize-macro-to-enum',
'-modernize-pass-by-value',
'-modernize-return-braced-init-list',
'-modernize-use-auto',
'-modernize-use-default-member-init',
'-modernize-use-emplace',
'-modernize-use-nodiscard',
'-modernize-use-override',
'-modernize-use-trailing-return-type',

'-performance-inefficient-string-concatenation',
'-performance-no-int-to-ptr',
'-performance-avoid-endl',
'-performance-unnecessary-value-param',

'-portability-simd-intrinsics',

'-readability-avoid-unconditional-preprocessor-if',
'-readability-braces-around-statements',
'-readability-convert-member-functions-to-static',
'-readability-else-after-return',
'-readability-function-cognitive-complexity',
'-readability-function-size',
'-readability-identifier-length',
'-readability-identifier-naming', # useful but too slow
'-readability-implicit-bool-conversion',
'-readability-isolate-declaration',
'-readability-magic-numbers',
'-readability-named-parameter',
'-readability-redundant-declaration',
'-readability-simplify-boolean-expr',
'-readability-static-accessed-through-instance',
'-readability-suspicious-call-argument',
'-readability-uppercase-literal-suffix',
'-readability-use-anyofallof',

'-zircon-*'
]

WarningsAsErrors: '*'
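The new value uses the YAML list syntax that clang-tidy 17 accepts for `Checks` (hence the removed TODO). A minimal standalone sketch of that syntax, with only a couple of illustrative entries rather than the full ClickHouse list:

```yaml
# Minimal .clang-tidy sketch of the list form used above (illustrative subset).
Checks: [
  '*',
  '-abseil-*',   # per-entry comments remain possible in the list form
  '-zircon-*'
]
WarningsAsErrors: '*'
```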
.github/workflows/nightly.yml (vendored) — 59 changed lines

@@ -45,62 +45,3 @@ jobs:
        with:
          data: "${{ needs.RunConfig.outputs.data }}"
          set_latest: true
  SonarCloud:
    runs-on: [self-hosted, builder]
    env:
      SONAR_SCANNER_VERSION: 4.8.0.2856
      SONAR_SERVER_URL: "https://sonarcloud.io"
      BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
      CC: clang-17
      CXX: clang++-17
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
          fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
          filter: tree:0
          submodules: true
      - name: Set up JDK 11
        uses: actions/setup-java@v1
        with:
          java-version: 11
      - name: Download and set up sonar-scanner
        env:
          SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip
        run: |
          mkdir -p "$HOME/.sonar"
          curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}"
          unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/"
          echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH"
      - name: Download and set up build-wrapper
        env:
          BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip
        run: |
          curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}"
          unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/"
          echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH"
      - name: Set Up Build Tools
        run: |
          sudo apt-get update
          sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm
          sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
      - name: Run build-wrapper
        run: |
          mkdir build
          cd build
          cmake ..
          cd ..
          build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/
      - name: Run sonar-scanner
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
        run: |
          sonar-scanner \
            --define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \
            --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
            --define sonar.projectKey="ClickHouse_ClickHouse" \
            --define sonar.organization="clickhouse-java" \
            --define sonar.cfamily.cpp23.enabled=true \
            --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql"
.github/workflows/pull_request.yml (vendored) — 1 changed line

@@ -172,6 +172,7 @@ jobs:
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 finish_check.py
          python3 merge_pr.py --check-approved

#############################################################################################
.github/workflows/reusable_build.yml (vendored) — 3 changed lines

@@ -43,8 +43,7 @@ jobs:
    runs-on: [self-hosted, '${{inputs.runner_type}}']
    steps:
      - name: Check out repository code
        # WIP: temporary try commit with limited perallelization of checkout
        uses: ClickHouse/checkout@0be3f7b3098bae494d3ef5d29d2e0676fb606232
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
          ref: ${{ fromJson(inputs.data).git_ref }}
@@ -56,13 +56,13 @@ option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile t
if (ENABLE_CHECK_HEAVY_BUILDS)
    # set DATA (since RSS does not work since 2.6.x+) to 5G
    set (RLIMIT_DATA 5000000000)
    # set VIRT (RLIMIT_AS) to 10G (DATA*10)
    # set VIRT (RLIMIT_AS) to 10G (DATA*2)
    set (RLIMIT_AS 10000000000)
    # set CPU time limit to 1000 seconds
    set (RLIMIT_CPU 1000)

    # -fsanitize=memory and address are too heavy
    if (SANITIZE)
    if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE)
        set (RLIMIT_DATA 10000000000) # 10G
    endif()
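For context, these `RLIMIT_*` values are typically fed into a compiler launcher so that a single runaway translation unit is killed instead of exhausting the build machine. The wiring below is an illustrative sketch of that idea, not copied from this hunk:

```cmake
# Illustrative only: apply the RLIMIT_* values above to every compiler
# invocation through prlimit(1); a unit exceeding the limits fails fast.
if (ENABLE_CHECK_HEAVY_BUILDS)
    set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=${RLIMIT_CPU})
endif()
```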
@@ -110,11 +110,6 @@ endif()
# - sanitize.cmake
add_library(global-libs INTERFACE)

# We don't want to instrument everything with fuzzer, but only specific targets (see below),
# also, since we build our own llvm, we specifically don't want to instrument
# libFuzzer library itself - it would result in infinite recursion
#include (cmake/fuzzer.cmake)

include (cmake/sanitize.cmake)

option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON)

@@ -554,7 +549,9 @@ if (ENABLE_RUST)
    endif()
endif()

if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
    AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND NOT ENABLE_FUZZING
    AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
    set(CHECK_LARGE_OBJECT_SIZES_DEFAULT ON)
else ()
    set(CHECK_LARGE_OBJECT_SIZES_DEFAULT OFF)
@@ -577,10 +574,7 @@ if (FUZZER)
        if (NOT(target_type STREQUAL "INTERFACE_LIBRARY" OR target_type STREQUAL "UTILITY"))
            target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link")
        endif()
        # clickhouse fuzzer isn't working correctly
        # initial PR https://github.com/ClickHouse/ClickHouse/pull/27526
        #if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse")
        if (target_type STREQUAL "EXECUTABLE" AND target MATCHES ".+_fuzzer")
        if (target_type STREQUAL "EXECUTABLE" AND (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse"))
            message(STATUS "${target} instrumented with fuzzer")
            target_link_libraries(${target} PUBLIC ch_contrib::fuzzer)
            # Add to fuzzers bundle
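The hunk above links libFuzzer only into executables matching `.+_fuzzer` (and now `clickhouse` itself), while everything else is compiled with `-fsanitize=fuzzer-no-link`, i.e. instrumented but without an entry point. A minimal, generic libFuzzer harness of the kind such targets provide looks like this — a hypothetical example, not one of ClickHouse's actual fuzzers:

```cpp
#include <cstddef>
#include <cstdint>
#include <string>

// libFuzzer supplies main(); a fuzzer target only defines this entry point.
// Code built with -fsanitize=fuzzer-no-link gets coverage instrumentation
// but no entry point, so only the *_fuzzer executables become fuzzers.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
    std::string input(reinterpret_cast<const char *>(data), size);
    // ... feed `input` to the component under test, e.g. a parser ...
    return 0; // return 0 for inputs that did not crash
}
```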
@@ -1,7 +1,7 @@
#include "coverage.h"

#include <sys/mman.h>

#pragma GCC diagnostic ignored "-Wreserved-identifier"
#pragma clang diagnostic ignored "-Wreserved-identifier"

/// WITH_COVERAGE enables the default implementation of code coverage,

@@ -59,8 +59,8 @@ using ComparatorWrapper = Comparator;

#endif

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"

#include <miniselect/floyd_rivest_select.h>

@@ -115,7 +115,7 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
    ::partial_sort(first, middle, last, comparator());
}

#pragma GCC diagnostic pop
#pragma clang diagnostic pop

template <typename RandomIt, typename Compare>
void sort(RandomIt first, RandomIt last, Compare compare)
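These hunks swap `#pragma GCC diagnostic` for `#pragma clang diagnostic`, the spelling clang treats as its own (ClickHouse builds with clang only). The usual push/ignore/pop pattern for silencing a warning just around a third-party include is sketched below; the included header is illustrative:

```cpp
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"

// Third-party code we cannot fix is included with the warning suppressed ...
#include <miniselect/floyd_rivest_select.h>

#pragma clang diagnostic pop
// ... and the warning is active again for our own code from here on.
```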
@@ -93,7 +93,7 @@ void TCPServerDispatcher::release()

void TCPServerDispatcher::run()
{
    AutoPtr<TCPServerDispatcher> guard(this, true); // ensure object stays alive
    AutoPtr<TCPServerDispatcher> guard(this); // ensure object stays alive

    int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds();

@@ -149,11 +149,13 @@ void TCPServerDispatcher::enqueue(const StreamSocket& socket)
    {
        try
        {
            this->duplicate();
            _threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName);
            ++_currentThreads;
        }
        catch (Poco::Exception& exc)
        {
            this->release();
            ++_refusedConnections;
            std::cerr << "Got exception while starting thread for connection. Error code: "
                << exc.code() << ", message: '" << exc.displayText() << "'" << std::endl;
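The change leans on the two `Poco::AutoPtr` constructors having different reference-counting behaviour: `AutoPtr(p)` adopts an existing reference without incrementing the count, while `AutoPtr(p, true)` calls `duplicate()` first. A compact sketch of that convention, using a stand-in ref-counted class rather than the real dispatcher:

```cpp
#include <Poco/AutoPtr.h>
#include <Poco/RefCountedObject.h>

class Worker : public Poco::RefCountedObject
{
};

void example()
{
    Worker * w = new Worker;               // refcount == 1

    Poco::AutoPtr<Worker> shared(w, true); // duplicates: refcount == 2
    Poco::AutoPtr<Worker> adopting(w);     // adopts the original reference: still 2

    // Each AutoPtr calls release() when it goes out of scope, so the object
    // is destroyed exactly when the count drops back to zero.
}
```

Read this way, the explicit `duplicate()` added in `enqueue()` appears to pair with the now-adopting guard in `run()` (or with the added `release()` if the thread fails to start), keeping the count balanced per started thread.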
@@ -1,17 +0,0 @@
# see ./CMakeLists.txt for variable declaration
if (FUZZER)
    if (FUZZER STREQUAL "libfuzzer")
        # NOTE: Eldar Zaitov decided to name it "libfuzzer" instead of "fuzzer" to keep in mind another possible fuzzer backends.
        # NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them
        # (tests) have entry point for fuzzer and it's not checked.
        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1")
        set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1")

        # NOTE: oss-fuzz can change LIB_FUZZING_ENGINE variable
        if (NOT LIB_FUZZING_ENGINE)
            set (LIB_FUZZING_ENGINE "-fsanitize=fuzzer")
        endif ()
    else ()
        message (FATAL_ERROR "Unknown fuzzer type: ${FUZZER}")
    endif ()
endif()
contrib/libhdfs3 (vendored) — 2 changed lines

@@ -1 +1 @@
Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103
Subproject commit 0d04201c45359f0d0701fb1e8297d25eff7cfecf
@@ -17,6 +17,8 @@
#ifndef METROHASH_METROHASH_128_H
#define METROHASH_METROHASH_128_H

// NOLINTBEGIN(readability-avoid-const-params-in-decls)

#include <stdint.h>

class MetroHash128

@@ -68,5 +70,6 @@ private:
void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);
void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);

// NOLINTEND(readability-avoid-const-params-in-decls)

#endif // #ifndef METROHASH_METROHASH_128_H
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.2.1.2248"
ARG VERSION="24.2.2.71"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""
@@ -4,6 +4,9 @@ FROM clickhouse/fasttest:$FROM_TAG
ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}

# If the cctools is updated, then first build it in the CI, then update here in a different commit
COPY --from=clickhouse/cctools:d9e3596e706b /cctools /cctools

# Rust toolchain and libraries
ENV RUSTUP_HOME=/rust/rustup
ENV CARGO_HOME=/rust/cargo

@@ -73,9 +76,6 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \
    "https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \
    && chmod +x /usr/bin/clang-tidy-cache

# If the cctools is updated, then first build it in the CI, then update here in a different commit
COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools

RUN mkdir /workdir && chmod 777 /workdir
WORKDIR /workdir
@@ -2,7 +2,7 @@
# It's based on the assumption that we don't care of the cctools version so much
# It event does not depend on the clickhouse/fasttest in the `docker/images.json`
ARG FROM_TAG=latest
FROM clickhouse/fasttest:$FROM_TAG
FROM clickhouse/fasttest:$FROM_TAG as builder

ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}

@@ -29,3 +29,6 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
    && make install -j$(nproc) \
    && cd ../.. \
    && rm -rf cctools-port

FROM scratch
COPY --from=builder /cctools /cctools
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.2.1.2248"
ARG VERSION="24.2.2.71"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

@@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.2.1.2248"
ARG VERSION="24.2.2.71"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# set non-empty deb_location_url url to create a docker image
docs/changelogs/v23.12.5.81-stable.md (new file) — 64 lines

@@ -0,0 +1,64 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v23.12.5.81-stable (a0fbe3ae813) FIXME as compared to v23.12.4.15-stable (4233d111d20)

#### Improvement
* Backported in [#60290](https://github.com/ClickHouse/ClickHouse/issues/60290): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#60830](https://github.com/ClickHouse/ClickHouse/issues/60830): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).

#### Build/Testing/Packaging Improvement
* Backported in [#59883](https://github.com/ClickHouse/ClickHouse/issues/59883): If you want to run initdb scripts every time when the ClickHouse container is starting, you should initialize the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).

#### CI Fix or Improvement (changelog entry is not required)

* Backported in [#60767](https://github.com/ClickHouse/ClickHouse/issues/60767): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#60582](https://github.com/ClickHouse/ClickHouse/issues/60582): Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)).
* Backported in [#61041](https://github.com/ClickHouse/ClickHouse/issues/61041): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61030](https://github.com/ClickHouse/ClickHouse/issues/61030): ... [#61022](https://github.com/ClickHouse/ClickHouse/pull/61022) ([Max K.](https://github.com/maxknv)).
* Backported in [#61224](https://github.com/ClickHouse/ClickHouse/issues/61224): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#61190](https://github.com/ClickHouse/ClickHouse/issues/61190): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).

#### NO CL ENTRY

* NO CL ENTRY: 'Revert "Backport [#59798](https://github.com/ClickHouse/ClickHouse/issues/59798) to 23.12: CI: do not reuse builds on release branches"'. [#59979](https://github.com/ClickHouse/ClickHouse/pull/59979) ([Max K.](https://github.com/maxknv)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)).
* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)).
* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
docs/changelogs/v23.3.21.26-lts.md (new file) — 24 lines

@@ -0,0 +1,24 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v23.3.21.26-lts (d9672a3731f) FIXME as compared to v23.3.20.27-lts (cc974ba4f81)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix reading from sparse columns after restart [#49660](https://github.com/ClickHouse/ClickHouse/pull/49660) ([Anton Popov](https://github.com/CurtizJ)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
docs/changelogs/v23.8.11.28-lts.md (new file) — 30 lines

@@ -0,0 +1,30 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v23.8.11.28-lts (31879d2ab4c) FIXME as compared to v23.8.10.43-lts (a278225bba9)

#### Improvement
* Backported in [#60828](https://github.com/ClickHouse/ClickHouse/issues/60828): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).

#### NO CL ENTRY

* NO CL ENTRY: 'Use the current branch test-utils to build cctools'. [#61276](https://github.com/ClickHouse/ClickHouse/pull/61276) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
docs/changelogs/v24.1.7.18-stable.md (new file) — 26 lines

@@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v24.1.7.18-stable (90925babd78) FIXME as compared to v24.1.6.52-stable (fa09f677bc9)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).

#### CI Fix or Improvement (changelog entry is not required)

* Backported in [#61043](https://github.com/ClickHouse/ClickHouse/issues/61043): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61168](https://github.com/ClickHouse/ClickHouse/issues/61168): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
* Backported in [#61192](https://github.com/ClickHouse/ClickHouse/issues/61192): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
docs/changelogs/v24.2.2.71-stable.md (new file) — 55 lines

@@ -0,0 +1,55 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v24.2.2.71-stable (9293d361e72) FIXME as compared to v24.2.1.2248-stable (891689a4150)

#### Improvement
* Backported in [#60834](https://github.com/ClickHouse/ClickHouse/issues/60834): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).

#### Bug Fix (user-visible misbehavior in an official stable release)

* PartsSplitter invalid ranges for the same part [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
* Try to avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).

#### CI Fix or Improvement (changelog entry is not required)

* Backported in [#60758](https://github.com/ClickHouse/ClickHouse/issues/60758): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#60706](https://github.com/ClickHouse/ClickHouse/issues/60706): Eliminates the need to provide input args to docker server jobs to clean yml files. [#60602](https://github.com/ClickHouse/ClickHouse/pull/60602) ([Max K.](https://github.com/maxknv)).
* Backported in [#61045](https://github.com/ClickHouse/ClickHouse/issues/61045): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#60721](https://github.com/ClickHouse/ClickHouse/issues/60721): Fix build_report job so that it's defined by ci_config only (not yml file). [#60613](https://github.com/ClickHouse/ClickHouse/pull/60613) ([Max K.](https://github.com/maxknv)).
* Backported in [#60668](https://github.com/ClickHouse/ClickHouse/issues/60668): Do not await ci pending jobs on release branches decrease wait timeout to fit into gh job timeout. [#60652](https://github.com/ClickHouse/ClickHouse/pull/60652) ([Max K.](https://github.com/maxknv)).
* Backported in [#60863](https://github.com/ClickHouse/ClickHouse/issues/60863): Set limited number of builds for "special build check" report in backports. [#60850](https://github.com/ClickHouse/ClickHouse/pull/60850) ([Max K.](https://github.com/maxknv)).
* Backported in [#60946](https://github.com/ClickHouse/ClickHouse/issues/60946): ... [#60935](https://github.com/ClickHouse/ClickHouse/pull/60935) ([Max K.](https://github.com/maxknv)).
* Backported in [#60972](https://github.com/ClickHouse/ClickHouse/issues/60972): ... [#60952](https://github.com/ClickHouse/ClickHouse/pull/60952) ([Max K.](https://github.com/maxknv)).
* Backported in [#60980](https://github.com/ClickHouse/ClickHouse/issues/60980): ... [#60958](https://github.com/ClickHouse/ClickHouse/pull/60958) ([Max K.](https://github.com/maxknv)).
* Backported in [#61170](https://github.com/ClickHouse/ClickHouse/issues/61170): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
* Backported in [#61181](https://github.com/ClickHouse/ClickHouse/issues/61181): ... [#61172](https://github.com/ClickHouse/ClickHouse/pull/61172) ([Max K.](https://github.com/maxknv)).
* Backported in [#61228](https://github.com/ClickHouse/ClickHouse/issues/61228): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#61194](https://github.com/ClickHouse/ClickHouse/issues/61194): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
* Backported in [#61244](https://github.com/ClickHouse/ClickHouse/issues/61244): ... [#61214](https://github.com/ClickHouse/ClickHouse/pull/61214) ([Max K.](https://github.com/maxknv)).
* Backported in [#61388](https://github.com/ClickHouse/ClickHouse/issues/61388):. [#61373](https://github.com/ClickHouse/ClickHouse/pull/61373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* CI: make workflow yml abstract [#60421](https://github.com/ClickHouse/ClickHouse/pull/60421) ([Max K.](https://github.com/maxknv)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
* General sanity in function `seriesOutliersDetectTukey` [#60535](https://github.com/ClickHouse/ClickHouse/pull/60535) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
@@ -18,8 +18,8 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
    name1 [type1],
    name2 [type2],
    ...
) ENGINE = RabbitMQ SETTINGS
    rabbitmq_host_port = 'host:port' [or rabbitmq_address = 'amqp(s)://guest:guest@localhost/vhost'],

@@ -198,6 +198,10 @@ Additional virtual columns when `kafka_handle_error_mode='stream'`:

Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully.

## Caveats {#caveats}

Even though you may specify [default column expressions](/docs/en/sql-reference/statements/create/table.md/#default_values) (such as `DEFAULT`, `MATERIALIZED`, `ALIAS`) in the table definition, these will be ignored. Instead, the columns will be filled with their respective default values for their types.

## Data formats support {#data-formats-support}

RabbitMQ engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse.
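Given the caveat added above, a hedged example of a RabbitMQ table following the trimmed syntax (plain column types, no `DEFAULT`/`MATERIALIZED`/`ALIAS`); the broker address, exchange name and format are placeholders:

```sql
CREATE TABLE IF NOT EXISTS rabbit_queue
(
    id UInt64,
    message String
) ENGINE = RabbitMQ
SETTINGS
    rabbitmq_host_port = 'localhost:5672',    -- placeholder broker address
    rabbitmq_exchange_name = 'exchange1',     -- placeholder exchange
    rabbitmq_format = 'JSONEachRow';
```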
@@ -946,96 +946,6 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF

The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting.

### Dynamic Storage

This example query shows how to attach a table stored at a URL and configure the
remote storage within the query. The web storage is not configured in the ClickHouse
configuration files; all the settings are in the CREATE/ATTACH query.

:::note
The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
:::

#### Example dynamic web storage

:::tip
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver)
:::

In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.

```sql
# highlight-next-line
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
    price UInt32,
    date Date,
    postcode1 LowCardinality(String),
    postcode2 LowCardinality(String),
    type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
    is_new UInt8,
    duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
    addr1 String,
    addr2 String,
    street LowCardinality(String),
    locality LowCardinality(String),
    town LowCardinality(String),
    district LowCardinality(String),
    county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
    type=web,
    endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
);
# highlight-end
```

### Nested Dynamic Storage

This example query builds on the above dynamic disk configuration and shows how to
use a local disk to cache data from a table stored at a URL. Neither the cache disk
nor the web storage is configured in the ClickHouse configuration files; both are
configured in the CREATE/ATTACH query settings.

In the settings highlighted below notice that the disk of `type=web` is nested within
the disk of `type=cache`.

```sql
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
    price UInt32,
    date Date,
    postcode1 LowCardinality(String),
    postcode2 LowCardinality(String),
    type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
    is_new UInt8,
    duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
    addr1 String,
    addr2 String,
    street LowCardinality(String),
    locality LowCardinality(String),
    town LowCardinality(String),
    district LowCardinality(String),
    county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
    type=cache,
    max_size='1Gi',
    path='/var/lib/clickhouse/custom_disk_cache/',
    disk=disk(
        type=web,
        endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
    )
);
# highlight-end
```

### Details {#details}

In the case of `MergeTree` tables, data is getting to disk in different ways:
|
||||
|
||||
User can assign new big parts to different disks of a [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures) volume in a balanced way using the [min_bytes_to_rebalance_partition_over_jbod](/docs/en/operations/settings/merge-tree-settings.md/#min-bytes-to-rebalance-partition-over-jbod) setting.
|
||||
|
||||
## Using S3 for Data Storage {#table_engine-mergetree-s3}
|
||||
## Using External Storage for Data Storage {#table_engine-mergetree-s3}
|
||||
|
||||
:::note
|
||||
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
|
||||
:::
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. See [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage) for more details.
|
||||
|
||||
`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
|
||||
Example for [S3](https://aws.amazon.com/s3/) as external storage using a disk with type `s3`.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
@ -1112,253 +1020,12 @@ Configuration markup:
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Also see [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage).
|
||||
|
||||
:::note cache configuration
|
||||
ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions.
|
||||
:::
|
||||
|
||||
### Configuring the S3 disk
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
|
||||
- `access_key_id` — S3 access key id.
|
||||
- `secret_access_key` — S3 secret access key.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `region` — S3 region name.
|
||||
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
|
||||
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
|
||||
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
|
||||
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
|
||||
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
|
||||
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
|
||||
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
|
||||
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
|
||||
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
|
||||
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
|
||||
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
|
||||
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
|
||||
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
|
||||
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
|
||||
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
|
||||
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `key_template` — Define the format with which the object keys are generated. By default, Clickhouse takes `root path` from `endpoint` option and adds random generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have a full control how to the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`.
|
||||
- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the objects keys which were stored in the metadata files with the metadata version lower that `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.
|
||||
|
||||
### Configuring the cache
|
||||
|
||||
This is the cache configuration from above:
|
||||
```xml
|
||||
<s3_cache>
|
||||
<type>cache</type>
|
||||
<disk>s3</disk>
|
||||
<path>/var/lib/clickhouse/disks/s3_cache/</path>
|
||||
<max_size>10Gi</max_size>
|
||||
</s3_cache>
|
||||
```
|
||||
|
||||
These parameters define the cache layer:
|
||||
- `type` — If a disk is of type `cache` it caches mark and index files in memory.
|
||||
- `disk` — The name of the disk that will be cached.
|
||||
|
||||
Cache parameters:
|
||||
- `path` — The path where metadata for the cache is stored.
|
||||
- `max_size` — The size (amount of disk space) that the cache can grow to.
|
||||
|
||||
:::tip
|
||||
There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details.
|
||||
:::
|
||||
|
||||
S3 disk can be configured as `main` or `cold` storage:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
|
||||
<access_key_id>your_access_key_id</access_key_id>
|
||||
<secret_access_key>your_secret_access_key</secret_access_key>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3_main>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3_main>
|
||||
<s3_cold>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>default</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>s3</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
<move_factor>0.2</move_factor>
|
||||
</s3_cold>
|
||||
</policies>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
With the `cold` policy, data is moved to S3 when the free space on the local disk becomes smaller than `move_factor * disk_size`, or by a TTL move rule.
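For example, a table can be assigned the `s3_cold` policy above and combined with a TTL rule that moves older parts to the S3-backed volume. This is a sketch; the table, columns, and the 30-day interval are illustrative:

```sql
CREATE TABLE hits
(
    EventDate Date,
    UserID UInt64
)
ENGINE = MergeTree
ORDER BY (EventDate, UserID)
TTL EventDate + INTERVAL 30 DAY TO VOLUME 'external'
SETTINGS storage_policy = 's3_cold';
```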
## Using Azure Blob Storage for Data Storage {#table_engine-mergetree-azure-blob-storage}
|
||||
|
||||
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
|
||||
|
||||
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<blob_storage_disk>
|
||||
<type>azure_blob_storage</type>
|
||||
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
|
||||
<container_name>container</container_name>
|
||||
<account_name>account</account_name>
|
||||
<account_key>pass123</account_key>
|
||||
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
|
||||
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</blob_storage_disk>
|
||||
</disks>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Connection parameters:
|
||||
* `endpoint` — AzureBlobStorage endpoint URL with container and prefix. It can optionally contain `account_name` if the authentication method in use requires it (`http://account.blob.core.windows.net:{port}/[account_name]{container_name}/{data_prefix}`). Alternatively, these parameters can be provided separately via `storage_account_url`, `account_name` and `container_name`. To specify a prefix, `endpoint` must be used.
|
||||
* `endpoint_contains_account_name` - This flag specifies whether the endpoint contains `account_name`, which is only needed for certain authentication methods. (Default: `true`)
|
||||
* `storage_account_url` - Required if `endpoint` is not specified. Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
|
||||
* `container_name` - Target container name, defaults to `default-container`.
|
||||
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account; if set to `true`, the disk connects to the container directly; and if left unset, the disk connects to the account, checks whether the container `container_name` exists, and creates it if it does not exist yet.
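As a sketch of the `endpoint`-based form (account, container, prefix and key values are placeholders, and the exact URL layout depends on your account and authentication method), the same disk could alternatively be declared as:

```xml
<blob_storage_disk>
    <type>azure_blob_storage</type>
    <!-- account name, container and data prefix packed into one URL -->
    <endpoint>http://azurite1:10000/devstoreaccount1/container/data-prefix/</endpoint>
    <account_name>devstoreaccount1</account_name>
    <account_key>pass123</account_key>
    <metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
</blob_storage_disk>
```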
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
|
||||
* `connection_string` - For authentication using a connection string.
|
||||
* `account_name` and `account_key` - For authentication using Shared Key.
|
||||
|
||||
Limit parameters (mainly for internal usage):
|
||||
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
|
||||
* `min_bytes_for_seek` - Limits the size of a seekable region.
|
||||
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
|
||||
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
|
||||
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
|
||||
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
|
||||
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
|
||||
|
||||
:::note Zero-copy replication is not ready for production
|
||||
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
|
||||
:::
|
||||
|
||||
## HDFS storage {#hdfs-storage}
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `hdfs`
|
||||
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<hdfs>
|
||||
<type>hdfs</type>
|
||||
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
|
||||
<skip_access_check>true</skip_access_check>
|
||||
</hdfs>
|
||||
<hdd>
|
||||
<type>local</type>
|
||||
<path>/</path>
|
||||
</hdd>
|
||||
</disks>
|
||||
<policies>
|
||||
<hdfs>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>hdfs</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>hdd</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
</hdfs>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Web storage (read-only) {#web-storage}
|
||||
|
||||
Web storage can be used for read-only purposes. An example use is for hosting sample
|
||||
data, or for migrating data.
|
||||
|
||||
:::tip
|
||||
Storage can also be configured temporarily within a query, if a web dataset is not expected
|
||||
to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the
|
||||
configuration file.
|
||||
:::
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `web`
|
||||
- the data is hosted at `http://nginx:80/test1/`
|
||||
- a cache on local storage is used
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>http://nginx:80/test1/</endpoint>
|
||||
</web>
|
||||
<cached_web>
|
||||
<type>cache</type>
|
||||
<disk>web</disk>
|
||||
<path>cached_web_cache/</path>
|
||||
<max_size>100000000</max_size>
|
||||
</cached_web>
|
||||
</disks>
|
||||
<policies>
|
||||
<web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</web>
|
||||
<cached_web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cached_web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</cached_web>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_part` — Name of a part.
|
||||
|
@ -55,7 +55,7 @@ CREATE TABLE criteo_log (
|
||||
) ENGINE = Log;
|
||||
```
|
||||
|
||||
Download the data:
|
||||
Insert the data:
|
||||
|
||||
``` bash
|
||||
$ for i in {00..23}; do echo $i; zcat datasets/criteo/day_${i#0}.gz | sed -r 's/^/2000-01-'${i/00/24}'\t/' | clickhouse-client --host=example-perftest01j --query="INSERT INTO criteo_log FORMAT TabSeparated"; done
|
||||
|
@ -10,10 +10,14 @@ The embeddings and the metadata are stored in separate files in the raw data. A
|
||||
converts them to CSV and imports them into ClickHouse. You can use the following `download.sh` script for that:
|
||||
|
||||
```bash
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata
|
||||
python3 process.py ${1} # merge files and convert to CSV
|
||||
number=${1}
|
||||
if [[ $number == '' ]]; then
|
||||
number=1
|
||||
fi;
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${number}.npy # download image embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${number}.npy # download text embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${number}.parquet # download metadata
|
||||
python3 process.py $number # merge files and convert to CSV
|
||||
```
|
||||
Script `process.py` is defined as follows:
|
||||
|
||||
|
@ -248,6 +248,9 @@ Some of the files might not download fully. Check the file sizes and re-download
|
||||
|
||||
``` bash
|
||||
$ curl -O https://datasets.clickhouse.com/trips_mergetree/partitions/trips_mergetree.tar
|
||||
# Validate the checksum
|
||||
$ md5sum trips_mergetree.tar
|
||||
# Checksum should be equal to: f3b8d469b41d9a82da064ded7245d12c
|
||||
$ tar xvf trips_mergetree.tar -C /var/lib/clickhouse # path to ClickHouse data directory
|
||||
$ # check permissions of unpacked data, fix if required
|
||||
$ sudo service clickhouse-server restart
|
||||
|
@ -95,9 +95,11 @@ which is equal to
|
||||
|
||||
## Substituting Configuration {#substitution}
|
||||
|
||||
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)).
|
||||
The config can define substitutions. There are two types of substitutions:
|
||||
|
||||
If you want to replace an entire element with a substitution use `include` as the element name.
|
||||
- If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)).
|
||||
|
||||
- If you want to replace an entire element with a substitution, use `include` as the element name. Substitutions can also be performed from ZooKeeper by specifying the attribute `from_zk = "/path/to/node"`. In this case, the element value is replaced with the contents of the ZooKeeper node at `/path/to/node`. This also works if you store an entire XML subtree as a ZooKeeper node; it will be fully inserted into the source element.
|
||||
|
||||
XML substitution example:
|
||||
|
||||
@ -114,7 +116,7 @@ XML substitution example:
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node, and it will be fully inserted into the source element.
|
||||
If you want to merge the substituting content with the existing configuration instead of appending it, you can use the attribute `merge="true"`, for example: `<include from_zk="/some_path" merge="true">`. In this case, the existing configuration is merged with the content from the substitution, and the existing configuration settings are replaced with values from the substitution.
|
||||
|
||||
## Encrypting and Hiding Configuration {#encryption}
|
||||
|
||||
|
@ -933,9 +933,9 @@ Hard limit is configured via system tools
|
||||
|
||||
## database_atomic_delay_before_drop_table_sec {#database_atomic_delay_before_drop_table_sec}
|
||||
|
||||
Sets the delay before remove table data in seconds. If the query has `SYNC` modifier, this setting is ignored.
|
||||
The delay before a table data is dropped in seconds. If the `DROP TABLE` query has a `SYNC` modifier, this setting is ignored.
|
||||
|
||||
Default value: `480` (8 minute).
|
||||
Default value: `480` (8 minutes).
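For example, the delay applies to a plain `DROP TABLE`, while the `SYNC` modifier removes the data immediately (the table name is illustrative):

```sql
-- table data is actually removed only after database_atomic_delay_before_drop_table_sec seconds
DROP TABLE IF EXISTS hits;

-- SYNC ignores the delay and removes the data right away
DROP TABLE IF EXISTS hits SYNC;
```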
## database_catalog_unused_dir_hide_timeout_sec {#database_catalog_unused_dir_hide_timeout_sec}
|
||||
|
||||
|
@ -5,26 +5,416 @@ sidebar_label: "External Disks for Storing Data"
|
||||
title: "External Disks for Storing Data"
|
||||
---
|
||||
|
||||
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon S3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)).
|
||||
Data processed in ClickHouse is usually stored in the local file system, on the same machine as the ClickHouse server. That requires large-capacity disks, which can be quite expensive. To avoid that, you can store the data remotely. Various storages are supported:
|
||||
1. [Amazon S3](https://aws.amazon.com/s3/) object storage.
|
||||
2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html))
|
||||
3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs).
|
||||
|
||||
To work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
|
||||
:::note ClickHouse also has support for external table engines, which are different from the external storage option described on this page, as they allow reading data stored in some general file format (like Parquet), while on this page we describe storage configuration for ClickHouse `MergeTree` or `Log` family tables.
|
||||
1. to work with data stored on `Amazon S3` disks, use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine.
|
||||
2. to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
|
||||
3. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine.
|
||||
:::
|
||||
|
||||
To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver).
|
||||
## Configuring external storage {#configuring-external-storage}
|
||||
|
||||
## Configuring HDFS {#configuring-hdfs}
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, or `HDFS` using a disk with type `s3`, `azure_blob_storage`, or `hdfs` respectively.
|
||||
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
|
||||
Disk configuration requires:
|
||||
1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`.
|
||||
2. Configuration of a specific external storage type.
|
||||
|
||||
Configuration markup:
|
||||
Starting from ClickHouse version 24.1, it is possible to use a new configuration option.
It requires specifying:
|
||||
1. `type` equal to `object_storage`
|
||||
2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`.
|
||||
Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web`.
|
||||
Usage of the `plain` metadata type is described in the [plain storage section](/docs/en/operations/storing-data.md/#storing-data-on-webserver); the `web` metadata type can be used only with the `web` object storage type; the `local` metadata type stores metadata files locally (each metadata file contains a mapping to files in object storage and some additional meta information about them).
|
||||
|
||||
For example, the configuration option
|
||||
``` xml
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3>
|
||||
```
|
||||
|
||||
is equal to configuration (from `24.1`):
|
||||
``` xml
|
||||
<s3>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>local</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3>
|
||||
```
|
||||
|
||||
Configuration
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>s3_plain</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
is equal to
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>plain</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
A full storage configuration example looks like this:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Starting with ClickHouse version 24.1, it can also look like this:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<s3>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>local</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
To make a specific kind of storage the default option for all `MergeTree` tables, add the following section to the configuration file:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<merge_tree>
|
||||
<storage_policy>s3</storage_policy>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
If you want to apply a specific storage policy only to a specific table, you can define it in the settings when creating the table:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (a Int32, b String)
|
||||
ENGINE = MergeTree() ORDER BY a
|
||||
SETTINGS storage_policy = 's3';
|
||||
```
|
||||
|
||||
You can also use `disk` instead of `storage_policy`. In this case it is not required to have a `storage_policy` section in the configuration file; a `disk` section alone is enough.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (a Int32, b String)
|
||||
ENGINE = MergeTree() ORDER BY a
|
||||
SETTINGS disk = 's3';
|
||||
```
|
||||
|
||||
## Dynamic Configuration {#dynamic-configuration}
|
||||
|
||||
It is also possible to specify a storage configuration without a predefined disk in the configuration file; instead, the disk can be configured in the `CREATE`/`ATTACH` query settings.
|
||||
|
||||
The following example query builds on the above dynamic disk configuration and shows how to use a local disk to cache data from a table stored at a URL.
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
The example below adds cache to external storage.
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=cache,
|
||||
max_size='1Gi',
|
||||
path='/var/lib/clickhouse/custom_disk_cache/',
|
||||
disk=disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
)
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
In the settings highlighted above, notice that the disk of `type=web` is nested within the disk of `type=cache`.
|
||||
|
||||
:::note
|
||||
The example uses `type=web`, but any disk type can be configured as dynamic, even the local disk. Local disks require a `path` argument that is located inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that as well when using a local disk; a sketch follows this note.
|
||||
:::
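A minimal sketch of a dynamically defined local disk, assuming the server config already sets `custom_local_disks_base_directory` to `/var/lib/clickhouse/user_disks/` (all names and paths here are placeholders):

```sql
-- assumes the server config contains:
--   <custom_local_disks_base_directory>/var/lib/clickhouse/user_disks/</custom_local_disks_base_directory>
CREATE TABLE local_disk_table (id Int32)
ENGINE = MergeTree() ORDER BY id
SETTINGS disk = disk(type = local, path = '/var/lib/clickhouse/user_disks/scratch/');
```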
A combination of config-file-based and SQL-defined configuration is also possible:
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=cache,
|
||||
max_size='1Gi',
|
||||
path='/var/lib/clickhouse/custom_disk_cache/',
|
||||
disk=disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
)
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
where `web` is a disk from the server configuration file:
|
||||
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/</endpoint>
|
||||
</web>
|
||||
</disks>
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
### Using S3 Storage {#s3-storage}
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
|
||||
- `access_key_id` — S3 access key id.
|
||||
- `secret_access_key` — S3 secret access key.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `region` — S3 region name.
|
||||
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
|
||||
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
|
||||
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
|
||||
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
|
||||
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
|
||||
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10000` (10 seconds).
|
||||
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5000` (5 seconds).
|
||||
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
|
||||
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
|
||||
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
|
||||
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
|
||||
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
|
||||
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
|
||||
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
|
||||
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
|
||||
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `key_template` — Defines the format with which object keys are generated. By default, ClickHouse takes the `root path` from the `endpoint` option and adds a randomly generated suffix: a directory with 3 random symbols and a file name with 29 random symbols. With this option you have full control over how object keys are generated. Some usage scenarios require random symbols in the prefix or in the middle of the object key, for example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax); only a subset of the syntax is supported, so check whether your preferred format is supported before using this option. The disk is not initialized if ClickHouse is unable to generate a key from the value of `key_template`. The option requires the feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key) to be enabled, forbids declaring the `root path` in the `endpoint` option, and requires the option `key_compatibility_prefix` to be defined.
|
||||
- `key_compatibility_prefix` — This option is required when `key_template` is in use. To be able to read object keys that were stored in metadata files with a metadata version lower than `VERSION_FULL_OBJECT_KEY`, set the previous `root path` from the `endpoint` option here.
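A sketch combining several of the optional parameters above; the endpoint, region, proxy address and KMS key id are placeholders:

```xml
<s3_tuned>
    <type>s3</type>
    <endpoint>https://s3.eu-west-1.amazonaws.com/my-bucket/data/</endpoint>
    <region>eu-west-1</region>
    <use_environment_credentials>true</use_environment_credentials>
    <!-- route requests through an internal proxy -->
    <proxy>
        <uri>http://proxy.internal:3128</uri>
    </proxy>
    <!-- server-side encryption with a customer managed KMS key -->
    <server_side_encryption_kms_key_id>arn:aws:kms:eu-west-1:123456789012:key/example</server_side_encryption_kms_key_id>
    <retry_attempts>10</retry_attempts>
    <skip_access_check>false</skip_access_check>
</s3_tuned>
```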
:::note
|
||||
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
|
||||
:::
|
||||
|
||||
### Using Plain Storage {#plain-storage}
|
||||
|
||||
In `22.10` a new disk type `s3_plain` was introduced, which provides a write-once storage. Configuration parameters are the same as for `s3` disk type.
|
||||
Unlike the `s3` disk type, it stores data as is: instead of randomly generated blob names, it uses normal file names (the same way ClickHouse stores files on a local disk) and does not store any metadata locally; instead, the metadata is derived from the data on `s3`.
|
||||
|
||||
This disk type allows keeping a static version of the table, as it does not allow executing merges on the existing data or inserting new data.
|
||||
A use case for this disk type is to create backups on it, which can be done via `BACKUP TABLE data TO Disk('plain_disk_name', 'backup_name')`. Afterwards you can run `RESTORE TABLE data AS data_restored FROM Disk('plain_disk_name', 'backup_name')` or use `ATTACH TABLE data (...) ENGINE = MergeTree() SETTINGS disk = 'plain_disk_name'`.
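Put together, a backup-and-restore round trip over such a disk might look like the following sketch, reusing the `plain_disk_name` and table names from the sentence above:

```sql
-- write a static copy of the table onto the write-once disk
BACKUP TABLE data TO Disk('plain_disk_name', 'backup_name');

-- later, bring the backup back under a new table name
RESTORE TABLE data AS data_restored FROM Disk('plain_disk_name', 'backup_name');
```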
Configuration:
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>s3_plain</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
Starting from `24.1` it is possible to configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using the `plain` metadata type.
|
||||
|
||||
Configuration:
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>azure</object_storage_type>
|
||||
<metadata_type>plain</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
### Using Azure Blob Storage {#azure-blob-storage}
|
||||
|
||||
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
|
||||
|
||||
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<blob_storage_disk>
|
||||
<type>azure_blob_storage</type>
|
||||
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
|
||||
<container_name>container</container_name>
|
||||
<account_name>account</account_name>
|
||||
<account_key>pass123</account_key>
|
||||
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
|
||||
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</blob_storage_disk>
|
||||
</disks>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Connection parameters:
|
||||
* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
|
||||
* `container_name` - Target container name, defaults to `default-container`.
|
||||
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
|
||||
|
||||
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
|
||||
* `connection_string` - For authentication using a connection string.
|
||||
* `account_name` and `account_key` - For authentication using Shared Key.
|
||||
|
||||
Limit parameters (mainly for internal usage):
|
||||
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
|
||||
* `min_bytes_for_seek` - Limits the size of a seekable region.
|
||||
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
|
||||
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
|
||||
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
|
||||
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
|
||||
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
|
||||
|
||||
:::note Zero-copy replication is not ready for production
|
||||
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
|
||||
:::
|
||||
|
||||
## Using HDFS storage {#hdfs-storage}
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `hdfs`
|
||||
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<hdfs>
|
||||
<type>hdfs</type>
|
||||
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
|
||||
<skip_access_check>true</skip_access_check>
|
||||
</hdfs>
|
||||
<hdd>
|
||||
<type>local</type>
|
||||
<path>/</path>
|
||||
</hdd>
|
||||
</disks>
|
||||
<policies>
|
||||
<hdfs>
|
||||
@ -32,26 +422,17 @@ Configuration markup:
|
||||
<main>
|
||||
<disk>hdfs</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>hdd</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
</hdfs>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
|
||||
<merge_tree>
|
||||
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.
|
||||
|
||||
## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system}
|
||||
### Using Data Encryption {#encrypted-virtual-file-system}
|
||||
|
||||
You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one.
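A minimal sketch of such a configuration, assuming an already configured disk named `disk_s3`; the path and the 16-byte key are placeholders:

```xml
<disk_encrypted>
    <type>encrypted</type>
    <!-- the underlying disk that actually stores the ciphered files -->
    <disk>disk_s3</disk>
    <path>encrypted/</path>
    <key>firstfirstfirstf</key>
</disk_encrypted>
```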
@ -112,7 +493,7 @@ Example of disk configuration:
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Using local cache {#using-local-cache}
|
||||
### Using local cache {#using-local-cache}
|
||||
|
||||
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
|
||||
For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
|
||||
@ -275,23 +656,92 @@ Cache profile events:
|
||||
|
||||
- `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds`
|
||||
|
||||
## Using in-memory cache (userspace page cache) {#userspace-page-cache}
|
||||
|
||||
The File Cache described above stores cached data in local files. Alternatively, object-store-based disks can be configured to use "Userspace Page Cache", which is RAM-only. Userspace page cache is recommended only if file cache can't be used for some reason, e.g. if the machine doesn't have a local disk at all. Note that file cache effectively uses RAM for caching too, since the OS caches contents of local files.
|
||||
|
||||
To enable userspace page cache for disks that don't use file cache, use setting `use_page_cache_for_disks_without_file_cache`.
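For instance, assuming it is applied as a query-level setting, it can be enabled for a single query against an object-storage-backed table (the table name is a placeholder):

```sql
SELECT count()
FROM s3_backed_table
SETTINGS use_page_cache_for_disks_without_file_cache = 1;
```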
By default, on Linux, the userspace page cache will use all available memory, similar to the OS page cache. In tools like `top` and `ps`, the clickhouse server process will typically show resident set size near 100% of the machine's RAM - this is normal, and most of this memory is actually reclaimable by the OS on memory pressure (`MADV_FREE`). This behavior can be disabled with server setting `page_cache_use_madv_free = 0`, making the userspace page cache just use a fixed amount of memory `page_cache_size` with no special interaction with the OS. On Mac OS, `page_cache_use_madv_free` is always disabled as it doesn't have lazy `MADV_FREE`.
|
||||
|
||||
Unfortunately, `page_cache_use_madv_free` makes it difficult to tell if the server is close to running out of memory, since the RSS metric becomes useless. Async metric `UnreclaimableRSS` shows the amount of physical memory used by the server, excluding the memory reclaimable by the OS: `select value from system.asynchronous_metrics where metric = 'UnreclaimableRSS'`. Use it for monitoring instead of RSS. This metric is only available if `page_cache_use_madv_free` is enabled.
|
||||
|
||||
## Storing Data on Web Server {#storing-data-on-webserver}
|
||||
|
||||
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
|
||||
### Using static Web storage (read-only) {#web-storage}
|
||||
|
||||
This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via an `ATTACH TABLE` query (see the example below). The local disk is not actually used; each `SELECT` query results in an `http` request to fetch the required data. Any modification of the table data results in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md).
|
||||
Web storage can be used for read-only purposes. An example use is for hosting sample data, or for migrating data.
|
||||
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
|
||||
|
||||
Web server storage is supported only for the [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) engine families. To access the data stored on a `web` disk, use the [storage_policy](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#terms) setting when executing the query. For example, `ATTACH TABLE table_web UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'`.
|
||||
In this sample configuration:
|
||||
- the disk is of type `web`
|
||||
- the data is hosted at `http://nginx:80/test1/`
|
||||
- a cache on local storage is used
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>http://nginx:80/test1/</endpoint>
|
||||
</web>
|
||||
<cached_web>
|
||||
<type>cache</type>
|
||||
<disk>web</disk>
|
||||
<path>cached_web_cache/</path>
|
||||
<max_size>100000000</max_size>
|
||||
</cached_web>
|
||||
</disks>
|
||||
<policies>
|
||||
<web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</web>
|
||||
<cached_web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cached_web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</cached_web>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
:::tip
|
||||
Storage can also be configured temporarily within a query, if a web dataset is not expected
|
||||
to be used routinely, see [dynamic configuration](#dynamic-configuration) and skip editing the
|
||||
configuration file.
|
||||
:::
|
||||
|
||||
:::tip
|
||||
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver)
|
||||
:::
|
||||
|
||||
In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.
|
||||
|
||||
```sql
|
||||
# highlight-next-line
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
A ready-to-use test case follows. Add this configuration to the server config:
|
||||
|
||||
@ -487,7 +937,7 @@ If URL is not reachable on disk load when the server is starting up tables, then
|
||||
Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read.
|
||||
|
||||
|
||||
## Zero-copy Replication (not ready for production) {#zero-copy}
|
||||
### Zero-copy Replication (not ready for production) {#zero-copy}
|
||||
|
||||
Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.
|
||||
|
||||
|
@ -26,7 +26,9 @@ priority: 0
|
||||
is_active: 0
|
||||
active_children: 0
|
||||
dequeued_requests: 67
|
||||
canceled_requests: 0
|
||||
dequeued_cost: 4692272
|
||||
canceled_cost: 0
|
||||
busy_periods: 63
|
||||
vruntime: 938454.1999999989
|
||||
system_vruntime: ᴺᵁᴸᴸ
|
||||
@ -54,7 +56,9 @@ Columns:
|
||||
- `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied.
|
||||
- `active_children` (`UInt64`) - The number of children in active state.
|
||||
- `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node.
|
||||
- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node.
|
||||
- `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node.
|
||||
- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node.
|
||||
- `busy_periods` (`UInt64`) - The total number of deactivations of this node.
|
||||
- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner.
|
||||
- `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`.
|
||||
|
@ -16,7 +16,9 @@ ClickHouse also supports:
|
||||
|
||||
## NULL Processing
|
||||
|
||||
During aggregation, all `NULL`s are skipped. If the aggregation has several parameters it will ignore any row in which one or more of the parameters are NULL.
|
||||
During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL.
|
||||
|
||||
There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`.
|
||||
|
||||
**Examples:**
|
||||
|
||||
@ -85,3 +87,50 @@ FROM t_null_big;
|
||||
│ [2,2,3] │ [2,NULL,2,3,NULL] │
|
||||
└───────────────┴───────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Note that `NULL`s are skipped only when the column is used as an argument to an aggregate function. For example, [`count`](../../sql-reference/aggregate-functions/reference/count.md) without parameters (`count()`) or with constant ones (`count(1)`) will count all rows in the block (independently of the value of the `GROUP BY` column, as it is not an argument), while `count(column)` will only return the number of rows where `column` is not NULL.
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
v,
|
||||
count(1),
|
||||
count(v)
|
||||
FROM
|
||||
(
|
||||
SELECT if(number < 10, NULL, number % 3) AS v
|
||||
FROM numbers(15)
|
||||
)
|
||||
GROUP BY v
|
||||
|
||||
┌────v─┬─count()─┬─count(v)─┐
|
||||
│ ᴺᵁᴸᴸ │ 10 │ 0 │
|
||||
│ 0 │ 1 │ 1 │
|
||||
│ 1 │ 2 │ 2 │
|
||||
│ 2 │ 2 │ 2 │
|
||||
└──────┴─────────┴──────────┘
|
||||
```
|
||||
|
||||
And here is an example of `first_value` with `RESPECT NULLS`, where we can see that NULL inputs are respected and the function returns the first value read, whether it is NULL or not:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
col || '_' || ((col + 1) * 5 - 1) as range,
|
||||
first_value(odd_or_null) as first,
|
||||
first_value(odd_or_null) IGNORE NULLS as first_ignore_null,
|
||||
first_value(odd_or_null) RESPECT NULLS as first_respect_nulls
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
intDiv(number, 5) AS col,
|
||||
if(number % 2 == 0, NULL, number) as odd_or_null
|
||||
FROM numbers(15)
|
||||
)
|
||||
GROUP BY col
|
||||
ORDER BY col
|
||||
|
||||
┌─range─┬─first─┬─first_ignore_null─┬─first_respect_nulls─┐
|
||||
│ 0_4 │ 1 │ 1 │ ᴺᵁᴸᴸ │
|
||||
│ 1_9 │ 5 │ 5 │ 5 │
|
||||
│ 2_14 │ 11 │ 11 │ ᴺᵁᴸᴸ │
|
||||
└───────┴───────┴───────────────────┴─────────────────────┘
|
||||
```
|
||||
|
@ -1,16 +1,99 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/varpop
|
||||
title: "varPop"
|
||||
slug: "/en/sql-reference/aggregate-functions/reference/varpop"
|
||||
sidebar_position: 32
|
||||
---
|
||||
|
||||
# varPop(x)
|
||||
This page covers the `varPop` and `varPopStable` functions available in ClickHouse.
|
||||
|
||||
Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`.
|
||||
## varPop
|
||||
|
||||
In other words, dispersion for a set of values. Returns `Float64`.
|
||||
Calculates the population variance of a data column: `Σ((x - x̅)^2) / n`, where `n` is the number of values in the column and `x̅` is their average value.
|
||||
|
||||
Alias: `VAR_POP`.
|
||||
**Syntax**
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
```sql
|
||||
varPop(x)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x`: A column of numeric values. [Numeric](../../../native-protocol/columns.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
Returns a value of type `Float64`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable).
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS test_data;
|
||||
CREATE TABLE test_data
|
||||
(
|
||||
x Int32,
|
||||
y Int32
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO test_data VALUES (1, 2), (2, 3), (3, 5), (4, 6), (5, 8);
|
||||
|
||||
SELECT
|
||||
varPop(x) AS var_pop
|
||||
FROM test_data;
|
||||
```
|
||||
|
||||
```response
|
||||
2
|
||||
```
|
||||
|
||||
## varPopStable
|
||||
|
||||
Calculates the population variance of a data column using a stable, numerically accurate method. This function is designed to provide reliable results even with large datasets or values that might cause numerical instability in other implementations.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
varPopStable(x)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x`: A column of numeric values. [Expression](../../syntax#syntax-expressions)
|
||||
|
||||
**Returned value**
|
||||
|
||||
Returns a value of type `Float64`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS test_data;
|
||||
CREATE TABLE test_data
|
||||
(
|
||||
x Int32,
|
||||
y Int32
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO test_data VALUES (1, 2), (2, 9), (9, 5), (4, 6), (5, 8);
|
||||
|
||||
SELECT
|
||||
varPopStable(x) AS var_pop_stable
|
||||
FROM test_data;
|
||||
```
|
||||
|
||||
```response
|
||||
7.76
|
||||
```
|
||||
|
@ -1,18 +1,128 @@
|
||||
---
|
||||
title: "varSamp"
|
||||
slug: /en/sql-reference/aggregate-functions/reference/varsamp
|
||||
sidebar_position: 33
|
||||
---
|
||||
|
||||
# varSamp
|
||||
This page contains information on the `varSamp` and `varSampStable` ClickHouse functions.
|
||||
|
||||
Calculates the amount `Σ((x - x̅)^2) / (n - 1)`, where `n` is the sample size and `x̅`is the average value of `x`.
|
||||
## varSamp
|
||||
|
||||
It represents an unbiased estimate of the variance of a random variable if passed values from its sample.
|
||||
Calculates the sample variance of a data set.
|
||||
|
||||
Returns `Float64`. When `n <= 1`, returns `+∞`.
|
||||
**Syntax**
|
||||
|
||||
Alias: `VAR_SAMP`.
|
||||
```sql
|
||||
varSamp(expr)
|
||||
```
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
**Parameters**
|
||||
|
||||
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions)
|
||||
|
||||
**Returned value**
|
||||
|
||||
Returns a Float64 value representing the sample variance of the input data set.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
The `varSamp()` function calculates the sample variance using the following formula:
|
||||
|
||||
```plaintext
|
||||
∑(x - mean(x))^2 / (n - 1)
|
||||
```
|
||||
|
||||
Where:
|
||||
|
||||
- `x` is each individual data point in the data set.
|
||||
- `mean(x)` is the arithmetic mean of the data set.
|
||||
- `n` is the number of data points in the data set.
|
||||
|
||||
The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead.
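As a quick illustration of the difference between the two estimators (a minimal sketch; the exact floating-point output may vary slightly):

```sql
-- For the values [1, 2, 3, 4]: mean = 2.5, sum of squared deviations = 5.
SELECT
    varSamp(x) AS sample_variance,      -- 5 / (4 - 1) ≈ 1.667
    varPop(x)  AS population_variance   -- 5 / 4       = 1.25
FROM (SELECT arrayJoin([1, 2, 3, 4]) AS x);
```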
|
||||
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE example_table
|
||||
(
|
||||
id UInt64,
|
||||
value Float64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
|
||||
INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7);
|
||||
|
||||
SELECT varSamp(value) FROM example_table;
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```response
|
||||
0.8650000000000091
|
||||
```
|
||||
|
||||
## varSampStable
|
||||
|
||||
Calculates the sample variance of a data set using a numerically stable algorithm.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
varSampStable(expr)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions)
|
||||
|
||||
**Returned value**
|
||||
|
||||
The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()` function](#varSamp):
|
||||
|
||||
```plaintext
|
||||
∑(x - mean(x))^2 / (n - 1)
|
||||
```
|
||||
|
||||
Where:
|
||||
- `x` is each individual data point in the data set.
|
||||
- `mean(x)` is the arithmetic mean of the data set.
|
||||
- `n` is the number of data points in the data set.
|
||||
|
||||
The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values.
|
||||
|
||||
Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE example_table
|
||||
(
|
||||
id UInt64,
|
||||
value Float64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
|
||||
INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7);
|
||||
|
||||
SELECT varSampStable(value) FROM example_table;
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```response
|
||||
0.865
|
||||
```
|
||||
|
||||
This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic.
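To see the difference in practice, the two functions can be compared directly on the same column (a minimal sketch, assuming the `example_table` created in the examples above):

```sql
SELECT
    varSamp(value)       AS var_samp,
    varSampStable(value) AS var_samp_stable
FROM example_table;
```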
|
||||
|
@ -10,6 +10,8 @@ sidebar_label: Nullable
|
||||
|
||||
Returns whether the argument is [NULL](../../sql-reference/syntax.md#null).
|
||||
|
||||
See also operator [`IS NULL`](../operators/index.md#is_null).
|
||||
|
||||
``` sql
|
||||
isNull(x)
|
||||
```
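For example (a minimal sketch; `1` means the argument is `NULL`, `0` means it is not):

```sql
SELECT isNull(NULL) AS null_argument, isNull(1) AS non_null_argument;
```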
|
||||
@ -54,6 +56,8 @@ Result:
|
||||
|
||||
Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal).
|
||||
|
||||
See also operator [`IS NOT NULL`](../operators/index.md#is_not_null).
|
||||
|
||||
``` sql
|
||||
isNotNull(x)
|
||||
```
|
||||
|
@ -5,80 +5,372 @@ sidebar_label: JSON
|
||||
---
|
||||
|
||||
There are two sets of functions to parse JSON.
|
||||
- `visitParam*` (`simpleJSON*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast.
|
||||
- `simpleJSON*` (`visitParam*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast.
|
||||
- `JSONExtract*` is made to parse normal JSON.
|
||||
|
||||
# visitParam functions
|
||||
# simpleJSON/visitParam functions
|
||||
|
||||
ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done.
|
||||
|
||||
The following assumptions are made:
|
||||
|
||||
1. The field name (function argument) must be a constant.
|
||||
2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0`
|
||||
2. The field name is somehow canonically encoded in JSON. For example: `simpleJSONHas('{"abc":"def"}', 'abc') = 1`, but `simpleJSONHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0`
|
||||
3. Fields are searched for on any nesting level, indiscriminately. If there are multiple matching fields, the first occurrence is used.
|
||||
4. The JSON does not have space characters outside of string literals.
|
||||
|
||||
## visitParamHas(params, name)
|
||||
## simpleJSONHas
|
||||
|
||||
Checks whether there is a field with the `name` name.
|
||||
Checks whether there is a field named `field_name`. The result is `UInt8`.
|
||||
|
||||
Alias: `simpleJSONHas`.
|
||||
**Syntax**
|
||||
|
||||
## visitParamExtractUInt(params, name)
|
||||
|
||||
Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.
|
||||
|
||||
Alias: `simpleJSONExtractUInt`.
|
||||
|
||||
## visitParamExtractInt(params, name)
|
||||
|
||||
The same as for Int64.
|
||||
|
||||
Alias: `simpleJSONExtractInt`.
|
||||
|
||||
## visitParamExtractFloat(params, name)
|
||||
|
||||
The same as for Float64.
|
||||
|
||||
Alias: `simpleJSONExtractFloat`.
|
||||
|
||||
## visitParamExtractBool(params, name)
|
||||
|
||||
Parses a true/false value. The result is UInt8.
|
||||
|
||||
Alias: `simpleJSONExtractBool`.
|
||||
|
||||
## visitParamExtractRaw(params, name)
|
||||
|
||||
Returns the value of a field, including separators.
|
||||
|
||||
Alias: `simpleJSONExtractRaw`.
|
||||
|
||||
Examples:
|
||||
|
||||
``` sql
|
||||
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
|
||||
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
|
||||
```sql
|
||||
simpleJSONHas(json, field_name)
|
||||
```
|
||||
|
||||
## visitParamExtractString(params, name)
|
||||
**Parameters**
|
||||
|
||||
Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string.
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
Alias: `simpleJSONExtractString`.
|
||||
**Returned value**
|
||||
|
||||
Examples:
|
||||
It returns `1` if the field exists, `0` otherwise.
|
||||
|
||||
``` sql
|
||||
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
|
||||
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
|
||||
visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
|
||||
visitParamExtractString('{"abc":"hello}', 'abc') = '';
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
|
||||
|
||||
SELECT simpleJSONHas(json, 'foo') FROM jsons;
|
||||
SELECT simpleJSONHas(json, 'bar') FROM jsons;
|
||||
```
|
||||
|
||||
```response
|
||||
1
|
||||
0
|
||||
```
|
||||
## simpleJSONExtractUInt
|
||||
|
||||
Parses `UInt64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractUInt(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"4e3"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":3.4}');
|
||||
INSERT INTO jsons VALUES ('{"foo":5}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
|
||||
INSERT INTO jsons VALUES ('{"baz":2}');
|
||||
|
||||
SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
0
|
||||
4
|
||||
0
|
||||
3
|
||||
5
|
||||
```
|
||||
|
||||
## simpleJSONExtractInt
|
||||
|
||||
Parses `Int64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractInt(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":-3.4}');
|
||||
INSERT INTO jsons VALUES ('{"foo":5}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
|
||||
INSERT INTO jsons VALUES ('{"baz":2}');
|
||||
|
||||
SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
0
|
||||
-4
|
||||
0
|
||||
-3
|
||||
5
|
||||
```
|
||||
|
||||
## simpleJSONExtractFloat
|
||||
|
||||
Parses `Float64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractFloat(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":-3.4}');
|
||||
INSERT INTO jsons VALUES ('{"foo":5}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
|
||||
INSERT INTO jsons VALUES ('{"baz":2}');
|
||||
|
||||
SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
0
|
||||
-4000
|
||||
0
|
||||
-3.4
|
||||
5
|
||||
```
|
||||
|
||||
## simpleJSONExtractBool
|
||||
|
||||
Parses a true/false value from the value of the field named `field_name`. The result is `UInt8`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractBool(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns `1` if the value of the field is `true`, `0` otherwise. This means this function will return `0` in particular (but not only) in the following cases:
|
||||
- If the field doesn't exist.
|
||||
- If the field contains `true` as a string, e.g.: `{"field":"true"}`.
|
||||
- If the field contains `1` as a numerical value.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":false,"bar":true}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
|
||||
|
||||
SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json;
|
||||
SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
0
|
||||
1
|
||||
0
|
||||
0
|
||||
```
|
||||
|
||||
## simpleJSONExtractRaw
|
||||
|
||||
Returns the value of the field named `field_name` as a `String`, including separators.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractRaw(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":-3.4}');
|
||||
INSERT INTO jsons VALUES ('{"foo":5}');
|
||||
INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}');
|
||||
INSERT INTO jsons VALUES ('{"baz":2}');
|
||||
|
||||
SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
|
||||
"-4e3"
|
||||
-3.4
|
||||
5
|
||||
{"def":[1,2,3]}
|
||||
```
|
||||
|
||||
## simpleJSONExtractString
|
||||
|
||||
Parses `String` in double quotes from the value of the field named `field_name`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
simpleJSONExtractString(json, field_name)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
|
||||
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
|
||||
|
||||
**Returned value**
|
||||
|
||||
It returns the unescaped value of the field as a [`String`](../../sql-reference/data-types/string.md#string). It returns an empty `String` if the field doesn't contain a double quoted string, if unescaping fails, or if the field doesn't exist.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE jsons
|
||||
(
|
||||
`json` String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"\\u263"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"\\u263a"}');
|
||||
INSERT INTO jsons VALUES ('{"foo":"hello}');
|
||||
|
||||
SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json;
|
||||
```
|
||||
|
||||
```response
|
||||
\n\0
|
||||
|
||||
☺
|
||||
|
||||
```
|
||||
|
||||
## visitParamHas
|
||||
|
||||
This function is [an alias of `simpleJSONHas`](./json-functions#simplejsonhas).
|
||||
|
||||
## visitParamExtractUInt
|
||||
|
||||
This function is [an alias of `simpleJSONExtractUInt`](./json-functions#simplejsonextractuint).
|
||||
|
||||
## visitParamExtractInt
|
||||
|
||||
This function is [an alias of `simpleJSONExtractInt`](./json-functions#simplejsonextractint).
|
||||
|
||||
## visitParamExtractFloat
|
||||
|
||||
This function is [an alias of `simpleJSONExtractFloat`](./json-functions#simplejsonextractfloat).
|
||||
|
||||
## visitParamExtractBool
|
||||
|
||||
This function is [an alias of `simpleJSONExtractBool`](./json-functions#simplejsonextractbool).
|
||||
|
||||
## visitParamExtractRaw
|
||||
|
||||
This function is [an alias of `simpleJSONExtractRaw`](./json-functions#simplejsonextractraw).
|
||||
|
||||
## visitParamExtractString
|
||||
|
||||
This function is [an alias of `simpleJSONExtractString`](./json-functions#simplejsonextractstring).
|
||||
|
||||
# JSONExtract functions
|
||||
|
||||
The following functions are based on [simdjson](https://github.com/lemire/simdjson) and are designed for more complex JSON parsing requirements.
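For instance, the two families address nested fields differently: `simpleJSON*` searches for a key on any nesting level, while `JSONExtract*` parses the document and takes an explicit path (a minimal sketch; both expressions are expected to return `hello`):

```sql
SELECT
    simpleJSONExtractString('{"a":{"b":"hello"}}', 'b')  AS subset_parser,
    JSONExtractString('{"a":{"b":"hello"}}', 'a', 'b')   AS full_parser;
```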
|
||||
|
@ -299,6 +299,18 @@ sin(x)
|
||||
|
||||
Type: [Float*](../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT sin(1.23);
|
||||
```
|
||||
|
||||
```response
|
||||
0.9424888019316975
|
||||
```
|
||||
|
||||
## cos
|
||||
|
||||
Returns the cosine of the argument.
|
||||
|
@ -298,7 +298,7 @@ Full columns and constants are represented differently in memory. Functions usua
|
||||
Accepts any arguments, including `NULL`, and does nothing. Always returns 0.
|
||||
The argument is internally still evaluated. Useful e.g. for benchmarks.
|
||||
|
||||
## sleep(seconds)
|
||||
## sleep
|
||||
|
||||
Used to introduce a delay or pause in the execution of a query. It is primarily used for testing and debugging purposes.
|
||||
|
||||
@ -310,7 +310,7 @@ sleep(seconds)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `seconds`: [Int](../../sql-reference/data-types/int-uint.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds.
|
||||
- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -360,7 +360,7 @@ sleepEachRow(seconds)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `seconds`: [Int](../../sql-reference/data-types/int-uint.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds.
|
||||
- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds.
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -588,8 +588,41 @@ Result:
|
||||
|
||||
## substringUTF8
|
||||
|
||||
Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
Returns the substring of a string `s` which starts at the specified index `offset`, counted in Unicode code points. Counting starts from `1`. If `offset` is `0`, an empty string is returned. If `offset` is negative, the substring starts `offset` code points from the end of the string, rather than from the beginning. An optional argument `length` specifies the maximum number of code points the returned substring may have.
|
||||
|
||||
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
substringUTF8(s, offset[, length])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md)
|
||||
- `offset`: The starting position of the substring in `s`. [(U)Int*](../../sql-reference/data-types/int-uint.md).
|
||||
- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A substring of `s` with at most `length` code points, starting at index `offset`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT 'Täglich grüßt das Murmeltier.' AS str,
|
||||
substringUTF8(str, 9),
|
||||
substringUTF8(str, 9, 5)
|
||||
```
|
||||
|
||||
```response
|
||||
Täglich grüßt das Murmeltier. grüßt das Murmeltier. grüßt
|
||||
```
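A negative offset counts from the end of the string, for example (a minimal sketch, expected to return the last 11 code points, `Murmeltier.`):

```sql
SELECT substringUTF8('Täglich grüßt das Murmeltier.', -11);
```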
|
||||
|
||||
## substringIndex
|
||||
|
||||
@ -624,7 +657,39 @@ Result:
|
||||
|
||||
## substringIndexUTF8
|
||||
|
||||
Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
Returns the substring of `s` before `count` occurrences of the delimiter `delim`, specifically for Unicode code points.
|
||||
|
||||
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
substringIndexUTF8(s, delim, count)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `s`: The string to extract substring from. [String](../../sql-reference/data-types/string.md).
|
||||
- `delim`: The character to split. [String](../../sql-reference/data-types/string.md).
|
||||
- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT substringIndexUTF8('www.straßen-in-europa.de', '.', 2)
|
||||
```
|
||||
|
||||
```response
|
||||
www.straßen-in-europa
|
||||
```
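With a negative count, the substring is taken from the right-hand side of the string instead, for example (a minimal sketch, expected to return `straßen-in-europa.de`):

```sql
SELECT substringIndexUTF8('www.straßen-in-europa.de', '.', -2);
```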
|
||||
|
||||
## appendTrailingCharIfAbsent
|
||||
|
||||
|
@ -353,7 +353,7 @@ For efficiency, the `and` and `or` functions accept any number of arguments. The
|
||||
|
||||
ClickHouse supports the `IS NULL` and `IS NOT NULL` operators.
|
||||
|
||||
### IS NULL
|
||||
### IS NULL {#is_null}
|
||||
|
||||
- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns:
|
||||
- `1`, if the value is `NULL`.
|
||||
@ -374,7 +374,7 @@ SELECT x+100 FROM t_null WHERE y IS NULL
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
### IS NOT NULL
|
||||
### IS NOT NULL {#is_not_null}
|
||||
|
||||
- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns:
|
||||
- `0`, if the value is `NULL`.
|
||||
|
@ -13,13 +13,6 @@ a system table called `system.dropped_tables`.
|
||||
|
||||
If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view.
|
||||
|
||||
:::note
|
||||
UNDROP TABLE is experimental. To use it add this setting:
|
||||
```sql
|
||||
set allow_experimental_undrop_table_query = 1;
|
||||
```
|
||||
:::
|
||||
|
||||
:::tip
|
||||
Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md)
|
||||
:::
|
||||
@ -32,60 +25,53 @@ UNDROP TABLE [db.]name [UUID '<uuid>'] [ON CLUSTER cluster]
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
set allow_experimental_undrop_table_query = 1;
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE TABLE undropMe
|
||||
CREATE TABLE tab
|
||||
(
|
||||
`id` UInt8
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
```
|
||||
ORDER BY id;
|
||||
|
||||
DROP TABLE tab;
|
||||
|
||||
```sql
|
||||
DROP TABLE undropMe
|
||||
```
|
||||
```sql
|
||||
SELECT *
|
||||
FROM system.dropped_tables
|
||||
FORMAT Vertical
|
||||
FORMAT Vertical;
|
||||
```
|
||||
|
||||
```response
|
||||
Row 1:
|
||||
──────
|
||||
index: 0
|
||||
database: default
|
||||
table: undropMe
|
||||
table: tab
|
||||
uuid: aa696a1a-1d70-4e60-a841-4c80827706cc
|
||||
engine: MergeTree
|
||||
metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
|
||||
metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.tab.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
|
||||
table_dropped_time: 2023-04-05 14:12:12
|
||||
|
||||
1 row in set. Elapsed: 0.001 sec.
|
||||
```
|
||||
|
||||
```sql
|
||||
UNDROP TABLE undropMe
|
||||
```
|
||||
```response
|
||||
Ok.
|
||||
```
|
||||
```sql
|
||||
UNDROP TABLE tab;
|
||||
|
||||
SELECT *
|
||||
FROM system.dropped_tables
|
||||
FORMAT Vertical
|
||||
```
|
||||
FORMAT Vertical;
|
||||
|
||||
```response
|
||||
Ok.
|
||||
|
||||
0 rows in set. Elapsed: 0.001 sec.
|
||||
```
|
||||
|
||||
```sql
|
||||
DESCRIBE TABLE undropMe
|
||||
FORMAT Vertical
|
||||
DESCRIBE TABLE tab
|
||||
FORMAT Vertical;
|
||||
```
|
||||
|
||||
```response
|
||||
Row 1:
|
||||
──────
|
||||
|
@ -50,6 +50,7 @@
|
||||
#include <Functions/registerFunctions.h>
|
||||
#include <AggregateFunctions/registerAggregateFunctions.h>
|
||||
#include <Formats/registerFormats.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
using namespace std::literals;
|
||||
@ -1137,6 +1138,13 @@ void Client::processOptions(const OptionsDescription & options_description,
|
||||
}
|
||||
|
||||
|
||||
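/// Returns true when stdout refers to a regular file (e.g. the output is redirected to a file),
/// which is used below to auto-detect the output format instead of using the interactive default.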
static bool checkIfStdoutIsRegularFile()
|
||||
{
|
||||
struct stat file_stat;
|
||||
return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
|
||||
}
|
||||
|
||||
|
||||
void Client::processConfig()
|
||||
{
|
||||
if (!queries.empty() && config().has("queries-file"))
|
||||
@ -1173,7 +1181,14 @@ void Client::processConfig()
|
||||
pager = config().getString("pager", "");
|
||||
|
||||
is_default_format = !config().has("vertical") && !config().has("format");
|
||||
if (config().has("vertical"))
|
||||
if (is_default_format && checkIfStdoutIsRegularFile())
|
||||
{
|
||||
is_default_format = false;
|
||||
std::optional<String> format_from_file_name;
|
||||
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
|
||||
format = format_from_file_name ? *format_from_file_name : "TabSeparated";
|
||||
}
|
||||
else if (config().has("vertical"))
|
||||
format = config().getString("format", "Vertical");
|
||||
else
|
||||
format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated");
|
||||
@ -1377,8 +1392,8 @@ void Client::readArguments(
|
||||
}
|
||||
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseClient(int argc, char ** argv)
|
||||
{
|
||||
|
@ -143,7 +143,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
|
||||
ParserCodec codec_parser;
|
||||
|
||||
std::string codecs_line = boost::algorithm::join(codecs, ",");
|
||||
auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
codec = CompressionCodecFactory::instance().get(ast, nullptr);
|
||||
}
|
||||
else
|
||||
|
@ -109,8 +109,8 @@ static std::vector<std::string> extractFromConfig(
|
||||
return {configuration->getString(key)};
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseExtractFromConfig(int argc, char ** argv)
|
||||
{
|
||||
|
@ -70,8 +70,8 @@ void skipSpacesAndComments(const char*& pos, const char* end, bool print_comment
|
||||
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
extern const char * auto_time_zones[];
|
||||
|
||||
@ -234,7 +234,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
||||
size_t approx_query_length = multiple ? find_first_symbols<';'>(pos, end) - pos : end - pos;
|
||||
|
||||
ASTPtr res = parseQueryAndMovePosition(
|
||||
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth);
|
||||
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth, cmd_settings.max_parser_backtracks);
|
||||
|
||||
std::unique_ptr<ReadBuffer> insert_query_payload = nullptr;
|
||||
/// If the query is INSERT ... VALUES, then we will try to parse the data.
|
||||
|
@ -44,7 +44,7 @@ String KeeperClient::executeFourLetterCommand(const String & command)
|
||||
std::vector<String> KeeperClient::getCompletions(const String & prefix) const
|
||||
{
|
||||
Tokens tokens(prefix.data(), prefix.data() + prefix.size(), 0, false);
|
||||
IParser::Pos pos(tokens, 0);
|
||||
IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
|
||||
if (pos->type != TokenType::BareWord)
|
||||
return registered_commands_and_four_letter_words;
|
||||
@ -278,6 +278,7 @@ bool KeeperClient::processQueryText(const String & text)
|
||||
/* allow_multi_statements = */ true,
|
||||
/* max_query_size = */ 0,
|
||||
/* max_parser_depth = */ 0,
|
||||
/* max_parser_backtracks = */ 0,
|
||||
/* skip_insignificant = */ false);
|
||||
|
||||
if (!res)
|
||||
|
@ -11,6 +11,7 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
|
||||
LibraryBridgeHandlers.cpp
|
||||
SharedLibrary.cpp
|
||||
library-bridge.cpp
|
||||
createFunctionBaseCast.cpp
|
||||
)
|
||||
|
||||
clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Bridge/IBridge.h>
|
||||
#include "LibraryBridgeHandlerFactory.h"
|
||||
|
||||
|
23
programs/library-bridge/createFunctionBaseCast.cpp
Normal file
@ -0,0 +1,23 @@
|
||||
#include <memory>
|
||||
#include <Functions/CastOverloadResolver.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
class IFunctionBase;
|
||||
using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
|
||||
|
||||
FunctionBasePtr createFunctionBaseCast(
|
||||
ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional<CastDiagnostic>, CastType)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge");
|
||||
}
|
||||
|
||||
}
|
@ -327,6 +327,14 @@ static bool checkIfStdinIsRegularFile()
|
||||
return fstat(STDIN_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
|
||||
}
|
||||
|
||||
|
||||
static bool checkIfStdoutIsRegularFile()
|
||||
{
|
||||
struct stat file_stat;
|
||||
return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
|
||||
}
|
||||
|
||||
|
||||
std::string LocalServer::getInitialCreateTableQuery()
|
||||
{
|
||||
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || queries.empty()))
|
||||
@ -638,7 +646,14 @@ void LocalServer::processConfig()
|
||||
if (config().has("macros"))
|
||||
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
|
||||
|
||||
format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
|
||||
if (!config().has("output-format") && !config().has("format") && checkIfStdoutIsRegularFile())
|
||||
{
|
||||
std::optional<String> format_from_file_name;
|
||||
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
|
||||
format = format_from_file_name ? *format_from_file_name : "TSV";
|
||||
}
|
||||
else
|
||||
format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
|
||||
insert_format = "Values";
|
||||
|
||||
/// Setting value from cmd arg overrides one from config
|
||||
@ -944,8 +959,8 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
|
||||
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseLocal(int argc, char ** argv)
|
||||
{
|
||||
@ -985,12 +1000,6 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv)
|
||||
{
|
||||
std::vector<char *> argv(*pargv, *pargv + (*pargc + 1));
|
||||
|
||||
if (!isClickhouseApp("local", argv))
|
||||
{
|
||||
std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode, only clickhouse local is available." << "\033[0m" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/// As a user you can add flags to clickhouse binary in fuzzing mode as follows
|
||||
/// clickhouse local <set of clickhouse-local specific flag> -- <set of libfuzzer flags>
|
||||
|
||||
@ -998,13 +1007,16 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv)
|
||||
|
||||
auto it = argv.begin() + 1;
|
||||
for (; *it; ++it)
|
||||
{
|
||||
if (strcmp(*it, "--") == 0)
|
||||
{
|
||||
++it;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
while (*it)
|
||||
{
|
||||
if (strncmp(*it, "--", 2) != 0)
|
||||
{
|
||||
*(p++) = *it;
|
||||
@ -1012,6 +1024,7 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv)
|
||||
}
|
||||
else
|
||||
++it;
|
||||
}
|
||||
|
||||
*pargc = static_cast<int>(p - &(*pargv)[0]);
|
||||
*p = nullptr;
|
||||
|
@ -68,7 +68,6 @@ namespace
|
||||
using MainFunc = int (*)(int, char**);
|
||||
|
||||
#if !defined(FUZZING_MODE)
|
||||
|
||||
/// Add an item here to register new application
|
||||
std::pair<std::string_view, MainFunc> clickhouse_applications[] =
|
||||
{
|
||||
@ -105,13 +104,6 @@ std::pair<std::string_view, MainFunc> clickhouse_applications[] =
|
||||
{"restart", mainEntryClickHouseRestart},
|
||||
};
|
||||
|
||||
/// Add an item here to register a new short name
|
||||
std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
|
||||
{
|
||||
{"chl", "local"},
|
||||
{"chc", "client"},
|
||||
};
|
||||
|
||||
int printHelp(int, char **)
|
||||
{
|
||||
std::cerr << "Use one of the following commands:" << std::endl;
|
||||
@ -121,6 +113,13 @@ int printHelp(int, char **)
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Add an item here to register a new short name
|
||||
std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
|
||||
{
|
||||
{"chl", "local"},
|
||||
{"chc", "client"},
|
||||
};
|
||||
|
||||
|
||||
enum class InstructionFail
|
||||
{
|
||||
|
@ -1204,8 +1204,8 @@ public:
|
||||
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseObfuscator(int argc, char ** argv)
|
||||
try
|
||||
|
@ -13,6 +13,7 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
|
||||
getIdentifierQuote.cpp
|
||||
odbc-bridge.cpp
|
||||
validateODBCConnectionString.cpp
|
||||
createFunctionBaseCast.cpp
|
||||
)
|
||||
|
||||
clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
|
||||
|
23
programs/odbc-bridge/createFunctionBaseCast.cpp
Normal file
@ -0,0 +1,23 @@
|
||||
#include <memory>
|
||||
#include <Functions/CastOverloadResolver.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
class IFunctionBase;
|
||||
using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
|
||||
|
||||
FunctionBasePtr createFunctionBaseCast(
|
||||
ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional<CastDiagnostic>, CastType)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge");
|
||||
}
|
||||
|
||||
}
|
@ -733,8 +733,6 @@ try
|
||||
LOG_INFO(log, "Available CPU instruction sets: {}", cpu_info);
|
||||
#endif
|
||||
|
||||
sanityChecks(*this);
|
||||
|
||||
// Initialize global thread pool. Do it before we fetch configs from zookeeper
|
||||
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
|
||||
// ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well.
|
||||
@ -904,6 +902,7 @@ try
|
||||
config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH));
|
||||
config().removeConfiguration(old_configuration.get());
|
||||
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
|
||||
global_context->setConfig(loaded_config.configuration);
|
||||
}
|
||||
|
||||
Settings::checkNoSettingNamesAtTopLevel(config(), config_path);
|
||||
@ -911,6 +910,9 @@ try
|
||||
/// We need to reload server settings because config could be updated via zookeeper.
|
||||
server_settings.loadSettingsFromConfig(config());
|
||||
|
||||
/// NOTE: Do sanity checks after we loaded all possible substitutions (for the configuration) from ZK
|
||||
sanityChecks(*this);
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
std::string executable_path = getExecutablePath();
|
||||
|
||||
|
@ -133,20 +133,20 @@ public:
|
||||
/// This function also enables custom prefixes to be used.
|
||||
void setCustomSettingsPrefixes(const Strings & prefixes);
|
||||
void setCustomSettingsPrefixes(const String & comma_separated_prefixes);
|
||||
bool isSettingNameAllowed(const std::string_view name) const;
|
||||
void checkSettingNameIsAllowed(const std::string_view name) const;
|
||||
bool isSettingNameAllowed(std::string_view name) const;
|
||||
void checkSettingNameIsAllowed(std::string_view name) const;
|
||||
|
||||
/// Allows implicit user creation without password (by default it's allowed).
|
||||
/// In other words, allow 'CREATE USER' queries without 'IDENTIFIED WITH' clause.
|
||||
void setImplicitNoPasswordAllowed(const bool allow_implicit_no_password_);
|
||||
void setImplicitNoPasswordAllowed(bool allow_implicit_no_password_);
|
||||
bool isImplicitNoPasswordAllowed() const;
|
||||
|
||||
/// Allows users without password (by default it's allowed).
|
||||
void setNoPasswordAllowed(const bool allow_no_password_);
|
||||
void setNoPasswordAllowed(bool allow_no_password_);
|
||||
bool isNoPasswordAllowed() const;
|
||||
|
||||
/// Allows users with plaintext password (by default it's allowed).
|
||||
void setPlaintextPasswordAllowed(const bool allow_plaintext_password_);
|
||||
void setPlaintextPasswordAllowed(bool allow_plaintext_password_);
|
||||
bool isPlaintextPasswordAllowed() const;
|
||||
|
||||
/// Default password type when the user does not specify it.
|
||||
|
@ -62,7 +62,7 @@ AccessEntityPtr deserializeAccessEntityImpl(const String & definition)
|
||||
const char * end = begin + definition.size();
|
||||
while (pos < end)
|
||||
{
|
||||
queries.emplace_back(parseQueryAndMovePosition(parser, pos, end, "", true, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH));
|
||||
queries.emplace_back(parseQueryAndMovePosition(parser, pos, end, "", true, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS));
|
||||
while (isWhitespaceASCII(*pos) || *pos == ';')
|
||||
++pos;
|
||||
}
|
||||
|
@ -616,7 +616,7 @@ UUID IAccessStorage::generateRandomID()
|
||||
}
|
||||
|
||||
|
||||
void IAccessStorage::clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const LoggerPtr log_)
|
||||
void IAccessStorage::clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_)
|
||||
{
|
||||
std::unordered_map<UUID, size_t> positions_by_id;
|
||||
std::unordered_map<std::string_view, size_t> positions_by_type_and_name[static_cast<size_t>(AccessEntityType::MAX)];
|
||||
|
@ -228,7 +228,7 @@ protected:
|
||||
static UUID generateRandomID();
|
||||
LoggerPtr getLogger() const;
|
||||
static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); }
|
||||
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const LoggerPtr log_);
|
||||
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_);
|
||||
[[noreturn]] void throwNotFound(const UUID & id) const;
|
||||
[[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const;
|
||||
[[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type);
|
||||
|
@ -86,7 +86,7 @@ void RowPolicyCache::PolicyInfo::setPolicy(const RowPolicyPtr & policy_)
|
||||
try
|
||||
{
|
||||
ParserExpression parser;
|
||||
parsed_filters[filter_type_i] = parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
parsed_filters[filter_type_i] = parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -66,7 +66,7 @@ namespace
|
||||
|
||||
String error_message;
|
||||
const char * pos = string_query.data();
|
||||
auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, 0);
|
||||
auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true);
|
||||
|
||||
if (!ast)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse grant query. Error: {}", error_message);
|
||||
|
@ -182,11 +182,14 @@ public:
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::NONE)
|
||||
{
|
||||
if (limit_num_elems && cur_elems.value.size() >= max_elems)
|
||||
if constexpr (limit_num_elems)
|
||||
{
|
||||
if constexpr (Trait::last)
|
||||
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value;
|
||||
return;
|
||||
if (cur_elems.value.size() >= max_elems)
|
||||
{
|
||||
if constexpr (Trait::last)
|
||||
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
cur_elems.value.push_back(row_value, arena);
|
||||
@ -236,7 +239,7 @@ public:
|
||||
|
||||
void mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
if constexpr (!limit_num_elems)
|
||||
{
|
||||
if (rhs_elems.value.size())
|
||||
cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);
|
||||
|
@ -144,7 +144,7 @@ public:
|
||||
count = other.count;
|
||||
compressed = other.compressed;
|
||||
|
||||
sampled.resize(other.sampled.size());
|
||||
sampled.resize_exact(other.sampled.size());
|
||||
memcpy(sampled.data(), other.sampled.data(), sizeof(Stats) * other.sampled.size());
|
||||
return;
|
||||
}
|
||||
@ -180,7 +180,7 @@ public:
|
||||
compress();
|
||||
|
||||
backup_sampled.clear();
|
||||
backup_sampled.reserve(sampled.size() + other.sampled.size());
|
||||
backup_sampled.reserve_exact(sampled.size() + other.sampled.size());
|
||||
double merged_relative_error = std::max(relative_error, other.relative_error);
|
||||
size_t merged_count = count + other.count;
|
||||
Int64 additional_self_delta = static_cast<Int64>(std::floor(2 * other.relative_error * other.count));
|
||||
@ -268,11 +268,7 @@ public:
|
||||
|
||||
size_t sampled_len = 0;
|
||||
readBinaryLittleEndian(sampled_len, buf);
|
||||
if (sampled_len > compress_threshold)
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA, "The number of elements {} for quantileGK exceeds {}", sampled_len, compress_threshold);
|
||||
|
||||
sampled.resize(sampled_len);
|
||||
sampled.resize_exact(sampled_len);
|
||||
|
||||
for (size_t i = 0; i < sampled_len; ++i)
|
||||
{
|
||||
@ -317,7 +313,7 @@ private:
|
||||
::sort(head_sampled.begin(), head_sampled.end());
|
||||
|
||||
backup_sampled.clear();
|
||||
backup_sampled.reserve(sampled.size() + head_sampled.size());
|
||||
backup_sampled.reserve_exact(sampled.size() + head_sampled.size());
|
||||
|
||||
size_t sample_idx = 0;
|
||||
size_t ops_idx = 0;
|
||||
|
@ -483,6 +483,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; }
|
||||
bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override
|
||||
{
|
||||
@ -576,6 +577,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; }
|
||||
bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override
|
||||
{
|
||||
|
@ -142,6 +142,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -165,6 +165,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -111,6 +111,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -152,6 +152,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_function->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_function->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -92,6 +92,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -162,6 +162,10 @@ public:
|
||||
/// Tells if merge() with thread pool parameter could be used.
|
||||
virtual bool isAbleToParallelizeMerge() const { return false; }
|
||||
|
||||
/// Return true if it is allowed to replace call of `addBatch`
|
||||
/// to `addBatchSinglePlace` for ranges of consecutive equal keys.
|
||||
virtual bool canOptimizeEqualKeysRanges() const { return true; }
|
||||
|
||||
/// Should be used only if isAbleToParallelizeMerge() returned true.
|
||||
virtual void
|
||||
merge(AggregateDataPtr __restrict /*place*/, ConstAggregateDataPtr /*rhs*/, ThreadPool & /*thread_pool*/, Arena * /*arena*/) const
|
||||
|
@ -27,6 +27,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
|
||||
|
||||
auto initialize = [&]() mutable
|
||||
{
|
||||
if (context)
|
||||
return true;
|
||||
|
||||
shared_context = Context::createShared();
|
||||
context = Context::createGlobal(shared_context.get());
|
||||
context->makeGlobalContext();
|
||||
|
@ -81,7 +81,8 @@ void getAggregateFunctionNameAndParametersArray(
|
||||
ParserExpressionList params_parser(false);
|
||||
ASTPtr args_ast = parseQuery(params_parser,
|
||||
parameters_str.data(), parameters_str.data() + parameters_str.size(),
|
||||
"parameters of aggregate function in " + error_context, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
"parameters of aggregate function in " + error_context,
|
||||
0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
|
||||
if (args_ast->children.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect list of parameters to aggregate function {}",
|
||||
|
@ -54,10 +54,10 @@ public:
|
||||
if (!constant_node)
|
||||
return;
|
||||
|
||||
const auto & constant_value_literal = constant_node->getValue();
|
||||
if (!isInt64OrUInt64FieldType(constant_value_literal.getType()))
|
||||
if (auto constant_type = constant_node->getResultType(); !isNativeInteger(constant_type))
|
||||
return;
|
||||
|
||||
const auto & constant_value_literal = constant_node->getValue();
|
||||
if (getSettings().aggregate_functions_null_for_empty)
|
||||
return;
|
||||
|
||||
|
@ -25,7 +25,7 @@ String BackupInfo::toString() const
|
||||
BackupInfo BackupInfo::fromString(const String & str)
|
||||
{
|
||||
ParserIdentifierWithOptionalParameters parser;
|
||||
ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
return fromAST(*ast);
|
||||
}
|
||||
|
||||
|
@ -101,10 +101,12 @@ RestorerFromBackup::RestorerFromBackup(
|
||||
|
||||
RestorerFromBackup::~RestorerFromBackup()
|
||||
{
|
||||
if (!futures.empty())
|
||||
/// If an exception occurs we can come here to the destructor having some tasks still unfinished.
|
||||
/// We have to wait until they finish.
|
||||
if (getNumFutures() > 0)
|
||||
{
|
||||
LOG_ERROR(log, "RestorerFromBackup must not be destroyed while {} tasks are still running", futures.size());
|
||||
chassert(false && "RestorerFromBackup must not be destroyed while some tasks are still running");
|
||||
LOG_INFO(log, "Waiting for {} tasks to finish", getNumFutures());
|
||||
waitFutures();
|
||||
}
|
||||
}
|
||||
|
||||
@ -422,7 +424,7 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_
|
||||
readStringUntilEOF(create_query_str, *read_buffer);
|
||||
read_buffer.reset();
|
||||
ParserCreateQuery create_parser;
|
||||
ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
applyCustomStoragePolicy(create_table_query);
|
||||
renameDatabaseAndTableNameInCreateQuery(create_table_query, renaming_map, context->getGlobalContext());
|
||||
String create_table_query_str = serializeAST(*create_table_query);
|
||||
@ -532,7 +534,7 @@ void RestorerFromBackup::findDatabaseInBackupImpl(const String & database_name_i
|
||||
readStringUntilEOF(create_query_str, *read_buffer);
|
||||
read_buffer.reset();
|
||||
ParserCreateQuery create_parser;
|
||||
ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
renameDatabaseAndTableNameInCreateQuery(create_database_query, renaming_map, context->getGlobalContext());
|
||||
String create_database_query_str = serializeAST(*create_database_query);
|
||||
|
||||
|
@@ -345,7 +345,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
if (dialect == Dialect::kusto)
parser = std::make_unique<ParserKQLStatement>(end, global_context->getSettings().allow_settings_after_format_in_insert);
else if (dialect == Dialect::prql)
parser = std::make_unique<ParserPRQLQuery>(max_length, settings.max_parser_depth);
parser = std::make_unique<ParserPRQLQuery>(max_length, settings.max_parser_depth, settings.max_parser_backtracks);
else
parser = std::make_unique<ParserQuery>(end, global_context->getSettings().allow_settings_after_format_in_insert);

@@ -353,9 +353,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
{
String message;
if (dialect == Dialect::kusto)
res = tryParseKQLQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth);
res = tryParseKQLQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks, true);
else
res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth);
res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks, true);

if (!res)
{
@@ -366,9 +366,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
else
{
if (dialect == Dialect::kusto)
res = parseKQLQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth);
res = parseKQLQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks);
else
res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth);
res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks);
}

if (is_interactive)
@@ -385,12 +385,12 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu

/// Consumes trailing semicolons and tries to consume the same-line trailing comment.
void ClientBase::adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth)
void ClientBase::adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth, uint32_t max_parser_backtracks)
{
// We have to skip the trailing semicolon that might be left
// after VALUES parsing or just after a normal semicolon-terminated query.
Tokens after_query_tokens(this_query_end, all_queries_end);
IParser::Pos after_query_iterator(after_query_tokens, max_parser_depth);
IParser::Pos after_query_iterator(after_query_tokens, max_parser_depth, max_parser_backtracks);
while (after_query_iterator.isValid() && after_query_iterator->type == TokenType::Semicolon)
{
this_query_end = after_query_iterator->end;
@@ -1984,6 +1984,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
return MultiQueryProcessingStage::QUERIES_END;

unsigned max_parser_depth = static_cast<unsigned>(global_context->getSettingsRef().max_parser_depth);
unsigned max_parser_backtracks = static_cast<unsigned>(global_context->getSettingsRef().max_parser_backtracks);

// If there are only comments left until the end of file, we just
// stop. The parser can't handle this situation because it always
@@ -1994,7 +1995,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
// and it makes more sense to treat them as such.
{
Tokens tokens(this_query_begin, all_queries_end);
IParser::Pos token_iterator(tokens, max_parser_depth);
IParser::Pos token_iterator(tokens, max_parser_depth, max_parser_backtracks);
if (!token_iterator.isValid())
return MultiQueryProcessingStage::QUERIES_END;
}
@@ -2015,7 +2016,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
if (ignore_error)
{
Tokens tokens(this_query_begin, all_queries_end);
IParser::Pos token_iterator(tokens, max_parser_depth);
IParser::Pos token_iterator(tokens, max_parser_depth, max_parser_backtracks);
while (token_iterator->type != TokenType::Semicolon && token_iterator.isValid())
++token_iterator;
this_query_begin = token_iterator->end;
@@ -2055,7 +2056,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
// after we have processed the query. But even this guess is
// beneficial so that we see proper trailing comments in "echo" and
// server log.
adjustQueryEnd(this_query_end, all_queries_end, max_parser_depth);
adjustQueryEnd(this_query_end, all_queries_end, max_parser_depth, max_parser_backtracks);
return MultiQueryProcessingStage::EXECUTE_QUERY;
}

@@ -2251,7 +2252,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
this_query_end = insert_ast->end;
adjustQueryEnd(
this_query_end, all_queries_end,
static_cast<unsigned>(global_context->getSettingsRef().max_parser_depth));
static_cast<unsigned>(global_context->getSettingsRef().max_parser_depth),
static_cast<unsigned>(global_context->getSettingsRef().max_parser_backtracks));
}

// Report error.

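A minimal sketch of the token-iteration pattern used above, with both limits passed to IParser::Pos. The constructor arguments mirror the diff; the header paths and the free function wrapping them are assumptions:

#include <Parsers/IParser.h>
#include <Parsers/Lexer.h>
#include <Parsers/TokenIterator.h>

using namespace DB;

// Skip any run of trailing semicolons starting at `query_end`, moving it forward.
void skipTrailingSemicolons(const char *& query_end, const char * all_queries_end,
                            uint32_t max_parser_depth, uint32_t max_parser_backtracks)
{
    Tokens after_query_tokens(query_end, all_queries_end);
    IParser::Pos it(after_query_tokens, max_parser_depth, max_parser_backtracks);
    while (it.isValid() && it->type == TokenType::Semicolon)
    {
        query_end = it->end;
        ++it;
    }
}
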
@@ -94,7 +94,7 @@ protected:
void processParsedSingleQuery(const String & full_query, const String & query_to_execute,
ASTPtr parsed_query, std::optional<bool> echo_query_ = {}, bool report_error = false);

static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth);
static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth, uint32_t max_parser_backtracks);
ASTPtr parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const;
static void setupSignalHandler();

@@ -569,7 +569,8 @@ void QueryFuzzer::fuzzColumnDeclaration(ASTColumnDeclaration & column)
auto data_type = fuzzDataType(DataTypeFactory::instance().get(column.type));

ParserDataType parser;
column.type = parseQuery(parser, data_type->getName(), DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
column.type = parseQuery(parser, data_type->getName(),
DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
}
}

@@ -821,7 +822,8 @@ static ASTPtr tryParseInsertQuery(const String & full_query)
ParserInsertQuery parser(end, false);
String message;

return tryParseQuery(parser, pos, end, message, false, "", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
return tryParseQuery(parser, pos, end, message, false, "", false,
DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true);
}

ASTs QueryFuzzer::getInsertQueriesForFuzzedTables(const String & full_query)
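For the non-throwing variant, a sketch of the usage pattern: tryParseQuery returns nullptr and fills the message string instead of throwing. The positional arguments follow the calls in the diff; the header paths and the meaning of the boolean flags (noted in comments) are best-effort assumptions:

#include <Core/Defines.h>
#include <Parsers/ParserInsertQuery.h>
#include <Parsers/parseQuery.h>

using namespace DB;

// Returns nullptr on a parse error; the error text is written into `message`.
ASTPtr tryParseInsert(const String & full_query)
{
    const char * pos = full_query.data();
    const char * end = pos + full_query.size();

    ParserInsertQuery parser(end, false);  // second flag as in the diff
    String message;
    return tryParseQuery(parser, pos, end, message, false /*hilite?*/, "", false /*multi-statements?*/,
                         DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH,
                         DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true /*trailing flag added by this change*/);
}
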
@@ -914,6 +916,38 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child)
child = makeASTFunction(
"toFixedString", std::make_shared<ASTLiteral>(value), std::make_shared<ASTLiteral>(static_cast<UInt64>(value.size())));
}
else if (type == Field::Types::Which::UInt64 && fuzz_rand() % 7 == 0)
{
child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toUInt128" : "toUInt256", std::make_shared<ASTLiteral>(l->value.get<UInt64>()));
}
else if (type == Field::Types::Which::Int64 && fuzz_rand() % 7 == 0)
{
child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toInt128" : "toInt256", std::make_shared<ASTLiteral>(l->value.get<Int64>()));
}
else if (type == Field::Types::Which::Float64 && fuzz_rand() % 7 == 0)
{
int decimal = fuzz_rand() % 4;
if (decimal == 0)
child = makeASTFunction(
"toDecimal32",
std::make_shared<ASTLiteral>(l->value.get<Float64>()),
std::make_shared<ASTLiteral>(static_cast<UInt64>(fuzz_rand() % 9)));
else if (decimal == 1)
child = makeASTFunction(
"toDecimal64",
std::make_shared<ASTLiteral>(l->value.get<Float64>()),
std::make_shared<ASTLiteral>(static_cast<UInt64>(fuzz_rand() % 18)));
else if (decimal == 2)
child = makeASTFunction(
"toDecimal128",
std::make_shared<ASTLiteral>(l->value.get<Float64>()),
std::make_shared<ASTLiteral>(static_cast<UInt64>(fuzz_rand() % 38)));
else
child = makeASTFunction(
"toDecimal256",
std::make_shared<ASTLiteral>(l->value.get<Float64>()),
std::make_shared<ASTLiteral>(static_cast<UInt64>(fuzz_rand() % 76)));
}

if (fuzz_rand() % 7 == 0)
child = makeASTFunction("toNullable", child);
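The new fuzzing branch wraps numeric literals into wider casts, with scale bounds of 9, 18, 38 and 76 digits for Decimal32/64/128/256 respectively. A minimal sketch of building one such AST node; header paths and the helper function are assumptions, the call shape mirrors the diff:

#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>

using namespace DB;

// Wrap a Float64 literal into toDecimal64(value, scale) with a random scale in [0, 18).
ASTPtr wrapIntoDecimal64(Float64 value, UInt64 rand)
{
    return makeASTFunction(
        "toDecimal64",
        std::make_shared<ASTLiteral>(value),
        std::make_shared<ASTLiteral>(rand % 18));
}
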
@@ -933,7 +967,19 @@ ASTPtr QueryFuzzer::reverseLiteralFuzzing(ASTPtr child)
{
if (auto * function = child.get()->as<ASTFunction>())
{
std::unordered_set<String> can_be_reverted{"toNullable", "toLowCardinality", "materialize"};
const std::unordered_set<String> can_be_reverted{
"materialize",
"toDecimal32", /// Keeping the first parameter only should be ok (valid query most of the time)
"toDecimal64",
"toDecimal128",
"toDecimal256",
"toFixedString", /// Same as toDecimal
"toInt128",
"toInt256",
"toLowCardinality",
"toNullable",
"toUInt128",
"toUInt256"};
if (can_be_reverted.contains(function->name) && function->children.size() == 1)
{
if (fuzz_rand() % 7 == 0)

@@ -47,9 +47,7 @@ private:

std::unique_ptr<MemoryChunk> prev;

MemoryChunk()
{
}
MemoryChunk() = default;

void swap(MemoryChunk & other)
{

@@ -9,6 +9,7 @@
#include <IO/MMappedFileCache.h>
#include <IO/ReadHelpers.h>
#include <base/errnoToString.h>
#include <base/find_symbols.h>
#include <base/getPageSize.h>
#include <sys/resource.h>
#include <chrono>
@@ -90,6 +91,9 @@ AsynchronousMetrics::AsynchronousMetrics(
openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", cgroupcpu_cfs_quota);
}

openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count);
openFileIfExists("/proc/self/maps", vm_maps);

openSensors();
openBlockDevices();
openEDAC();
@@ -1423,6 +1427,55 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
}
}

if (vm_max_map_count)
{
try
{
vm_max_map_count->rewind();

uint64_t max_map_count = 0;
readText(max_map_count, *vm_max_map_count);
new_values["VMMaxMapCount"] = { max_map_count, "The maximum number of memory mappings a process may have (/proc/sys/vm/max_map_count)."};
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count);
}
}

if (vm_maps)
{
try
{
vm_maps->rewind();

uint64_t num_maps = 0;
while (!vm_maps->eof())
{
char * next_pos = find_first_symbols<'\n'>(vm_maps->position(), vm_maps->buffer().end());
vm_maps->position() = next_pos;

if (!vm_maps->hasPendingData())
continue;

if (*vm_maps->position() == '\n')
{
++num_maps;
++vm_maps->position();
}
}
new_values["VMNumMaps"] = { num_maps,
"The current number of memory mappings of the process (/proc/self/maps)."
" If it is close to the maximum (VMMaxMapCount), you should increase the limit for vm.max_map_count in /etc/sysctl.conf"};
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
openFileIfExists("/proc/self/maps", vm_maps);
}
}

try
{
for (size_t i = 0, size = thermal.size(); i < size; ++i)

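Outside the ClickHouse ReadBuffer machinery, the same two values can be sampled with plain standard C++; a minimal standalone sketch (Linux-only, file paths as in the diff):

#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>

int main()
{
    // /proc/sys/vm/max_map_count holds a single integer: the per-process mapping limit.
    uint64_t max_map_count = 0;
    std::ifstream("/proc/sys/vm/max_map_count") >> max_map_count;

    // Each line of /proc/self/maps describes one mapping of the current process.
    uint64_t num_maps = 0;
    std::ifstream maps("/proc/self/maps");
    for (std::string line; std::getline(maps, line);)
        ++num_maps;

    std::cout << "VMNumMaps=" << num_maps << " VMMaxMapCount=" << max_map_count << '\n';
}
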
@@ -123,6 +123,9 @@ private:
std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_quota TSA_GUARDED_BY(data_mutex);
std::optional<ReadBufferFromFilePRead> cgroupcpu_max TSA_GUARDED_BY(data_mutex);

std::optional<ReadBufferFromFilePRead> vm_max_map_count TSA_GUARDED_BY(data_mutex);
std::optional<ReadBufferFromFilePRead> vm_maps TSA_GUARDED_BY(data_mutex);

std::vector<std::unique_ptr<ReadBufferFromFilePRead>> thermal TSA_GUARDED_BY(data_mutex);

std::unordered_map<String /* device name */,

@@ -62,7 +62,6 @@ struct LastElementCache
bool check(const Key & key) const { return value.first == key; }

bool hasOnlyOneValue() const { return found && misses == 1; }
UInt64 getMisses() const { return misses; }
};

template <typename Data>
@@ -232,7 +231,7 @@ public:
ALWAYS_INLINE UInt64 getCacheMissesSinceLastReset() const
{
if constexpr (consecutive_keys_optimization)
return cache.getMisses();
return cache.misses;
return 0;
}

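A minimal standalone sketch of the consecutive-keys cache idea behind these counters: a one-element cache that records how many times the incoming key differed from the previous one. Names and types here are illustrative, not the ClickHouse classes:

#include <cstdint>
#include <iostream>

// One-element cache: a hit means the key repeats the previous row's key,
// so aggregation can reuse the previously found slot.
template <typename Key>
struct LastKeyCache
{
    Key last{};
    bool found = false;
    uint64_t misses = 0;

    bool checkAndUpdate(const Key & key)
    {
        if (found && last == key)
            return true;   // consecutive duplicate: cache hit
        last = key;
        found = true;
        ++misses;          // had to look the key up for real
        return false;
    }
};

int main()
{
    LastKeyCache<int> cache;
    int keys[] = {1, 1, 1, 2, 2, 3};
    for (int k : keys)
        cache.checkAndUpdate(k);
    std::cout << "misses=" << cache.misses << '\n'; // prints 3 (keys 1, 2, 3)
}
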
@@ -427,6 +427,8 @@ void ConfigProcessor::doIncludesRecursive(

/// Replace the original contents, not add to it.
bool replace = attributes->getNamedItem("replace");
/// Merge with the original contents
bool merge = attributes->getNamedItem("merge");

bool included_something = false;

@@ -450,7 +452,6 @@ void ConfigProcessor::doIncludesRecursive(
}
else
{
/// Replace the whole node not just contents.
if (node->nodeName() == "include")
{
const NodeListPtr children = node_to_include->childNodes();
@@ -458,8 +459,18 @@ void ConfigProcessor::doIncludesRecursive(
for (Node * child = children->item(0); child; child = next_child)
{
next_child = child->nextSibling();
NodePtr new_node = config->importNode(child, true);
node->parentNode()->insertBefore(new_node, node);

/// Recursively replace existing nodes in merge mode
if (merge)
{
NodePtr new_node = config->importNode(child->parentNode(), true);
mergeRecursive(config, node->parentNode(), new_node);
}
else /// Append to existing node by default
{
NodePtr new_node = config->importNode(child, true);
node->parentNode()->insertBefore(new_node, node);
}
}

node->parentNode()->removeChild(node);
@@ -777,9 +788,9 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfig(bool allow_zk_includes
}

ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
zkutil::ZooKeeperNodeCache & zk_node_cache,
const zkutil::EventPtr & zk_changed_event,
bool fallback_to_preprocessed)
zkutil::ZooKeeperNodeCache & zk_node_cache,
const zkutil::EventPtr & zk_changed_event,
bool fallback_to_preprocessed)
{
XMLDocumentPtr config_xml;
bool has_zk_includes;

@@ -297,7 +297,7 @@ void DNSResolver::setDisableCacheFlag(bool is_disabled)
impl->disable_cache = is_disabled;
}

void DNSResolver::setCacheMaxEntries(const UInt64 cache_max_entries)
void DNSResolver::setCacheMaxEntries(UInt64 cache_max_entries)
{
impl->cache_address.setMaxSizeInBytes(cache_max_entries);
impl->cache_host.setMaxSizeInBytes(cache_max_entries);

@@ -56,7 +56,7 @@ public:
void setDisableCacheFlag(bool is_disabled = true);

/// Set a limit of entries in cache
void setCacheMaxEntries(const UInt64 cache_max_entries);
void setCacheMaxEntries(UInt64 cache_max_entries);

/// Drops all caches
void dropCache();

@@ -255,7 +255,7 @@ private:

static LUTIndex toLUTIndex(ExtendedDayNum d)
{
return normalizeLUTIndex(static_cast<Int64>(d + daynum_offset_epoch));
return normalizeLUTIndex(static_cast<Int64>(d + daynum_offset_epoch)); /// NOLINT
}

LUTIndex toLUTIndex(Time t) const

@@ -584,10 +584,6 @@
M(703, INVALID_IDENTIFIER) \
M(704, QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS) \
M(705, TABLE_NOT_EMPTY) \
\
M(900, DISTRIBUTED_CACHE_ERROR) \
M(901, CANNOT_USE_DISTRIBUTED_CACHE) \
\
M(706, LIBSSH_ERROR) \
M(707, GCP_ERROR) \
M(708, ILLEGAL_STATISTIC) \
@@ -599,6 +595,10 @@
M(715, CANNOT_DETECT_FORMAT) \
M(716, CANNOT_FORGET_PARTITION) \
M(717, EXPERIMENTAL_FEATURE_ERROR) \
M(718, TOO_SLOW_PARSING) \
\
M(900, DISTRIBUTED_CACHE_ERROR) \
M(901, CANNOT_USE_DISTRIBUTED_CACHE) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

@@ -1,5 +1,7 @@
#include <Common/FunctionDocumentation.h>

#include <boost/algorithm/string.hpp>

namespace DB
{

@@ -31,14 +33,7 @@ std::string FunctionDocumentation::examplesAsString() const

std::string FunctionDocumentation::categoriesAsString() const
{
if (categories.empty())
return "";

auto it = categories.begin();
std::string res = *it;
for (; it != categories.end(); ++it)
res += ", " + *it;
return res;
return boost::algorithm::join(categories, ", ");
}

}

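The replaced loop double-counted the first category: it seeded `res` with `*it` and then appended the same element again on the first iteration, yielding "A, A, B" instead of "A, B". A minimal standalone illustration of the join-based replacement (the container type here is illustrative):

#include <iostream>
#include <set>
#include <string>
#include <boost/algorithm/string/join.hpp>

int main()
{
    std::set<std::string> categories{"Arithmetic", "String"};
    // join() handles the empty, single-element and general cases uniformly,
    // inserting ", " only between elements.
    std::cout << boost::algorithm::join(categories, ", ") << '\n'; // Arithmetic, String
}
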
@@ -41,9 +41,9 @@ public:
}

/// There is no copy constructor because only one MultiVersion should own the same object.
MultiVersion(MultiVersion && src) { *this = std::move(src); }
MultiVersion(MultiVersion && src) { *this = std::move(src); } /// NOLINT

MultiVersion & operator=(MultiVersion && src)
MultiVersion & operator=(MultiVersion && src) /// NOLINT
{
if (this != &src)
{

@@ -302,7 +302,7 @@ private:
readStringUntilEOF(query, in);

ParserCreateNamedCollectionQuery parser;
auto ast = parseQuery(parser, query, "in file " + path, 0, settings.max_parser_depth);
auto ast = parseQuery(parser, query, "in file " + path, 0, settings.max_parser_depth, settings.max_parser_backtracks);
const auto & create_query = ast->as<const ASTCreateNamedCollectionQuery &>();
return create_query;
}

@@ -25,7 +25,7 @@
*/
template <typename T, typename U>
constexpr bool memcpy_can_be_used_for_assignment = std::is_same_v<T, U>
|| (std::is_integral_v<T> && std::is_integral_v<U> && sizeof(T) == sizeof(U));
|| (std::is_integral_v<T> && std::is_integral_v<U> && sizeof(T) == sizeof(U)); /// NOLINT(misc-redundant-expression)

namespace DB
{
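A minimal standalone sketch of the same trait, showing which type pairs it accepts; the trait definition is copied from the hunk above, everything else is standard C++:

#include <cstdint>
#include <type_traits>

template <typename T, typename U>
constexpr bool memcpy_can_be_used_for_assignment = std::is_same_v<T, U>
    || (std::is_integral_v<T> && std::is_integral_v<U> && sizeof(T) == sizeof(U));

// Same type, or same-width integrals: a raw memcpy preserves the value representation.
static_assert(memcpy_can_be_used_for_assignment<uint64_t, uint64_t>);
static_assert(memcpy_can_be_used_for_assignment<int32_t, uint32_t>);
// Different widths (or non-integral types) would require a real conversion.
static_assert(!memcpy_can_be_used_for_assignment<int64_t, int32_t>);

int main() {}
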
@@ -558,7 +558,7 @@ public:
}

template <typename... TAllocatorParams>
void swap(PODArray & rhs, TAllocatorParams &&... allocator_params)
void swap(PODArray & rhs, TAllocatorParams &&... allocator_params) /// NOLINT(performance-noexcept-swap)
{
#ifndef NDEBUG
this->unprotect();
@@ -756,7 +756,7 @@ public:
};

template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
void swap(PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & rhs)
void swap(PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & rhs) /// NOLINT
{
lhs.swap(rhs);
}

@@ -476,6 +476,7 @@ The server successfully detected this situation and will download merged part fr
M(FileSegmentRemoveMicroseconds, "File segment remove() time") \
M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \
M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \
M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \
M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \
M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \
\
@@ -533,6 +534,7 @@ The server successfully detected this situation and will download merged part fr
\
M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \
M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \
M(AggregationOptimizedEqualRangesOfKeys, "For how many blocks optimization of equal ranges of keys was applied") \
\
M(MetadataFromKeeperCacheHit, "Number of times an object storage metadata request was answered from cache without making request to Keeper") \
M(MetadataFromKeeperCacheMiss, "Number of times an object storage metadata request had to be answered from Keeper") \

@@ -2,13 +2,13 @@
# if USE_SSH
# include <stdexcept>

# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wreserved-macro-identifier"
# pragma GCC diagnostic ignored "-Wreserved-identifier"
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wreserved-macro-identifier"
# pragma clang diagnostic ignored "-Wreserved-identifier"

# include <libssh/libssh.h>

# pragma GCC diagnostic pop
# pragma clang diagnostic pop

namespace
{

@@ -387,7 +387,9 @@ public:

/// Introspection
std::atomic<UInt64> dequeued_requests{0};
std::atomic<UInt64> canceled_requests{0};
std::atomic<ResourceCost> dequeued_cost{0};
std::atomic<ResourceCost> canceled_cost{0};
std::atomic<UInt64> busy_periods{0};
};

@@ -50,6 +50,12 @@ public:
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
virtual void enqueueRequest(ResourceRequest * request) = 0;

/// Cancel previously enqueued request.
/// Returns `false` and does nothing given unknown or already executed request.
/// Returns `true` if requests has been found and canceled.
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
virtual bool cancelRequest(ResourceRequest * request) = 0;

/// For introspection
ResourceCost getBudget() const
{

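A standalone toy sketch of the enqueue/cancel contract this interface adds: cancellation succeeds only while the request is still queued, and a deque-backed queue pays O(N) for the scan, which is exactly the trade-off the TODO in the FifoQueue hunk below calls out. All names here are illustrative, not the ClickHouse scheduler types:

#include <cstdint>
#include <deque>
#include <iostream>
#include <mutex>

struct Request { uint64_t cost = 1; };

class ToyQueue
{
public:
    void enqueue(Request * r)
    {
        std::lock_guard lock(mutex);
        requests.push_back(r);
    }

    Request * dequeue()
    {
        std::lock_guard lock(mutex);
        if (requests.empty())
            return nullptr;
        Request * r = requests.front();
        requests.pop_front();
        return r;
    }

    bool cancel(Request * r)
    {
        std::lock_guard lock(mutex);
        for (auto it = requests.begin(); it != requests.end(); ++it)
        {
            if (*it == r)
            {
                requests.erase(it);   // O(N), like the deque-based queue in the diff
                return true;
            }
        }
        return false;                 // unknown or already dequeued: caller must let it finish
    }

private:
    std::mutex mutex;
    std::deque<Request *> requests;
};

int main()
{
    ToyQueue q;
    Request a, b;
    q.enqueue(&a);
    q.enqueue(&b);
    std::cout << q.cancel(&b) << '\n';                       // 1: still queued
    Request * r = q.dequeue();                               // dequeues a
    std::cout << (r == &a) << ' ' << q.cancel(&a) << '\n';   // 1 0: already dequeued
}
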
@@ -134,56 +134,65 @@ public:

std::pair<ResourceRequest *, bool> dequeueRequest() override
{
if (heap_size == 0)
return {nullptr, false};

// Recursively pull request from child
auto [request, child_active] = items.front().child->dequeueRequest();
assert(request != nullptr);
std::pop_heap(items.begin(), items.begin() + heap_size);
Item & current = items[heap_size - 1];

// SFQ fairness invariant: system vruntime equals last served request start-time
assert(current.vruntime >= system_vruntime);
system_vruntime = current.vruntime;

// By definition vruntime is amount of consumed resource (cost) divided by weight
current.vruntime += double(request->cost) / current.child->info.weight;
max_vruntime = std::max(max_vruntime, current.vruntime);

if (child_active) // Put active child back in heap after vruntime update
// Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr`
while (true)
{
std::push_heap(items.begin(), items.begin() + heap_size);
}
else // Deactivate child if it is empty, but remember it's vruntime for latter activations
{
heap_size--;
if (heap_size == 0)
return {nullptr, false};

// Store index of this inactive child in `parent.idx`
// This enables O(1) search of inactive children instead of O(n)
current.child->info.parent.idx = heap_size;
}
// Recursively pull request from child
auto [request, child_active] = items.front().child->dequeueRequest();
std::pop_heap(items.begin(), items.begin() + heap_size);
Item & current = items[heap_size - 1];

// Reset any difference between children on busy period end
if (heap_size == 0)
{
// Reset vtime to zero to avoid floating-point error accumulation,
// but do not reset too often, because it's O(N)
UInt64 ns = clock_gettime_ns();
if (last_reset_ns + 1000000000 < ns)
if (request)
{
last_reset_ns = ns;
for (Item & item : items)
item.vruntime = 0;
max_vruntime = 0;
}
system_vruntime = max_vruntime;
busy_periods++;
}
// SFQ fairness invariant: system vruntime equals last served request start-time
assert(current.vruntime >= system_vruntime);
system_vruntime = current.vruntime;

dequeued_requests++;
dequeued_cost += request->cost;
return {request, heap_size > 0};
// By definition vruntime is amount of consumed resource (cost) divided by weight
current.vruntime += double(request->cost) / current.child->info.weight;
max_vruntime = std::max(max_vruntime, current.vruntime);
}

if (child_active) // Put active child back in heap after vruntime update
{
std::push_heap(items.begin(), items.begin() + heap_size);
}
else // Deactivate child if it is empty, but remember it's vruntime for latter activations
{
heap_size--;

// Store index of this inactive child in `parent.idx`
// This enables O(1) search of inactive children instead of O(n)
current.child->info.parent.idx = heap_size;
}

// Reset any difference between children on busy period end
if (heap_size == 0)
{
// Reset vtime to zero to avoid floating-point error accumulation,
// but do not reset too often, because it's O(N)
UInt64 ns = clock_gettime_ns();
if (last_reset_ns + 1000000000 < ns)
{
last_reset_ns = ns;
for (Item & item : items)
item.vruntime = 0;
max_vruntime = 0;
}
system_vruntime = max_vruntime;
busy_periods++;
}

if (request)
{
dequeued_requests++;
dequeued_cost += request->cost;
return {request, heap_size > 0};
}
}
}

bool isActive() override

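The fairness math in the loop above advances each child's virtual runtime by cost divided by weight, so a child with twice the weight accumulates vruntime half as fast and is served roughly twice as often when the min-vruntime child is always picked. A standalone numeric illustration (weights and costs are made up):

#include <iostream>

int main()
{
    // Two children with weights 1.0 and 2.0; every request costs 10 units.
    double vruntime_a = 0.0, weight_a = 1.0;
    double vruntime_b = 0.0, weight_b = 2.0;
    double cost = 10.0;

    int served_a = 0, served_b = 0;
    for (int i = 0; i < 9; ++i)
    {
        // Serve the child with the smallest vruntime (what the min-heap does).
        if (vruntime_a <= vruntime_b)
        {
            vruntime_a += cost / weight_a;
            ++served_a;
        }
        else
        {
            vruntime_b += cost / weight_b;
            ++served_b;
        }
    }
    std::cout << "A served " << served_a << ", B served " << served_b << '\n'; // A served 3, B served 6
}
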
@@ -39,8 +39,7 @@ public:

void enqueueRequest(ResourceRequest * request) override
{
std::unique_lock lock(mutex);
request->enqueue_ns = clock_gettime_ns();
std::lock_guard lock(mutex);
queue_cost += request->cost;
bool was_empty = requests.empty();
requests.push_back(request);
@@ -50,7 +49,7 @@ public:

std::pair<ResourceRequest *, bool> dequeueRequest() override
{
std::unique_lock lock(mutex);
std::lock_guard lock(mutex);
if (requests.empty())
return {nullptr, false};
ResourceRequest * result = requests.front();
@@ -63,9 +62,29 @@ public:
return {result, !requests.empty()};
}

bool cancelRequest(ResourceRequest * request) override
{
std::lock_guard lock(mutex);
// TODO(serxa): reimplement queue as intrusive list of ResourceRequest to make this O(1) instead of O(N)
for (auto i = requests.begin(), e = requests.end(); i != e; ++i)
{
if (*i == request)
{
requests.erase(i);
if (requests.empty())
busy_periods++;
queue_cost -= request->cost;
canceled_requests++;
canceled_cost += request->cost;
return true;
}
}
return false;
}

bool isActive() override
{
std::unique_lock lock(mutex);
std::lock_guard lock(mutex);
return !requests.empty();
}

@@ -98,14 +117,14 @@ public:

std::pair<UInt64, Int64> getQueueLengthAndCost()
{
std::unique_lock lock(mutex);
std::lock_guard lock(mutex);
return {requests.size(), queue_cost};
}

private:
std::mutex mutex;
Int64 queue_cost = 0;
std::deque<ResourceRequest *> requests;
std::deque<ResourceRequest *> requests; // TODO(serxa): reimplement it using intrusive list to avoid allocations/deallocations and O(N) during cancel
};

}

@@ -102,25 +102,31 @@ public:

std::pair<ResourceRequest *, bool> dequeueRequest() override
{
if (items.empty())
return {nullptr, false};

// Recursively pull request from child
auto [request, child_active] = items.front().child->dequeueRequest();
assert(request != nullptr);

// Deactivate child if it is empty
if (!child_active)
// Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr`
while (true)
{
std::pop_heap(items.begin(), items.end());
items.pop_back();
if (items.empty())
busy_periods++;
}
return {nullptr, false};

dequeued_requests++;
dequeued_cost += request->cost;
return {request, !items.empty()};
// Recursively pull request from child
auto [request, child_active] = items.front().child->dequeueRequest();

// Deactivate child if it is empty
if (!child_active)
{
std::pop_heap(items.begin(), items.end());
items.pop_back();
if (items.empty())
busy_periods++;
}

if (request)
{
dequeued_requests++;
dequeued_cost += request->cost;
return {request, !items.empty()};
}
}
}

bool isActive() override

@@ -38,7 +38,6 @@ TEST(SchedulerDynamicResourceManager, Smoke)
{
ResourceGuard gA(cA->get("res1"), ResourceGuard::PostponeLocking);
gA.lock();
gA.setFailure();
gA.unlock();

ResourceGuard gB(cB->get("res1"));

@@ -4,6 +4,7 @@

#include <Common/Scheduler/Nodes/tests/ResourceTest.h>

#include <barrier>
#include <future>

using namespace DB;
@@ -73,6 +74,22 @@ struct ResourceHolder
}
};

struct MyRequest : public ResourceRequest
{
std::function<void()> on_execute;

explicit MyRequest(ResourceCost cost_, std::function<void()> on_execute_)
: ResourceRequest(cost_)
, on_execute(on_execute_)
{}

void execute() override
{
if (on_execute)
on_execute();
}
};

TEST(SchedulerRoot, Smoke)
{
ResourceTest t;
@@ -111,3 +128,49 @@ TEST(SchedulerRoot, Smoke)
EXPECT_TRUE(fc2->requests.contains(&rg.request));
}
}

TEST(SchedulerRoot, Cancel)
{
ResourceTest t;

ResourceHolder r1(t);
auto * fc1 = r1.add<ConstraintTest>("/", "<max_requests>1</max_requests>");
r1.add<PriorityPolicy>("/prio");
auto a = r1.addQueue("/prio/A", "<priority>1</priority>");
auto b = r1.addQueue("/prio/B", "<priority>2</priority>");
r1.registerResource();

std::barrier destruct_sync(2);
std::barrier sync(2);
std::thread consumer1([&]
{
MyRequest request(1,[&]
{
sync.arrive_and_wait(); // (A)
EXPECT_TRUE(fc1->requests.contains(&request));
sync.arrive_and_wait(); // (B)
request.finish();
destruct_sync.arrive_and_wait(); // (C)
});
a.queue->enqueueRequest(&request);
destruct_sync.arrive_and_wait(); // (C)
});

std::thread consumer2([&]
{
MyRequest request(1,[&]
{
FAIL() << "This request must be canceled, but instead executes";
});
sync.arrive_and_wait(); // (A) wait for request of consumer1 to be inside execute, so that constraint is in violated state and our request will not be executed immediately
b.queue->enqueueRequest(&request);
bool canceled = b.queue->cancelRequest(&request);
EXPECT_TRUE(canceled);
sync.arrive_and_wait(); // (B) release request of consumer1 to be finished
});

consumer1.join();
consumer2.join();

EXPECT_TRUE(fc1->requests.empty());
}

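The new test coordinates its two consumer threads with std::barrier so that the second request is guaranteed to still be queued when it is canceled. A standalone sketch of that arrive_and_wait rendezvous pattern (C++20; the flag stands in for "request has been enqueued"):

#include <barrier>
#include <iostream>
#include <thread>

int main()
{
    std::barrier sync(2);
    bool request_enqueued = false;

    std::thread producer([&]
    {
        request_enqueued = true;   // the "enqueue" happens strictly before (A)
        sync.arrive_and_wait();    // (A) let the other thread proceed
    });

    std::thread canceller([&]
    {
        sync.arrive_and_wait();    // (A) wait until the producer has enqueued
        std::cout << (request_enqueued ? "safe to cancel\n" : "raced\n");
    });

    producer.join();
    canceller.join();
}
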
@@ -71,8 +71,7 @@ public:
// lock(mutex) is not required because `Dequeued` request cannot be used by the scheduler thread
chassert(state == Dequeued);
state = Finished;
if (constraint)
constraint->finishRequest(this);
ResourceRequest::finish();
}

static Request & local()
@@ -126,12 +125,6 @@ public:
}
}

/// Mark request as unsuccessful; by default request is considered to be successful
void setFailure()
{
request.successful = false;
}

ResourceLink link;
Request & request;
};