Merge branch 'master' into merge_config

This commit is contained in:
alesapin 2024-03-15 13:52:52 +01:00
commit 38cd6ef27d
234 changed files with 3524 additions and 1370 deletions

View File

@ -5,128 +5,127 @@
# a) the new check is not controversial (this includes many checks in readability-* and google-*) or
# b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*).
# TODO: Once clang(-tidy) 17 is the minimum, we can convert this list to YAML
# See https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/ReleaseNotes.html#improvements-to-clang-tidy
# TODO Let clang-tidy check headers in further directories
# --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$'
HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$'
Checks: '*,
-abseil-*,
Checks: [
'*',
-altera-*,
'-abseil-*',
-android-*,
'-altera-*',
-bugprone-assignment-in-if-condition,
-bugprone-branch-clone,
-bugprone-easily-swappable-parameters,
-bugprone-exception-escape,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions,
-bugprone-not-null-terminated-result,
-bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
-bugprone-unchecked-optional-access,
'-android-*',
-cert-dcl16-c,
-cert-dcl37-c,
-cert-dcl51-cpp,
-cert-err58-cpp,
-cert-msc32-c,
-cert-msc51-cpp,
-cert-oop54-cpp,
-cert-oop57-cpp,
'-bugprone-assignment-in-if-condition',
'-bugprone-branch-clone',
'-bugprone-easily-swappable-parameters',
'-bugprone-exception-escape',
'-bugprone-implicit-widening-of-multiplication-result',
'-bugprone-narrowing-conversions',
'-bugprone-not-null-terminated-result',
'-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
'-bugprone-unchecked-optional-access',
-clang-analyzer-unix.Malloc,
'-cert-dcl16-c',
'-cert-dcl37-c',
'-cert-dcl51-cpp',
'-cert-err58-cpp',
'-cert-msc32-c',
'-cert-msc51-cpp',
'-cert-oop54-cpp',
'-cert-oop57-cpp',
-cppcoreguidelines-*, # impractical in a codebase as large as ClickHouse, also slow
'-clang-analyzer-unix.Malloc',
-darwin-*,
'-cppcoreguidelines-*', # impractical in a codebase as large as ClickHouse, also slow
-fuchsia-*,
'-darwin-*',
-google-build-using-namespace,
-google-readability-braces-around-statements,
-google-readability-casting,
-google-readability-function-size,
-google-readability-namespace-comments,
-google-readability-todo,
'-fuchsia-*',
-hicpp-avoid-c-arrays,
-hicpp-avoid-goto,
-hicpp-braces-around-statements,
-hicpp-explicit-conversions,
-hicpp-function-size,
-hicpp-member-init,
-hicpp-move-const-arg,
-hicpp-multiway-paths-covered,
-hicpp-named-parameter,
-hicpp-no-array-decay,
-hicpp-no-assembler,
-hicpp-no-malloc,
-hicpp-signed-bitwise,
-hicpp-special-member-functions,
-hicpp-uppercase-literal-suffix,
-hicpp-use-auto,
-hicpp-use-emplace,
-hicpp-vararg,
'-google-build-using-namespace',
'-google-readability-braces-around-statements',
'-google-readability-casting',
'-google-readability-function-size',
'-google-readability-namespace-comments',
'-google-readability-todo',
-linuxkernel-*,
'-hicpp-avoid-c-arrays',
'-hicpp-avoid-goto',
'-hicpp-braces-around-statements',
'-hicpp-explicit-conversions',
'-hicpp-function-size',
'-hicpp-member-init',
'-hicpp-move-const-arg',
'-hicpp-multiway-paths-covered',
'-hicpp-named-parameter',
'-hicpp-no-array-decay',
'-hicpp-no-assembler',
'-hicpp-no-malloc',
'-hicpp-signed-bitwise',
'-hicpp-special-member-functions',
'-hicpp-uppercase-literal-suffix',
'-hicpp-use-auto',
'-hicpp-use-emplace',
'-hicpp-vararg',
-llvm-*,
'-linuxkernel-*',
-llvmlibc-*,
'-llvm-*',
-openmp-*,
'-llvmlibc-*',
-misc-const-correctness,
-misc-include-cleaner, # useful but far too many occurrences
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-confusable-identifiers, # useful but slooow
-misc-use-anonymous-namespace,
'-openmp-*',
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
-modernize-macro-to-enum,
-modernize-pass-by-value,
-modernize-return-braced-init-list,
-modernize-use-auto,
-modernize-use-default-member-init,
-modernize-use-emplace,
-modernize-use-nodiscard,
-modernize-use-override,
-modernize-use-trailing-return-type,
'-misc-const-correctness',
'-misc-include-cleaner', # useful but far too many occurrences
'-misc-no-recursion',
'-misc-non-private-member-variables-in-classes',
'-misc-confusable-identifiers', # useful but slooo
'-misc-use-anonymous-namespace',
-performance-inefficient-string-concatenation,
-performance-no-int-to-ptr,
-performance-avoid-endl,
-performance-unnecessary-value-param,
'-modernize-avoid-c-arrays',
'-modernize-concat-nested-namespaces',
'-modernize-macro-to-enum',
'-modernize-pass-by-value',
'-modernize-return-braced-init-list',
'-modernize-use-auto',
'-modernize-use-default-member-init',
'-modernize-use-emplace',
'-modernize-use-nodiscard',
'-modernize-use-override',
'-modernize-use-trailing-return-type',
-portability-simd-intrinsics,
'-performance-inefficient-string-concatenation',
'-performance-no-int-to-ptr',
'-performance-avoid-endl',
'-performance-unnecessary-value-param',
-readability-avoid-unconditional-preprocessor-if,
-readability-braces-around-statements,
-readability-convert-member-functions-to-static,
-readability-else-after-return,
-readability-function-cognitive-complexity,
-readability-function-size,
-readability-identifier-length,
-readability-identifier-naming, # useful but too slow
-readability-implicit-bool-conversion,
-readability-isolate-declaration,
-readability-magic-numbers,
-readability-named-parameter,
-readability-redundant-declaration,
-readability-simplify-boolean-expr,
-readability-static-accessed-through-instance,
-readability-suspicious-call-argument,
-readability-uppercase-literal-suffix,
-readability-use-anyofallof,
'-portability-simd-intrinsics',
-zircon-*,
'
'-readability-avoid-unconditional-preprocessor-if',
'-readability-braces-around-statements',
'-readability-convert-member-functions-to-static',
'-readability-else-after-return',
'-readability-function-cognitive-complexity',
'-readability-function-size',
'-readability-identifier-length',
'-readability-identifier-naming', # useful but too slow
'-readability-implicit-bool-conversion',
'-readability-isolate-declaration',
'-readability-magic-numbers',
'-readability-named-parameter',
'-readability-redundant-declaration',
'-readability-simplify-boolean-expr',
'-readability-static-accessed-through-instance',
'-readability-suspicious-call-argument',
'-readability-uppercase-literal-suffix',
'-readability-use-anyofallof',
'-zircon-*'
]
WarningsAsErrors: '*'

View File

@ -45,62 +45,3 @@ jobs:
with:
data: "${{ needs.RunConfig.outputs.data }}"
set_latest: true
SonarCloud:
runs-on: [self-hosted, builder]
env:
SONAR_SCANNER_VERSION: 4.8.0.2856
SONAR_SERVER_URL: "https://sonarcloud.io"
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
CC: clang-17
CXX: clang++-17
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
filter: tree:0
submodules: true
- name: Set up JDK 11
uses: actions/setup-java@v1
with:
java-version: 11
- name: Download and set up sonar-scanner
env:
SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip
run: |
mkdir -p "$HOME/.sonar"
curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}"
unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/"
echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH"
- name: Download and set up build-wrapper
env:
BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip
run: |
curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}"
unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/"
echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH"
- name: Set Up Build Tools
run: |
sudo apt-get update
sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
- name: Run build-wrapper
run: |
mkdir build
cd build
cmake ..
cd ..
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/
- name: Run sonar-scanner
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
run: |
sonar-scanner \
--define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \
--define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
--define sonar.projectKey="ClickHouse_ClickHouse" \
--define sonar.organization="clickhouse-java" \
--define sonar.cfamily.cpp23.enabled=true \
--define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql"

View File

@ -56,13 +56,13 @@ option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile t
if (ENABLE_CHECK_HEAVY_BUILDS)
# set DATA (since RSS does not work since 2.6.x+) to 5G
set (RLIMIT_DATA 5000000000)
# set VIRT (RLIMIT_AS) to 10G (DATA*10)
# set VIRT (RLIMIT_AS) to 10G (DATA*2)
set (RLIMIT_AS 10000000000)
# set CPU time limit to 1000 seconds
set (RLIMIT_CPU 1000)
# -fsanitize=memory and address are too heavy
if (SANITIZE)
if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE)
set (RLIMIT_DATA 10000000000) # 10G
endif()

View File

@ -1,7 +1,7 @@
#include "coverage.h"
#include <sys/mman.h>
#pragma GCC diagnostic ignored "-Wreserved-identifier"
#pragma clang diagnostic ignored "-Wreserved-identifier"
/// WITH_COVERAGE enables the default implementation of code coverage,

View File

@ -108,16 +108,22 @@
{
[[noreturn]] void abortOnFailedAssertion(const String & description);
}
#define chassert(x) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
#define chassert_1(x, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
#define chassert_2(x, comment, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(comment); } while (0)
#define UNREACHABLE() abort()
// clang-format off
#else
/// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while
/// "sizeof(!(x))" will not.
#define chassert(x) (void)sizeof(!(x))
#define chassert_1(x, ...) (void)sizeof(!(x))
#define chassert_2(x, comment, ...) (void)sizeof(!(x))
#define UNREACHABLE() __builtin_unreachable()
#endif
#define CHASSERT_DISPATCH(_1,_2, N,...) N(_1, _2)
#define CHASSERT_INVOKE(tuple) CHASSERT_DISPATCH tuple
#define chassert(...) CHASSERT_INVOKE((__VA_ARGS__, chassert_2, chassert_1))
#endif
/// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.

View File

@ -59,8 +59,8 @@ using ComparatorWrapper = Comparator;
#endif
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"
#include <miniselect/floyd_rivest_select.h>
@ -115,7 +115,7 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
::partial_sort(first, middle, last, comparator());
}
#pragma GCC diagnostic pop
#pragma clang diagnostic pop
template <typename RandomIt, typename Compare>
void sort(RandomIt first, RandomIt last, Compare compare)

View File

@ -93,7 +93,7 @@ void TCPServerDispatcher::release()
void TCPServerDispatcher::run()
{
AutoPtr<TCPServerDispatcher> guard(this, true); // ensure object stays alive
AutoPtr<TCPServerDispatcher> guard(this); // ensure object stays alive
int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds();
@ -149,11 +149,13 @@ void TCPServerDispatcher::enqueue(const StreamSocket& socket)
{
try
{
this->duplicate();
_threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName);
++_currentThreads;
}
catch (Poco::Exception& exc)
{
this->release();
++_refusedConnections;
std::cerr << "Got exception while starting thread for connection. Error code: "
<< exc.code() << ", message: '" << exc.displayText() << "'" << std::endl;

2
contrib/curl vendored

@ -1 +1 @@
Subproject commit 5ce164e0e9290c96eb7d502173426c0a135ec008
Subproject commit 1a05e833f8f7140628b27882b10525fd9ec4b873

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103
Subproject commit 0d04201c45359f0d0701fb1e8297d25eff7cfecf

View File

@ -17,6 +17,8 @@
#ifndef METROHASH_METROHASH_128_H
#define METROHASH_METROHASH_128_H
// NOLINTBEGIN(readability-avoid-const-params-in-decls)
#include <stdint.h>
class MetroHash128
@ -68,5 +70,6 @@ private:
void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);
void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);
// NOLINTEND(readability-avoid-const-params-in-decls)
#endif // #ifndef METROHASH_METROHASH_128_H

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.2.1.2248"
ARG VERSION="24.2.2.71"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -4,6 +4,9 @@ FROM clickhouse/fasttest:$FROM_TAG
ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}
# If the cctools is updated, then first build it in the CI, then update here in a different commit
COPY --from=clickhouse/cctools:d9e3596e706b /cctools /cctools
# Rust toolchain and libraries
ENV RUSTUP_HOME=/rust/rustup
ENV CARGO_HOME=/rust/cargo
@ -73,9 +76,6 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \
"https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \
&& chmod +x /usr/bin/clang-tidy-cache
# If the cctools is updated, then first build it in the CI, then update here in a different commit
COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools
RUN mkdir /workdir && chmod 777 /workdir
WORKDIR /workdir

View File

@ -2,7 +2,7 @@
# It's based on the assumption that we don't care of the cctools version so much
# It event does not depend on the clickhouse/fasttest in the `docker/images.json`
ARG FROM_TAG=latest
FROM clickhouse/fasttest:$FROM_TAG
FROM clickhouse/fasttest:$FROM_TAG as builder
ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}
@ -29,3 +29,6 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& make install -j$(nproc) \
&& cd ../.. \
&& rm -rf cctools-port
FROM scratch
COPY --from=builder /cctools /cctools

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.2.1.2248"
ARG VERSION="24.2.2.71"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.2.1.2248"
ARG VERSION="24.2.2.71"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -0,0 +1,64 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.12.5.81-stable (a0fbe3ae813) FIXME as compared to v23.12.4.15-stable (4233d111d20)
#### Improvement
* Backported in [#60290](https://github.com/ClickHouse/ClickHouse/issues/60290): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#60830](https://github.com/ClickHouse/ClickHouse/issues/60830): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
#### Build/Testing/Packaging Improvement
* Backported in [#59883](https://github.com/ClickHouse/ClickHouse/issues/59883): If you want to run initdb scripts every time when ClickHouse container is starting you shoud initialize environment varible CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#60767](https://github.com/ClickHouse/ClickHouse/issues/60767): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#60582](https://github.com/ClickHouse/ClickHouse/issues/60582): Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)).
* Backported in [#61041](https://github.com/ClickHouse/ClickHouse/issues/61041): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61030](https://github.com/ClickHouse/ClickHouse/issues/61030): ... [#61022](https://github.com/ClickHouse/ClickHouse/pull/61022) ([Max K.](https://github.com/maxknv)).
* Backported in [#61224](https://github.com/ClickHouse/ClickHouse/issues/61224): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#61190](https://github.com/ClickHouse/ClickHouse/issues/61190): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Backport [#59798](https://github.com/ClickHouse/ClickHouse/issues/59798) to 23.12: CI: do not reuse builds on release branches"'. [#59979](https://github.com/ClickHouse/ClickHouse/pull/59979) ([Max K.](https://github.com/maxknv)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)).
* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)).
* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,24 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.3.21.26-lts (d9672a3731f) FIXME as compared to v23.3.20.27-lts (cc974ba4f81)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix reading from sparse columns after restart [#49660](https://github.com/ClickHouse/ClickHouse/pull/49660) ([Anton Popov](https://github.com/CurtizJ)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@ -0,0 +1,30 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.8.11.28-lts (31879d2ab4c) FIXME as compared to v23.8.10.43-lts (a278225bba9)
#### Improvement
* Backported in [#60828](https://github.com/ClickHouse/ClickHouse/issues/60828): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
#### NO CL ENTRY
* NO CL ENTRY: 'Use the current branch test-utils to build cctools'. [#61276](https://github.com/ClickHouse/ClickHouse/pull/61276) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.1.7.18-stable (90925babd78) FIXME as compared to v24.1.6.52-stable (fa09f677bc9)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#61043](https://github.com/ClickHouse/ClickHouse/issues/61043): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61168](https://github.com/ClickHouse/ClickHouse/issues/61168): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
* Backported in [#61192](https://github.com/ClickHouse/ClickHouse/issues/61192): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@ -0,0 +1,55 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.2.2.71-stable (9293d361e72) FIXME as compared to v24.2.1.2248-stable (891689a4150)
#### Improvement
* Backported in [#60834](https://github.com/ClickHouse/ClickHouse/issues/60834): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* PartsSplitter invalid ranges for the same part [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
* Try to avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#60758](https://github.com/ClickHouse/ClickHouse/issues/60758): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#60706](https://github.com/ClickHouse/ClickHouse/issues/60706): Eliminates the need to provide input args to docker server jobs to clean yml files. [#60602](https://github.com/ClickHouse/ClickHouse/pull/60602) ([Max K.](https://github.com/maxknv)).
* Backported in [#61045](https://github.com/ClickHouse/ClickHouse/issues/61045): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#60721](https://github.com/ClickHouse/ClickHouse/issues/60721): Fix build_report job so that it's defined by ci_config only (not yml file). [#60613](https://github.com/ClickHouse/ClickHouse/pull/60613) ([Max K.](https://github.com/maxknv)).
* Backported in [#60668](https://github.com/ClickHouse/ClickHouse/issues/60668): Do not await ci pending jobs on release branches decrease wait timeout to fit into gh job timeout. [#60652](https://github.com/ClickHouse/ClickHouse/pull/60652) ([Max K.](https://github.com/maxknv)).
* Backported in [#60863](https://github.com/ClickHouse/ClickHouse/issues/60863): Set limited number of builds for "special build check" report in backports. [#60850](https://github.com/ClickHouse/ClickHouse/pull/60850) ([Max K.](https://github.com/maxknv)).
* Backported in [#60946](https://github.com/ClickHouse/ClickHouse/issues/60946): ... [#60935](https://github.com/ClickHouse/ClickHouse/pull/60935) ([Max K.](https://github.com/maxknv)).
* Backported in [#60972](https://github.com/ClickHouse/ClickHouse/issues/60972): ... [#60952](https://github.com/ClickHouse/ClickHouse/pull/60952) ([Max K.](https://github.com/maxknv)).
* Backported in [#60980](https://github.com/ClickHouse/ClickHouse/issues/60980): ... [#60958](https://github.com/ClickHouse/ClickHouse/pull/60958) ([Max K.](https://github.com/maxknv)).
* Backported in [#61170](https://github.com/ClickHouse/ClickHouse/issues/61170): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
* Backported in [#61181](https://github.com/ClickHouse/ClickHouse/issues/61181): ... [#61172](https://github.com/ClickHouse/ClickHouse/pull/61172) ([Max K.](https://github.com/maxknv)).
* Backported in [#61228](https://github.com/ClickHouse/ClickHouse/issues/61228): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#61194](https://github.com/ClickHouse/ClickHouse/issues/61194): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
* Backported in [#61244](https://github.com/ClickHouse/ClickHouse/issues/61244): ... [#61214](https://github.com/ClickHouse/ClickHouse/pull/61214) ([Max K.](https://github.com/maxknv)).
* Backported in [#61388](https://github.com/ClickHouse/ClickHouse/issues/61388):. [#61373](https://github.com/ClickHouse/ClickHouse/pull/61373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* CI: make workflow yml abstract [#60421](https://github.com/ClickHouse/ClickHouse/pull/60421) ([Max K.](https://github.com/maxknv)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
* General sanity in function `seriesOutliersDetectTukey` [#60535](https://github.com/ClickHouse/ClickHouse/pull/60535) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -18,8 +18,8 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
name1 [type1],
name2 [type2],
...
) ENGINE = RabbitMQ SETTINGS
rabbitmq_host_port = 'host:port' [or rabbitmq_address = 'amqp(s)://guest:guest@localhost/vhost'],
@ -198,6 +198,10 @@ Additional virtual columns when `kafka_handle_error_mode='stream'`:
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully.
## Caveats {#caveats}
Even though you may specify [default column expressions](/docs/en/sql-reference/statements/create/table.md/#default_values) (such as `DEFAULT`, `MATERIALIZED`, `ALIAS`) in the table definition, these will be ignored. Instead, the columns will be filled with their respective default values for their types.
## Data formats support {#data-formats-support}
RabbitMQ engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse.

View File

@ -946,96 +946,6 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting.
### Dynamic Storage
This example query shows how to attach a table stored at a URL and configure the
remote storage within the query. The web storage is not configured in the ClickHouse
configuration files; all the settings are in the CREATE/ATTACH query.
:::note
The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
:::
#### Example dynamic web storage
:::tip
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver)
:::
In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.
```sql
# highlight-next-line
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
);
# highlight-end
```
### Nested Dynamic Storage
This example query builds on the above dynamic disk configuration and shows how to
use a local disk to cache data from a table stored at a URL. Neither the cache disk
nor the web storage is configured in the ClickHouse configuration files; both are
configured in the CREATE/ATTACH query settings.
In the settings highlighted below notice that the disk of `type=web` is nested within
the disk of `type=cache`.
```sql
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=cache,
max_size='1Gi',
path='/var/lib/clickhouse/custom_disk_cache/',
disk=disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
)
);
# highlight-end
```
### Details {#details}
In the case of `MergeTree` tables, data is getting to disk in different ways:
@ -1064,13 +974,11 @@ During this time, they are not moved to other volumes or disks. Therefore, until
User can assign new big parts to different disks of a [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures) volume in a balanced way using the [min_bytes_to_rebalance_partition_over_jbod](/docs/en/operations/settings/merge-tree-settings.md/#min-bytes-to-rebalance-partition-over-jbod) setting.
## Using S3 for Data Storage {#table_engine-mergetree-s3}
## Using External Storage for Data Storage {#table_engine-mergetree-s3}
:::note
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
:::
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. See [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage) for more details.
`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
Example for [S3](https://aws.amazon.com/s3/) as external storage using a disk with type `s3`.
Configuration markup:
``` xml
@ -1112,253 +1020,12 @@ Configuration markup:
</storage_configuration>
```
Also see [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage).
:::note cache configuration
ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions.
:::
### Configuring the S3 disk
Required parameters:
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
- `access_key_id` — S3 access key id.
- `secret_access_key` — S3 secret access key.
Optional parameters:
- `region` — S3 region name.
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `key_template` — Define the format with which the object keys are generated. By default, Clickhouse takes `root path` from `endpoint` option and adds random generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have a full control how to the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`.
- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the objects keys which were stored in the metadata files with the metadata version lower that `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.
### Configuring the cache
This is the cache configuration from above:
```xml
<s3_cache>
<type>cache</type>
<disk>s3</disk>
<path>/var/lib/clickhouse/disks/s3_cache/</path>
<max_size>10Gi</max_size>
</s3_cache>
```
These parameters define the cache layer:
- `type` — If a disk is of type `cache` it caches mark and index files in memory.
- `disk` — The name of the disk that will be cached.
Cache parameters:
- `path` — The path where metadata for the cache is stored.
- `max_size` — The size (amount of disk space) that the cache can grow to.
:::tip
There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details.
:::
S3 disk can be configured as `main` or `cold` storage:
``` xml
<storage_configuration>
...
<disks>
<s3>
<type>s3</type>
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
<access_key_id>your_access_key_id</access_key_id>
<secret_access_key>your_secret_access_key</secret_access_key>
</s3>
</disks>
<policies>
<s3_main>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3_main>
<s3_cold>
<volumes>
<main>
<disk>default</disk>
</main>
<external>
<disk>s3</disk>
</external>
</volumes>
<move_factor>0.2</move_factor>
</s3_cold>
</policies>
...
</storage_configuration>
```
In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule.
## Using Azure Blob Storage for Data Storage {#table_engine-mergetree-azure-blob-storage}
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
Configuration markup:
``` xml
<storage_configuration>
...
<disks>
<blob_storage_disk>
<type>azure_blob_storage</type>
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
<container_name>container</container_name>
<account_name>account</account_name>
<account_key>pass123</account_key>
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</blob_storage_disk>
</disks>
...
</storage_configuration>
```
Connection parameters:
* `endpoint` — AzureBlobStorage endpoint URL with container & prefix. Optionally can contain account_name if the authentication method used needs it. (`http://account.blob.core.windows.net:{port}/[account_name]{container_name}/{data_prefix}`) or these parameters can be provided separately using storage_account_url, account_name & container. For specifying prefix, endpoint should be used.
* `endpoint_contains_account_name` - This flag is used to specify if endpoint contains account_name as it is only needed for certain authentication methods. (Default : true)
* `storage_account_url` - Required if endpoint is not specified, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
* `container_name` - Target container name, defaults to `default-container`.
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
* `connection_string` - For authentication using a connection string.
* `account_name` and `account_key` - For authentication using Shared Key.
Limit parameters (mainly for internal usage):
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
* `min_bytes_for_seek` - Limits the size of a seekable region.
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
Other parameters:
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
:::note Zero-copy replication is not ready for production
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
:::
## HDFS storage {#hdfs-storage}
In this sample configuration:
- the disk is of type `hdfs`
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
```xml
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
<type>hdfs</type>
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
<skip_access_check>true</skip_access_check>
</hdfs>
<hdd>
<type>local</type>
<path>/</path>
</hdd>
</disks>
<policies>
<hdfs>
<volumes>
<main>
<disk>hdfs</disk>
</main>
<external>
<disk>hdd</disk>
</external>
</volumes>
</hdfs>
</policies>
</storage_configuration>
</clickhouse>
```
## Web storage (read-only) {#web-storage}
Web storage can be used for read-only purposes. An example use is for hosting sample
data, or for migrating data.
:::tip
Storage can also be configured temporarily within a query, if a web dataset is not expected
to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the
configuration file.
:::
In this sample configuration:
- the disk is of type `web`
- the data is hosted at `http://nginx:80/test1/`
- a cache on local storage is used
```xml
<clickhouse>
<storage_configuration>
<disks>
<web>
<type>web</type>
<endpoint>http://nginx:80/test1/</endpoint>
</web>
<cached_web>
<type>cache</type>
<disk>web</disk>
<path>cached_web_cache/</path>
<max_size>100000000</max_size>
</cached_web>
</disks>
<policies>
<web>
<volumes>
<main>
<disk>web</disk>
</main>
</volumes>
</web>
<cached_web>
<volumes>
<main>
<disk>cached_web</disk>
</main>
</volumes>
</cached_web>
</policies>
</storage_configuration>
</clickhouse>
```
## Virtual Columns {#virtual-columns}
- `_part` — Name of a part.

View File

@ -10,10 +10,14 @@ The embeddings and the metadata are stored in separate files in the raw data. A
converts them to CSV and imports them into ClickHouse. You can use the following `download.sh` script for that:
```bash
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata
python3 process.py ${1} # merge files and convert to CSV
number=${1}
if [[ $number == '' ]]; then
number=1
fi;
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${number}.npy # download image embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${number}.npy # download text embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${number}.parquet # download metadata
python3 process.py $number # merge files and convert to CSV
```
Script `process.py` is defined as follows:

View File

@ -248,6 +248,9 @@ Some of the files might not download fully. Check the file sizes and re-download
``` bash
$ curl -O https://datasets.clickhouse.com/trips_mergetree/partitions/trips_mergetree.tar
# Validate the checksum
$ md5sum trips_mergetree.tar
# Checksum should be equal to: f3b8d469b41d9a82da064ded7245d12c
$ tar xvf trips_mergetree.tar -C /var/lib/clickhouse # path to ClickHouse data directory
$ # check permissions of unpacked data, fix if required
$ sudo service clickhouse-server restart

View File

@ -5,26 +5,416 @@ sidebar_label: "External Disks for Storing Data"
title: "External Disks for Storing Data"
---
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon S3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)).
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely. Various storages are supported:
1. [Amazon S3](https://aws.amazon.com/s3/) object storage.
2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html))
3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs).
To work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
:::note ClickHouse also has support for external table engines, which are different from external storage option described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables.
1. to work with data stored on `Amazon S3` disks, use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine.
2. to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
3. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine.
:::
To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver).
## Configuring external storage {#configuring-external-storage}
## Configuring HDFS {#configuring-hdfs}
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly.
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
Disk configuration requires:
1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`.
2. Configuration of a specific external storage type.
Configuration markup:
Starting from 24.1 clickhouse version, it is possible to use a new configuration option.
It requires to specify:
1. `type` equal to `object_storage`
2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`.
Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web`.
Usage of `plain` metadata type is described in [plain storage section](/docs/en/operations/storing-data.md/#storing-data-on-webserver), `web` metadata type can be used only with `web` object storage type, `local` metadata type stores metadata files locally (each metadata files contains mapping to files in object storage and some additional meta information about them).
E.g. configuration option
``` xml
<s3>
<type>s3</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
</s3>
```
is equal to configuration (from `24.1`):
``` xml
<s3>
<type>object_storage</type>
<object_storage_type>s3</object_storage_type>
<metadata_type>local</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
</s3>
```
Configuration
``` xml
<s3_plain>
<type>s3_plain</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
</s3_plain>
```
is equal to
``` xml
<s3_plain>
<type>object_storage</type>
<object_storage_type>s3</object_storage_type>
<metadata_type>plain</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
</s3_plain>
```
Example of full storage configuration will look like:
``` xml
<clickhouse>
<storage_configuration>
<disks>
<s3>
<type>s3</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
</s3>
</disks>
<policies>
<s3>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3>
</policies>
</storage_configuration>
</clickhouse>
```
Starting with 24.1 clickhouse version, it can also look like:
``` xml
<clickhouse>
<storage_configuration>
<disks>
<s3>
<type>object_storage</type>
<object_storage_type>s3</object_storage_type>
<metadata_type>local</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
</s3>
</disks>
<policies>
<s3>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3>
</policies>
</storage_configuration>
</clickhouse>
```
In order to make a specific kind of storage a default option for all `MergeTree` tables add the following section to configuration file:
``` xml
<clickhouse>
<merge_tree>
<storage_policy>s3</storage_policy>
</merge_tree>
</clickhouse>
```
If you want to configure a specific storage policy only to specific table, you can define it in settings while creating the table:
``` sql
CREATE TABLE test (a Int32, b String)
ENGINE = MergeTree() ORDER BY a
SETTINGS storage_policy = 's3';
```
You can also use `disk` instead of `storage_policy`. In this case it is not requires to have `storage_policy` section in configuration file, only `disk` section would be enough.
``` sql
CREATE TABLE test (a Int32, b String)
ENGINE = MergeTree() ORDER BY a
SETTINGS disk = 's3';
```
## Dynamic Configuration {#dynamic-configuration}
There is also a possibility to specify storage configuration without a predefined disk in configuration in a configuration file, but can be configured in the `CREATE`/`ATTACH` query settings.
The following example query builds on the above dynamic disk configuration and shows how to use a local disk to cache data from a table stored at a URL.
```sql
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
);
# highlight-end
```
The example below adds cache to external storage.
```sql
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=cache,
max_size='1Gi',
path='/var/lib/clickhouse/custom_disk_cache/',
disk=disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
)
);
# highlight-end
```
In the settings highlighted below notice that the disk of `type=web` is nested within
the disk of `type=cache`.
:::note
The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
:::
A combination of config-based configuration and sql-defined configuration is also possible:
```sql
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=cache,
max_size='1Gi',
path='/var/lib/clickhouse/custom_disk_cache/',
disk=disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
)
);
# highlight-end
```
where `web` is a from a server configuration file:
``` xml
<storage_configuration>
<disks>
<web>
<type>web</type>
<endpoint>'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'</endpoint>
</web>
</disks>
</storage_configuration>
```
### Using S3 Storage {#s3-storage}
Required parameters:
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
- `access_key_id` — S3 access key id.
- `secret_access_key` — S3 secret access key.
Optional parameters:
- `region` — S3 region name.
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `key_template` — Define the format with which the object keys are generated. By default, Clickhouse takes `root path` from `endpoint` option and adds random generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have a full control how to the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`.
- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the objects keys which were stored in the metadata files with the metadata version lower that `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.
:::note
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
:::
### Using Plain Storage {#plain-storage}
In `22.10` a new disk type `s3_plain` was introduced, which provides a write-once storage. Configuration parameters are the same as for `s3` disk type.
Unlike `s3` disk type, it stores data as is, e.g. instead of randomly-generated blob names, it uses normal file names (the same way as clickhouse stores files on local disk) and does not store any metadata locally, e.g. it is derived from data on `s3`.
This disk type allows to keep a static version of the table, as it does not allow executing merges on the existing data and does not allow inserting of new data.
A use case for this disk type is to create backups on it, which can be done via `BACKUP TABLE data TO Disk('plain_disk_name', 'backup_name')`. Afterwards you can do `RESTORE TABLE data AS data_restored FROM Disk('plain_disk_name', 'backup_name')` or using `ATTACH TABLE data (...) ENGINE = MergeTree() SETTINGS disk = 'plain_disk_name'`.
Configuration:
``` xml
<s3_plain>
<type>s3_plain</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
</s3_plain>
```
Starting from `24.1` it is possible configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using `plain` metadata type.
Configuration:
``` xml
<s3_plain>
<type>object_storage</type>
<object_storage_type>azure</object_storage_type>
<metadata_type>plain</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
</s3_plain>
```
### Using Azure Blob Storage {#azure-blob-storage}
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
Configuration markup:
``` xml
<storage_configuration>
...
<disks>
<blob_storage_disk>
<type>azure_blob_storage</type>
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
<container_name>container</container_name>
<account_name>account</account_name>
<account_key>pass123</account_key>
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</blob_storage_disk>
</disks>
...
</storage_configuration>
```
Connection parameters:
* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
* `container_name` - Target container name, defaults to `default-container`.
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
* `connection_string` - For authentication using a connection string.
* `account_name` and `account_key` - For authentication using Shared Key.
Limit parameters (mainly for internal usage):
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
* `min_bytes_for_seek` - Limits the size of a seekable region.
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
Other parameters:
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
:::note Zero-copy replication is not ready for production
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
:::
## Using HDFS storage {#hdfs-storage}
In this sample configuration:
- the disk is of type `hdfs`
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
```xml
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
<type>hdfs</type>
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
<skip_access_check>true</skip_access_check>
</hdfs>
<hdd>
<type>local</type>
<path>/</path>
</hdd>
</disks>
<policies>
<hdfs>
@ -32,26 +422,17 @@ Configuration markup:
<main>
<disk>hdfs</disk>
</main>
<external>
<disk>hdd</disk>
</external>
</volumes>
</hdfs>
</policies>
</storage_configuration>
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</clickhouse>
```
Required parameters:
- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
Optional parameters:
- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.
## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system}
### Using Data Encryption {#encrypted-virtual-file-system}
You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one.
@ -112,7 +493,7 @@ Example of disk configuration:
</clickhouse>
```
## Using local cache {#using-local-cache}
### Using local cache {#using-local-cache}
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
@ -275,23 +656,92 @@ Cache profile events:
- `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds`
## Using in-memory cache (userspace page cache) {#userspace-page-cache}
The File Cache described above stores cached data in local files. Alternatively, object-store-based disks can be configured to use "Userspace Page Cache", which is RAM-only. Userspace page cache is recommended only if file cache can't be used for some reason, e.g. if the machine doesn't have a local disk at all. Note that file cache effectively uses RAM for caching too, since the OS caches contents of local files.
To enable userspace page cache for disks that don't use file cache, use setting `use_page_cache_for_disks_without_file_cache`.
By default, on Linux, the userspace page cache will use all available memory, similar to the OS page cache. In tools like `top` and `ps`, the clickhouse server process will typically show resident set size near 100% of the machine's RAM - this is normal, and most of this memory is actually reclaimable by the OS on memory pressure (`MADV_FREE`). This behavior can be disabled with server setting `page_cache_use_madv_free = 0`, making the userspace page cache just use a fixed amount of memory `page_cache_size` with no special interaction with the OS. On Mac OS, `page_cache_use_madv_free` is always disabled as it doesn't have lazy `MADV_FREE`.
Unfortunately, `page_cache_use_madv_free` makes it difficult to tell if the server is close to running out of memory, since the RSS metric becomes useless. Async metric `UnreclaimableRSS` shows the amount of physical memory used by the server, excluding the memory reclaimable by the OS: `select value from system.asynchronous_metrics where metric = 'UnreclaimableRSS'`. Use it for monitoring instead of RSS. This metric is only available if `page_cache_use_madv_free` is enabled.
## Storing Data on Web Server {#storing-data-on-webserver}
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
### Using static Web storage (read-only) {#web-storage}
This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md).
Web storage can be used for read-only purposes. An example use is for hosting sample data, or for migrating data.
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
Web server storage is supported only for the [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) engine families. To access the data stored on a `web` disk, use the [storage_policy](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#terms) setting when executing the query. For example, `ATTACH TABLE table_web UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'`.
In this sample configuration:
- the disk is of type `web`
- the data is hosted at `http://nginx:80/test1/`
- a cache on local storage is used
```xml
<clickhouse>
<storage_configuration>
<disks>
<web>
<type>web</type>
<endpoint>http://nginx:80/test1/</endpoint>
</web>
<cached_web>
<type>cache</type>
<disk>web</disk>
<path>cached_web_cache/</path>
<max_size>100000000</max_size>
</cached_web>
</disks>
<policies>
<web>
<volumes>
<main>
<disk>web</disk>
</main>
</volumes>
</web>
<cached_web>
<volumes>
<main>
<disk>cached_web</disk>
</main>
</volumes>
</cached_web>
</policies>
</storage_configuration>
</clickhouse>
```
:::tip
Storage can also be configured temporarily within a query, if a web dataset is not expected
to be used routinely, see [dynamic configuration](#dynamic-configuration) and skip editing the
configuration file.
:::
:::tip
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver)
:::
In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.
```sql
# highlight-next-line
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
);
# highlight-end
```
A ready test case. You need to add this configuration to config:
@ -487,7 +937,7 @@ If URL is not reachable on disk load when the server is starting up tables, then
Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read.
## Zero-copy Replication (not ready for production) {#zero-copy}
### Zero-copy Replication (not ready for production) {#zero-copy}
Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.

View File

@ -26,7 +26,9 @@ priority: 0
is_active: 0
active_children: 0
dequeued_requests: 67
canceled_requests: 0
dequeued_cost: 4692272
canceled_cost: 0
busy_periods: 63
vruntime: 938454.1999999989
system_vruntime: ᴺᵁᴸᴸ
@ -54,7 +56,9 @@ Columns:
- `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied.
- `active_children` (`UInt64`) - The number of children in active state.
- `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node.
- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node.
- `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node.
- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node.
- `busy_periods` (`UInt64`) - The total number of deactivations of this node.
- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner.
- `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`.

View File

@ -16,7 +16,9 @@ ClickHouse also supports:
## NULL Processing
During aggregation, all `NULL`s are skipped. If the aggregation has several parameters it will ignore any row in which one or more of the parameters are NULL.
During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL.
There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`.
**Examples:**
@ -85,3 +87,50 @@ FROM t_null_big;
│ [2,2,3] │ [2,NULL,2,3,NULL] │
└───────────────┴───────────────────────────────────────┘
```
Note that aggregations are skipped when the columns are used as arguments to an aggregated function. For example [`count`](../../sql-reference/aggregate-functions/reference/count.md) without parameters (`count()`) or with constant ones (`count(1)`) will count all rows in the block (independently of the value of the GROUP BY column as it's not an argument), while `count(column)` will only return the number of rows where column is not NULL.
```sql
SELECT
v,
count(1),
count(v)
FROM
(
SELECT if(number < 10, NULL, number % 3) AS v
FROM numbers(15)
)
GROUP BY v
┌────v─┬─count()─┬─count(v)─┐
│ ᴺᵁᴸᴸ │ 10 │ 0 │
│ 0 │ 1 │ 1 │
│ 1 │ 2 │ 2 │
│ 2 │ 2 │ 2 │
└──────┴─────────┴──────────┘
```
And here is an example of of first_value with `RESPECT NULLS` where we can see that NULL inputs are respected and it will return the first value read, whether it's NULL or not:
```sql
SELECT
col || '_' || ((col + 1) * 5 - 1) as range,
first_value(odd_or_null) as first,
first_value(odd_or_null) IGNORE NULLS as first_ignore_null,
first_value(odd_or_null) RESPECT NULLS as first_respect_nulls
FROM
(
SELECT
intDiv(number, 5) AS col,
if(number % 2 == 0, NULL, number) as odd_or_null
FROM numbers(15)
)
GROUP BY col
ORDER BY col
┌─range─┬─first─┬─first_ignore_null─┬─first_respect_nulls─┐
│ 0_4 │ 1 │ 1 │ ᴺᵁᴸᴸ │
│ 1_9 │ 5 │ 5 │ 5 │
│ 2_14 │ 11 │ 11 │ ᴺᵁᴸᴸ │
└───────┴───────┴───────────────────┴─────────────────────┘
```

View File

@ -1,16 +1,99 @@
---
slug: /en/sql-reference/aggregate-functions/reference/varpop
title: "varPop"
slug: "/en/sql-reference/aggregate-functions/reference/varpop"
sidebar_position: 32
---
# varPop(x)
This page covers the `varPop` and `varPopStable` functions available in ClickHouse.
Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`.
## varPop
In other words, dispersion for a set of values. Returns `Float64`.
Calculates the population covariance between two data columns. The population covariance measures the degree to which two variables vary together. Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`.
Alias: `VAR_POP`.
**Syntax**
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
:::
```sql
covarPop(x, y)
```
**Parameters**
- `x`: The first data column. [Numeric](../../../native-protocol/columns.md)
- `y`: The second data column. [Numeric](../../../native-protocol/columns.md)
**Returned value**
Returns an integer of type `Float64`.
**Implementation details**
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable).
**Example**
```sql
DROP TABLE IF EXISTS test_data;
CREATE TABLE test_data
(
x Int32,
y Int32
)
ENGINE = Memory;
INSERT INTO test_data VALUES (1, 2), (2, 3), (3, 5), (4, 6), (5, 8);
SELECT
covarPop(x, y) AS covar_pop
FROM test_data;
```
```response
3
```
## varPopStable
Calculates population covariance between two data columns using a stable, numerically accurate method to calculate the variance. This function is designed to provide reliable results even with large datasets or values that might cause numerical instability in other implementations.
**Syntax**
```sql
covarPopStable(x, y)
```
**Parameters**
- `x`: The first data column. [String literal](../../syntax#syntax-string-literal)
- `y`: The second data column. [Expression](../../syntax#syntax-expressions)
**Returned value**
Returns an integer of type `Float64`.
**Implementation details**
Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations.
**Example**
Query:
```sql
DROP TABLE IF EXISTS test_data;
CREATE TABLE test_data
(
x Int32,
y Int32
)
ENGINE = Memory;
INSERT INTO test_data VALUES (1, 2), (2, 9), (9, 5), (4, 6), (5, 8);
SELECT
covarPopStable(x, y) AS covar_pop_stable
FROM test_data;
```
```response
0.5999999999999999
```

View File

@ -1,18 +1,128 @@
---
title: "varSamp"
slug: /en/sql-reference/aggregate-functions/reference/varsamp
sidebar_position: 33
---
# varSamp
This page contains information on the `varSamp` and `varSampStable` ClickHouse functions.
Calculates the amount `Σ((x - x̅)^2) / (n - 1)`, where `n` is the sample size and `x̅`is the average value of `x`.
## varSamp
It represents an unbiased estimate of the variance of a random variable if passed values from its sample.
Calculate the sample variance of a data set.
Returns `Float64`. When `n <= 1`, returns `+∞`.
**Syntax**
Alias: `VAR_SAMP`.
```sql
varSamp(expr)
```
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
:::
**Parameters**
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions)
**Returned value**
Returns a Float64 value representing the sample variance of the input data set.
**Implementation details**
The `varSamp()` function calculates the sample variance using the following formula:
```plaintext
∑(x - mean(x))^2 / (n - 1)
```
Where:
- `x` is each individual data point in the data set.
- `mean(x)` is the arithmetic mean of the data set.
- `n` is the number of data points in the data set.
The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead.
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable).
**Example**
Query:
```sql
CREATE TABLE example_table
(
id UInt64,
value Float64
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7);
SELECT varSamp(value) FROM example_table;
```
Response:
```response
0.8650000000000091
```
## varSampStable
Calculate the sample variance of a data set using a numerically stable algorithm.
**Syntax**
```sql
varSampStable(expr)
```
**Parameters**
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions)
**Returned value**
The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set.
**Implementation details**
The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()`](#varSamp function):
```plaintext
∑(x - mean(x))^2 / (n - 1)
```
Where:
- `x` is each individual data point in the data set.
- `mean(x)` is the arithmetic mean of the data set.
- `n` is the number of data points in the data set.
The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values.
Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead.
**Example**
Query:
```sql
CREATE TABLE example_table
(
id UInt64,
value Float64
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7);
SELECT varSampStable(value) FROM example_table;
```
Response:
```response
0.865
```
This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic.

View File

@ -10,6 +10,8 @@ sidebar_label: Nullable
Returns whether the argument is [NULL](../../sql-reference/syntax.md#null).
See also operator [`IS NULL`](../operators/index.md#is_null).
``` sql
isNull(x)
```
@ -54,6 +56,8 @@ Result:
Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal).
See also operator [`IS NOT NULL`](../operators/index.md#is_not_null).
``` sql
isNotNull(x)
```

View File

@ -5,80 +5,372 @@ sidebar_label: JSON
---
There are two sets of functions to parse JSON.
- `visitParam*` (`simpleJSON*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast.
- `simpleJSON*` (`visitParam*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast.
- `JSONExtract*` is made to parse normal JSON.
# visitParam functions
# simpleJSON/visitParam functions
ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done.
The following assumptions are made:
1. The field name (function argument) must be a constant.
2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0`
2. The field name is somehow canonically encoded in JSON. For example: `simpleJSONHas('{"abc":"def"}', 'abc') = 1`, but `simpleJSONHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0`
3. Fields are searched for on any nesting level, indiscriminately. If there are multiple matching fields, the first occurrence is used.
4. The JSON does not have space characters outside of string literals.
## visitParamHas(params, name)
## simpleJSONHas
Checks whether there is a field with the `name` name.
Checks whether there is a field named `field_name`. The result is `UInt8`.
Alias: `simpleJSONHas`.
**Syntax**
## visitParamExtractUInt(params, name)
Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.
Alias: `simpleJSONExtractUInt`.
## visitParamExtractInt(params, name)
The same as for Int64.
Alias: `simpleJSONExtractInt`.
## visitParamExtractFloat(params, name)
The same as for Float64.
Alias: `simpleJSONExtractFloat`.
## visitParamExtractBool(params, name)
Parses a true/false value. The result is UInt8.
Alias: `simpleJSONExtractBool`.
## visitParamExtractRaw(params, name)
Returns the value of a field, including separators.
Alias: `simpleJSONExtractRaw`.
Examples:
``` sql
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
```sql
simpleJSONHas(json, field_name)
```
## visitParamExtractString(params, name)
**Parameters**
Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string.
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
Alias: `simpleJSONExtractString`.
**Returned value**
Examples:
It returns `1` if the field exists, `0` otherwise.
``` sql
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
visitParamExtractString('{"abc":"hello}', 'abc') = '';
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
SELECT simpleJSONHas(json, 'foo') FROM jsons;
SELECT simpleJSONHas(json, 'bar') FROM jsons;
```
```response
1
0
```
## simpleJSONExtractUInt
Parses `UInt64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
**Syntax**
```sql
simpleJSONExtractUInt(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"4e3"}');
INSERT INTO jsons VALUES ('{"foo":3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
4
0
3
5
```
## simpleJSONExtractInt
Parses `Int64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
**Syntax**
```sql
simpleJSONExtractInt(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
-4
0
-3
5
```
## simpleJSONExtractFloat
Parses `Float64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
**Syntax**
```sql
simpleJSONExtractFloat(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
-4000
0
-3.4
5
```
## simpleJSONExtractBool
Parses a true/false value from the value of the field named `field_name`. The result is `UInt8`.
**Syntax**
```sql
simpleJSONExtractBool(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns `1` if the value of the field is `true`, `0` otherwise. This means this function will return `0` including (and not only) in the following cases:
- If the field doesn't exists.
- If the field contains `true` as a string, e.g.: `{"field":"true"}`.
- If the field contains `1` as a numerical value.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":false,"bar":true}');
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json;
SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
1
0
0
```
## simpleJSONExtractRaw
Returns the value of the field named `field_name` as a `String`, including separators.
**Syntax**
```sql
simpleJSONExtractRaw(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json;
```
```response
"-4e3"
-3.4
5
{"def":[1,2,3]}
```
## simpleJSONExtractString
Parses `String` in double quotes from the value of the field named `field_name`.
**Syntax**
```sql
simpleJSONExtractString(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist.
**Implementation details**
There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}');
INSERT INTO jsons VALUES ('{"foo":"\\u263"}');
INSERT INTO jsons VALUES ('{"foo":"\\u263a"}');
INSERT INTO jsons VALUES ('{"foo":"hello}');
SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json;
```
```response
\n\0
```
## visitParamHas
This function is [an alias of `simpleJSONHas`](./json-functions#simplejsonhas).
## visitParamExtractUInt
This function is [an alias of `simpleJSONExtractUInt`](./json-functions#simplejsonextractuint).
## visitParamExtractInt
This function is [an alias of `simpleJSONExtractInt`](./json-functions#simplejsonextractint).
## visitParamExtractFloat
This function is [an alias of `simpleJSONExtractFloat`](./json-functions#simplejsonextractfloat).
## visitParamExtractBool
This function is [an alias of `simpleJSONExtractBool`](./json-functions#simplejsonextractbool).
## visitParamExtractRaw
This function is [an alias of `simpleJSONExtractRaw`](./json-functions#simplejsonextractraw).
## visitParamExtractString
This function is [an alias of `simpleJSONExtractString`](./json-functions#simplejsonextractstring).
# JSONExtract functions
The following functions are based on [simdjson](https://github.com/lemire/simdjson) designed for more complex JSON parsing requirements.

View File

@ -299,6 +299,18 @@ sin(x)
Type: [Float*](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT sin(1.23);
```
```response
0.9424888019316975
```
## cos
Returns the cosine of the argument.

View File

@ -298,7 +298,7 @@ Full columns and constants are represented differently in memory. Functions usua
Accepts any arguments, including `NULL` and does nothing. Always returns 0.
The argument is internally still evaluated. Useful e.g. for benchmarks.
## sleep(seconds)
## sleep
Used to introduce a delay or pause in the execution of a query. It is primarily used for testing and debugging purposes.
@ -310,7 +310,7 @@ sleep(seconds)
**Arguments**
- `seconds`: [Int](../../sql-reference/data-types/int-uint.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds.
- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds.
**Returned value**
@ -360,7 +360,7 @@ sleepEachRow(seconds)
**Arguments**
- `seconds`: [Int](../../sql-reference/data-types/int-uint.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds.
- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds.
**Returned value**

View File

@ -588,8 +588,41 @@ Result:
## substringUTF8
Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Returns the substring of a string `s` which starts at the specified byte index `offset` for Unicode code points. Byte counting starts from `1`. If `offset` is `0`, an empty string is returned. If `offset` is negative, the substring starts `pos` characters from the end of the string, rather than from the beginning. An optional argument `length` specifies the maximum number of bytes the returned substring may have.
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Syntax**
```sql
substringUTF8(s, offset[, length])
```
**Arguments**
- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md)
- `offset`: The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md).
- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional.
**Returned value**
A substring of `s` with `length` many bytes, starting at index `offset`.
**Implementation details**
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Example**
```sql
SELECT 'Täglich grüßt das Murmeltier.' AS str,
substringUTF8(str, 9),
substringUTF8(str, 9, 5)
```
```response
Täglich grüßt das Murmeltier. grüßt das Murmeltier. grüßt
```
## substringIndex
@ -624,7 +657,39 @@ Result:
## substringIndexUTF8
Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Returns the substring of `s` before `count` occurrences of the delimiter `delim`, specifically for Unicode code points.
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Syntax**
```sql
substringIndexUTF8(s, delim, count)
```
**Arguments**
- `s`: The string to extract substring from. [String](../../sql-reference/data-types/string.md).
- `delim`: The character to split. [String](../../sql-reference/data-types/string.md).
- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
**Returned value**
A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`.
**Implementation details**
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Example**
```sql
SELECT substringIndexUTF8('www.straßen-in-europa.de', '.', 2)
```
```response
www.straßen-in-europa
```
## appendTrailingCharIfAbsent

View File

@ -353,7 +353,7 @@ For efficiency, the `and` and `or` functions accept any number of arguments. The
ClickHouse supports the `IS NULL` and `IS NOT NULL` operators.
### IS NULL
### IS NULL {#is_null}
- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns:
- `1`, if the value is `NULL`.
@ -374,7 +374,7 @@ SELECT x+100 FROM t_null WHERE y IS NULL
└──────────────┘
```
### IS NOT NULL
### IS NOT NULL {#is_not_null}
- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns:
- `0`, if the value is `NULL`.

View File

@ -50,6 +50,7 @@
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Formats/registerFormats.h>
#include <Formats/FormatFactory.h>
namespace fs = std::filesystem;
using namespace std::literals;
@ -1137,6 +1138,13 @@ void Client::processOptions(const OptionsDescription & options_description,
}
static bool checkIfStdoutIsRegularFile()
{
struct stat file_stat;
return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
void Client::processConfig()
{
if (!queries.empty() && config().has("queries-file"))
@ -1173,7 +1181,14 @@ void Client::processConfig()
pager = config().getString("pager", "");
is_default_format = !config().has("vertical") && !config().has("format");
if (config().has("vertical"))
if (is_default_format && checkIfStdoutIsRegularFile())
{
is_default_format = false;
std::optional<String> format_from_file_name;
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
format = format_from_file_name ? *format_from_file_name : "TabSeparated";
}
else if (config().has("vertical"))
format = config().getString("format", "Vertical");
else
format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated");
@ -1377,8 +1392,8 @@ void Client::readArguments(
}
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
#pragma clang diagnostic ignored "-Wunused-function"
#pragma clang diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHouseClient(int argc, char ** argv)
{

View File

@ -109,8 +109,8 @@ static std::vector<std::string> extractFromConfig(
return {configuration->getString(key)};
}
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
#pragma clang diagnostic ignored "-Wunused-function"
#pragma clang diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHouseExtractFromConfig(int argc, char ** argv)
{

View File

@ -70,8 +70,8 @@ void skipSpacesAndComments(const char*& pos, const char* end, bool print_comment
}
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
#pragma clang diagnostic ignored "-Wunused-function"
#pragma clang diagnostic ignored "-Wmissing-declarations"
extern const char * auto_time_zones[];

View File

@ -327,6 +327,14 @@ static bool checkIfStdinIsRegularFile()
return fstat(STDIN_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
static bool checkIfStdoutIsRegularFile()
{
struct stat file_stat;
return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
std::string LocalServer::getInitialCreateTableQuery()
{
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || queries.empty()))
@ -638,7 +646,14 @@ void LocalServer::processConfig()
if (config().has("macros"))
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
if (!config().has("output-format") && !config().has("format") && checkIfStdoutIsRegularFile())
{
std::optional<String> format_from_file_name;
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
format = format_from_file_name ? *format_from_file_name : "TSV";
}
else
format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
insert_format = "Values";
/// Setting value from cmd arg overrides one from config
@ -944,8 +959,8 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
}
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
#pragma clang diagnostic ignored "-Wunused-function"
#pragma clang diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHouseLocal(int argc, char ** argv)
{

View File

@ -1204,8 +1204,8 @@ public:
}
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
#pragma clang diagnostic ignored "-Wunused-function"
#pragma clang diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHouseObfuscator(int argc, char ** argv)
try

View File

@ -133,20 +133,20 @@ public:
/// This function also enables custom prefixes to be used.
void setCustomSettingsPrefixes(const Strings & prefixes);
void setCustomSettingsPrefixes(const String & comma_separated_prefixes);
bool isSettingNameAllowed(const std::string_view name) const;
void checkSettingNameIsAllowed(const std::string_view name) const;
bool isSettingNameAllowed(std::string_view name) const;
void checkSettingNameIsAllowed(std::string_view name) const;
/// Allows implicit user creation without password (by default it's allowed).
/// In other words, allow 'CREATE USER' queries without 'IDENTIFIED WITH' clause.
void setImplicitNoPasswordAllowed(const bool allow_implicit_no_password_);
void setImplicitNoPasswordAllowed(bool allow_implicit_no_password_);
bool isImplicitNoPasswordAllowed() const;
/// Allows users without password (by default it's allowed).
void setNoPasswordAllowed(const bool allow_no_password_);
void setNoPasswordAllowed(bool allow_no_password_);
bool isNoPasswordAllowed() const;
/// Allows users with plaintext password (by default it's allowed).
void setPlaintextPasswordAllowed(const bool allow_plaintext_password_);
void setPlaintextPasswordAllowed(bool allow_plaintext_password_);
bool isPlaintextPasswordAllowed() const;
/// Default password type when the user does not specify it.

View File

@ -616,7 +616,7 @@ UUID IAccessStorage::generateRandomID()
}
void IAccessStorage::clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const LoggerPtr log_)
void IAccessStorage::clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_)
{
std::unordered_map<UUID, size_t> positions_by_id;
std::unordered_map<std::string_view, size_t> positions_by_type_and_name[static_cast<size_t>(AccessEntityType::MAX)];

View File

@ -228,7 +228,7 @@ protected:
static UUID generateRandomID();
LoggerPtr getLogger() const;
static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); }
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const LoggerPtr log_);
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_);
[[noreturn]] void throwNotFound(const UUID & id) const;
[[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const;
[[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type);

View File

@ -182,11 +182,14 @@ public:
if constexpr (Trait::sampler == Sampler::NONE)
{
if (limit_num_elems && cur_elems.value.size() >= max_elems)
if constexpr (limit_num_elems)
{
if constexpr (Trait::last)
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value;
return;
if (cur_elems.value.size() >= max_elems)
{
if constexpr (Trait::last)
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value;
return;
}
}
cur_elems.value.push_back(row_value, arena);
@ -236,7 +239,7 @@ public:
void mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
if (!limit_num_elems)
if constexpr (!limit_num_elems)
{
if (rhs_elems.value.size())
cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);

View File

@ -310,9 +310,11 @@ public:
{
for (Field & element : values)
{
UInt8 is_null = 0;
readBinary(is_null, buf);
if (!is_null)
/// We must initialize the Field type since some internal functions (like operator=) use them
new (&element) Field;
bool has_value = false;
readBinary(has_value, buf);
if (has_value)
serialization->deserializeBinary(element, buf, {});
}
}

View File

@ -144,7 +144,7 @@ public:
count = other.count;
compressed = other.compressed;
sampled.resize(other.sampled.size());
sampled.resize_exact(other.sampled.size());
memcpy(sampled.data(), other.sampled.data(), sizeof(Stats) * other.sampled.size());
return;
}
@ -180,7 +180,7 @@ public:
compress();
backup_sampled.clear();
backup_sampled.reserve(sampled.size() + other.sampled.size());
backup_sampled.reserve_exact(sampled.size() + other.sampled.size());
double merged_relative_error = std::max(relative_error, other.relative_error);
size_t merged_count = count + other.count;
Int64 additional_self_delta = static_cast<Int64>(std::floor(2 * other.relative_error * other.count));
@ -268,11 +268,7 @@ public:
size_t sampled_len = 0;
readBinaryLittleEndian(sampled_len, buf);
if (sampled_len > compress_threshold)
throw Exception(
ErrorCodes::INCORRECT_DATA, "The number of elements {} for quantileGK exceeds {}", sampled_len, compress_threshold);
sampled.resize(sampled_len);
sampled.resize_exact(sampled_len);
for (size_t i = 0; i < sampled_len; ++i)
{
@ -317,7 +313,7 @@ private:
::sort(head_sampled.begin(), head_sampled.end());
backup_sampled.clear();
backup_sampled.reserve(sampled.size() + head_sampled.size());
backup_sampled.reserve_exact(sampled.size() + head_sampled.size());
size_t sample_idx = 0;
size_t ops_idx = 0;

View File

@ -47,9 +47,7 @@ private:
std::unique_ptr<MemoryChunk> prev;
MemoryChunk()
{
}
MemoryChunk() = default;
void swap(MemoryChunk & other)
{

View File

@ -9,6 +9,7 @@
#include <IO/MMappedFileCache.h>
#include <IO/ReadHelpers.h>
#include <base/errnoToString.h>
#include <base/find_symbols.h>
#include <base/getPageSize.h>
#include <sys/resource.h>
#include <chrono>
@ -90,6 +91,9 @@ AsynchronousMetrics::AsynchronousMetrics(
openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", cgroupcpu_cfs_quota);
}
openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count);
openFileIfExists("/proc/self/maps", vm_maps);
openSensors();
openBlockDevices();
openEDAC();
@ -1423,6 +1427,55 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
}
}
if (vm_max_map_count)
{
try
{
vm_max_map_count->rewind();
uint64_t max_map_count = 0;
readText(max_map_count, *vm_max_map_count);
new_values["VMMaxMapCount"] = { max_map_count, "The maximum number of memory mappings a process may have (/proc/sys/vm/max_map_count)."};
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count);
}
}
if (vm_maps)
{
try
{
vm_maps->rewind();
uint64_t num_maps = 0;
while (!vm_maps->eof())
{
char * next_pos = find_first_symbols<'\n'>(vm_maps->position(), vm_maps->buffer().end());
vm_maps->position() = next_pos;
if (!vm_maps->hasPendingData())
continue;
if (*vm_maps->position() == '\n')
{
++num_maps;
++vm_maps->position();
}
}
new_values["VMNumMaps"] = { num_maps,
"The current number of memory mappings of the process (/proc/self/maps)."
" If it is close to the maximum (VMMaxMapCount), you should increase the limit for vm.max_map_count in /etc/sysctl.conf"};
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
openFileIfExists("/proc/self/maps", vm_maps);
}
}
try
{
for (size_t i = 0, size = thermal.size(); i < size; ++i)

View File

@ -123,6 +123,9 @@ private:
std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_quota TSA_GUARDED_BY(data_mutex);
std::optional<ReadBufferFromFilePRead> cgroupcpu_max TSA_GUARDED_BY(data_mutex);
std::optional<ReadBufferFromFilePRead> vm_max_map_count TSA_GUARDED_BY(data_mutex);
std::optional<ReadBufferFromFilePRead> vm_maps TSA_GUARDED_BY(data_mutex);
std::vector<std::unique_ptr<ReadBufferFromFilePRead>> thermal TSA_GUARDED_BY(data_mutex);
std::unordered_map<String /* device name */,

View File

@ -297,7 +297,7 @@ void DNSResolver::setDisableCacheFlag(bool is_disabled)
impl->disable_cache = is_disabled;
}
void DNSResolver::setCacheMaxEntries(const UInt64 cache_max_entries)
void DNSResolver::setCacheMaxEntries(UInt64 cache_max_entries)
{
impl->cache_address.setMaxSizeInBytes(cache_max_entries);
impl->cache_host.setMaxSizeInBytes(cache_max_entries);

View File

@ -56,7 +56,7 @@ public:
void setDisableCacheFlag(bool is_disabled = true);
/// Set a limit of entries in cache
void setCacheMaxEntries(const UInt64 cache_max_entries);
void setCacheMaxEntries(UInt64 cache_max_entries);
/// Drops all caches
void dropCache();

View File

@ -255,7 +255,7 @@ private:
static LUTIndex toLUTIndex(ExtendedDayNum d)
{
return normalizeLUTIndex(static_cast<Int64>(d + daynum_offset_epoch));
return normalizeLUTIndex(static_cast<Int64>(d + daynum_offset_epoch)); /// NOLINT
}
LUTIndex toLUTIndex(Time t) const

View File

@ -1,5 +1,7 @@
#include <Common/FunctionDocumentation.h>
#include <boost/algorithm/string.hpp>
namespace DB
{
@ -31,14 +33,7 @@ std::string FunctionDocumentation::examplesAsString() const
std::string FunctionDocumentation::categoriesAsString() const
{
if (categories.empty())
return "";
auto it = categories.begin();
std::string res = *it;
for (; it != categories.end(); ++it)
res += ", " + *it;
return res;
return boost::algorithm::join(categories, ", ");
}
}

View File

@ -41,9 +41,9 @@ public:
}
/// There is no copy constructor because only one MultiVersion should own the same object.
MultiVersion(MultiVersion && src) { *this = std::move(src); }
MultiVersion(MultiVersion && src) { *this = std::move(src); } /// NOLINT
MultiVersion & operator=(MultiVersion && src)
MultiVersion & operator=(MultiVersion && src) /// NOLINT
{
if (this != &src)
{

View File

@ -25,7 +25,7 @@
*/
template <typename T, typename U>
constexpr bool memcpy_can_be_used_for_assignment = std::is_same_v<T, U>
|| (std::is_integral_v<T> && std::is_integral_v<U> && sizeof(T) == sizeof(U));
|| (std::is_integral_v<T> && std::is_integral_v<U> && sizeof(T) == sizeof(U)); /// NOLINT(misc-redundant-expression)
namespace DB
{
@ -558,7 +558,7 @@ public:
}
template <typename... TAllocatorParams>
void swap(PODArray & rhs, TAllocatorParams &&... allocator_params)
void swap(PODArray & rhs, TAllocatorParams &&... allocator_params) /// NOLINT(performance-noexcept-swap)
{
#ifndef NDEBUG
this->unprotect();
@ -756,7 +756,7 @@ public:
};
template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
void swap(PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & rhs)
void swap(PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & rhs) /// NOLINT
{
lhs.swap(rhs);
}

View File

@ -476,6 +476,7 @@ The server successfully detected this situation and will download merged part fr
M(FileSegmentRemoveMicroseconds, "File segment remove() time") \
M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \
M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \
M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \
M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \
M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \
\

View File

@ -2,13 +2,13 @@
# if USE_SSH
# include <stdexcept>
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wreserved-macro-identifier"
# pragma GCC diagnostic ignored "-Wreserved-identifier"
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wreserved-macro-identifier"
# pragma clang diagnostic ignored "-Wreserved-identifier"
# include <libssh/libssh.h>
# pragma GCC diagnostic pop
# pragma clang diagnostic pop
namespace
{

View File

@ -387,7 +387,9 @@ public:
/// Introspection
std::atomic<UInt64> dequeued_requests{0};
std::atomic<UInt64> canceled_requests{0};
std::atomic<ResourceCost> dequeued_cost{0};
std::atomic<ResourceCost> canceled_cost{0};
std::atomic<UInt64> busy_periods{0};
};

View File

@ -50,6 +50,12 @@ public:
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
virtual void enqueueRequest(ResourceRequest * request) = 0;
/// Cancel previously enqueued request.
/// Returns `false` and does nothing given unknown or already executed request.
/// Returns `true` if requests has been found and canceled.
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
virtual bool cancelRequest(ResourceRequest * request) = 0;
/// For introspection
ResourceCost getBudget() const
{

View File

@ -134,56 +134,65 @@ public:
std::pair<ResourceRequest *, bool> dequeueRequest() override
{
if (heap_size == 0)
return {nullptr, false};
// Recursively pull request from child
auto [request, child_active] = items.front().child->dequeueRequest();
assert(request != nullptr);
std::pop_heap(items.begin(), items.begin() + heap_size);
Item & current = items[heap_size - 1];
// SFQ fairness invariant: system vruntime equals last served request start-time
assert(current.vruntime >= system_vruntime);
system_vruntime = current.vruntime;
// By definition vruntime is amount of consumed resource (cost) divided by weight
current.vruntime += double(request->cost) / current.child->info.weight;
max_vruntime = std::max(max_vruntime, current.vruntime);
if (child_active) // Put active child back in heap after vruntime update
// Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr`
while (true)
{
std::push_heap(items.begin(), items.begin() + heap_size);
}
else // Deactivate child if it is empty, but remember it's vruntime for latter activations
{
heap_size--;
if (heap_size == 0)
return {nullptr, false};
// Store index of this inactive child in `parent.idx`
// This enables O(1) search of inactive children instead of O(n)
current.child->info.parent.idx = heap_size;
}
// Recursively pull request from child
auto [request, child_active] = items.front().child->dequeueRequest();
std::pop_heap(items.begin(), items.begin() + heap_size);
Item & current = items[heap_size - 1];
// Reset any difference between children on busy period end
if (heap_size == 0)
{
// Reset vtime to zero to avoid floating-point error accumulation,
// but do not reset too often, because it's O(N)
UInt64 ns = clock_gettime_ns();
if (last_reset_ns + 1000000000 < ns)
if (request)
{
last_reset_ns = ns;
for (Item & item : items)
item.vruntime = 0;
max_vruntime = 0;
}
system_vruntime = max_vruntime;
busy_periods++;
}
// SFQ fairness invariant: system vruntime equals last served request start-time
assert(current.vruntime >= system_vruntime);
system_vruntime = current.vruntime;
dequeued_requests++;
dequeued_cost += request->cost;
return {request, heap_size > 0};
// By definition vruntime is amount of consumed resource (cost) divided by weight
current.vruntime += double(request->cost) / current.child->info.weight;
max_vruntime = std::max(max_vruntime, current.vruntime);
}
if (child_active) // Put active child back in heap after vruntime update
{
std::push_heap(items.begin(), items.begin() + heap_size);
}
else // Deactivate child if it is empty, but remember it's vruntime for latter activations
{
heap_size--;
// Store index of this inactive child in `parent.idx`
// This enables O(1) search of inactive children instead of O(n)
current.child->info.parent.idx = heap_size;
}
// Reset any difference between children on busy period end
if (heap_size == 0)
{
// Reset vtime to zero to avoid floating-point error accumulation,
// but do not reset too often, because it's O(N)
UInt64 ns = clock_gettime_ns();
if (last_reset_ns + 1000000000 < ns)
{
last_reset_ns = ns;
for (Item & item : items)
item.vruntime = 0;
max_vruntime = 0;
}
system_vruntime = max_vruntime;
busy_periods++;
}
if (request)
{
dequeued_requests++;
dequeued_cost += request->cost;
return {request, heap_size > 0};
}
}
}
bool isActive() override

View File

@ -39,8 +39,7 @@ public:
void enqueueRequest(ResourceRequest * request) override
{
std::unique_lock lock(mutex);
request->enqueue_ns = clock_gettime_ns();
std::lock_guard lock(mutex);
queue_cost += request->cost;
bool was_empty = requests.empty();
requests.push_back(request);
@ -50,7 +49,7 @@ public:
std::pair<ResourceRequest *, bool> dequeueRequest() override
{
std::unique_lock lock(mutex);
std::lock_guard lock(mutex);
if (requests.empty())
return {nullptr, false};
ResourceRequest * result = requests.front();
@ -63,9 +62,29 @@ public:
return {result, !requests.empty()};
}
bool cancelRequest(ResourceRequest * request) override
{
std::lock_guard lock(mutex);
// TODO(serxa): reimplement queue as intrusive list of ResourceRequest to make this O(1) instead of O(N)
for (auto i = requests.begin(), e = requests.end(); i != e; ++i)
{
if (*i == request)
{
requests.erase(i);
if (requests.empty())
busy_periods++;
queue_cost -= request->cost;
canceled_requests++;
canceled_cost += request->cost;
return true;
}
}
return false;
}
bool isActive() override
{
std::unique_lock lock(mutex);
std::lock_guard lock(mutex);
return !requests.empty();
}
@ -98,14 +117,14 @@ public:
std::pair<UInt64, Int64> getQueueLengthAndCost()
{
std::unique_lock lock(mutex);
std::lock_guard lock(mutex);
return {requests.size(), queue_cost};
}
private:
std::mutex mutex;
Int64 queue_cost = 0;
std::deque<ResourceRequest *> requests;
std::deque<ResourceRequest *> requests; // TODO(serxa): reimplement it using intrusive list to avoid allocations/deallocations and O(N) during cancel
};
}

View File

@ -102,25 +102,31 @@ public:
std::pair<ResourceRequest *, bool> dequeueRequest() override
{
if (items.empty())
return {nullptr, false};
// Recursively pull request from child
auto [request, child_active] = items.front().child->dequeueRequest();
assert(request != nullptr);
// Deactivate child if it is empty
if (!child_active)
// Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr`
while (true)
{
std::pop_heap(items.begin(), items.end());
items.pop_back();
if (items.empty())
busy_periods++;
}
return {nullptr, false};
dequeued_requests++;
dequeued_cost += request->cost;
return {request, !items.empty()};
// Recursively pull request from child
auto [request, child_active] = items.front().child->dequeueRequest();
// Deactivate child if it is empty
if (!child_active)
{
std::pop_heap(items.begin(), items.end());
items.pop_back();
if (items.empty())
busy_periods++;
}
if (request)
{
dequeued_requests++;
dequeued_cost += request->cost;
return {request, !items.empty()};
}
}
}
bool isActive() override

View File

@ -38,7 +38,6 @@ TEST(SchedulerDynamicResourceManager, Smoke)
{
ResourceGuard gA(cA->get("res1"), ResourceGuard::PostponeLocking);
gA.lock();
gA.setFailure();
gA.unlock();
ResourceGuard gB(cB->get("res1"));

View File

@ -4,6 +4,7 @@
#include <Common/Scheduler/Nodes/tests/ResourceTest.h>
#include <barrier>
#include <future>
using namespace DB;
@ -73,6 +74,22 @@ struct ResourceHolder
}
};
struct MyRequest : public ResourceRequest
{
std::function<void()> on_execute;
explicit MyRequest(ResourceCost cost_, std::function<void()> on_execute_)
: ResourceRequest(cost_)
, on_execute(on_execute_)
{}
void execute() override
{
if (on_execute)
on_execute();
}
};
TEST(SchedulerRoot, Smoke)
{
ResourceTest t;
@ -111,3 +128,49 @@ TEST(SchedulerRoot, Smoke)
EXPECT_TRUE(fc2->requests.contains(&rg.request));
}
}
TEST(SchedulerRoot, Cancel)
{
ResourceTest t;
ResourceHolder r1(t);
auto * fc1 = r1.add<ConstraintTest>("/", "<max_requests>1</max_requests>");
r1.add<PriorityPolicy>("/prio");
auto a = r1.addQueue("/prio/A", "<priority>1</priority>");
auto b = r1.addQueue("/prio/B", "<priority>2</priority>");
r1.registerResource();
std::barrier destruct_sync(2);
std::barrier sync(2);
std::thread consumer1([&]
{
MyRequest request(1,[&]
{
sync.arrive_and_wait(); // (A)
EXPECT_TRUE(fc1->requests.contains(&request));
sync.arrive_and_wait(); // (B)
request.finish();
destruct_sync.arrive_and_wait(); // (C)
});
a.queue->enqueueRequest(&request);
destruct_sync.arrive_and_wait(); // (C)
});
std::thread consumer2([&]
{
MyRequest request(1,[&]
{
FAIL() << "This request must be canceled, but instead executes";
});
sync.arrive_and_wait(); // (A) wait for request of consumer1 to be inside execute, so that constraint is in violated state and our request will not be executed immediately
b.queue->enqueueRequest(&request);
bool canceled = b.queue->cancelRequest(&request);
EXPECT_TRUE(canceled);
sync.arrive_and_wait(); // (B) release request of consumer1 to be finished
});
consumer1.join();
consumer2.join();
EXPECT_TRUE(fc1->requests.empty());
}

View File

@ -71,8 +71,7 @@ public:
// lock(mutex) is not required because `Dequeued` request cannot be used by the scheduler thread
chassert(state == Dequeued);
state = Finished;
if (constraint)
constraint->finishRequest(this);
ResourceRequest::finish();
}
static Request & local()
@ -126,12 +125,6 @@ public:
}
}
/// Mark request as unsuccessful; by default request is considered to be successful
void setFailure()
{
request.successful = false;
}
ResourceLink link;
Request & request;
};

View File

@ -0,0 +1,13 @@
#include <Common/Scheduler/ResourceRequest.h>
#include <Common/Scheduler/ISchedulerConstraint.h>
namespace DB
{
void ResourceRequest::finish()
{
if (constraint)
constraint->finishRequest(this);
}
}

View File

@ -14,9 +14,6 @@ class ISchedulerConstraint;
using ResourceCost = Int64;
constexpr ResourceCost ResourceCostMax = std::numeric_limits<int>::max();
/// Timestamps (nanoseconds since epoch)
using ResourceNs = UInt64;
/*
* Request for a resource consumption. The main moving part of the scheduling subsystem.
* Resource requests processing workflow:
@ -31,7 +28,7 @@ using ResourceNs = UInt64;
* 3) Scheduler calls ISchedulerNode::dequeueRequest() that returns the request.
* 4) Callback ResourceRequest::execute() is called to provide access to the resource.
* 5) The resource consumption is happening outside of the scheduling subsystem.
* 6) request->constraint->finishRequest() is called when consumption is finished.
* 6) ResourceRequest::finish() is called when consumption is finished.
*
* Steps (5) and (6) can be omitted if constraint is not used by the resource.
*
@ -39,7 +36,10 @@ using ResourceNs = UInt64;
* Request ownership is done outside of the scheduling subsystem.
* After (6) request can be destructed safely.
*
* Request cancelling is not supported yet.
* Request can also be canceled before (3) using ISchedulerQueue::cancelRequest().
* Returning false means it is too late for request to be canceled. It should be processed in a regular way.
* Returning true means successful cancel and therefore steps (4) and (5) are not going to happen
* and step (6) MUST be omitted.
*/
class ResourceRequest
{
@ -48,32 +48,20 @@ public:
/// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it)
ResourceCost cost;
/// Request outcome
/// Should be filled during resource consumption
bool successful;
/// Scheduler node to be notified on consumption finish
/// Auto-filled during request enqueue/dequeue
ISchedulerConstraint * constraint;
/// Timestamps for introspection
ResourceNs enqueue_ns;
ResourceNs execute_ns;
ResourceNs finish_ns;
explicit ResourceRequest(ResourceCost cost_ = 1)
{
reset(cost_);
}
/// ResourceRequest object may be reused again after reset()
void reset(ResourceCost cost_)
{
cost = cost_;
successful = true;
constraint = nullptr;
enqueue_ns = 0;
execute_ns = 0;
finish_ns = 0;
}
virtual ~ResourceRequest() = default;
@ -83,6 +71,12 @@ public:
/// just triggering start of a consumption, not doing the consumption itself
/// (e.g. setting an std::promise or creating a job in a thread pool)
virtual void execute() = 0;
/// Stop resource consumption and notify resource scheduler.
/// Should be called when resource consumption is finished by consumer.
/// ResourceRequest should not be destructed or reset before calling to `finish()`.
/// WARNING: this function MUST not be called if request was canceled.
void finish();
};
}

View File

@ -145,22 +145,27 @@ public:
std::pair<ResourceRequest *, bool> dequeueRequest() override
{
if (current == nullptr) // No active resources
return {nullptr, false};
while (true)
{
if (current == nullptr) // No active resources
return {nullptr, false};
// Dequeue request from current resource
auto [request, resource_active] = current->root->dequeueRequest();
assert(request != nullptr);
// Dequeue request from current resource
auto [request, resource_active] = current->root->dequeueRequest();
// Deactivate resource if required
if (!resource_active)
deactivate(current);
else
current = current->next; // Just move round-robin pointer
// Deactivate resource if required
if (!resource_active)
deactivate(current);
else
current = current->next; // Just move round-robin pointer
dequeued_requests++;
dequeued_cost += request->cost;
return {request, current != nullptr};
if (request == nullptr) // Possible in case of request cancel, just retry
continue;
dequeued_requests++;
dequeued_cost += request->cost;
return {request, current != nullptr};
}
}
bool isActive() override
@ -245,7 +250,6 @@ private:
void execute(ResourceRequest * request)
{
request->execute_ns = clock_gettime_ns();
request->execute();
}

View File

@ -149,7 +149,7 @@ public:
/// Pad the remainder, which is missing up to an 8-byte word.
current_word = 0;
switch (end - data)
switch (end - data) /// NOLINT(bugprone-switch-missing-default-case)
{
case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]];
case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]];

View File

@ -16,7 +16,7 @@ class MergeTreeTransaction;
/// or transaction object is not needed and not passed intentionally.
#ifndef NO_TRANSACTION_PTR
#define NO_TRANSACTION_PTR std::shared_ptr<MergeTreeTransaction>(nullptr)
#define NO_TRANSACTION_RAW static_cast<MergeTreeTransaction *>(nullptr)
#define NO_TRANSACTION_RAW static_cast<MergeTreeTransaction *>(nullptr) /// NOLINT(bugprone-macro-parentheses)
#endif
/// Commit Sequence Number

View File

@ -23,7 +23,7 @@ namespace ProfileEvents
namespace Coordination
{
void Exception::incrementErrorMetrics(const Error code_)
void Exception::incrementErrorMetrics(Error code_)
{
if (Coordination::isUserError(code_))
ProfileEvents::increment(ProfileEvents::ZooKeeperUserExceptions);
@ -33,14 +33,14 @@ void Exception::incrementErrorMetrics(const Error code_)
ProfileEvents::increment(ProfileEvents::ZooKeeperOtherExceptions);
}
Exception::Exception(const std::string & msg, const Error code_, int)
Exception::Exception(const std::string & msg, Error code_, int)
: DB::Exception(msg, DB::ErrorCodes::KEEPER_EXCEPTION)
, code(code_)
{
incrementErrorMetrics(code);
}
Exception::Exception(PreformattedMessage && msg, const Error code_)
Exception::Exception(PreformattedMessage && msg, Error code_)
: DB::Exception(std::move(msg), DB::ErrorCodes::KEEPER_EXCEPTION)
, code(code_)
{
@ -48,7 +48,7 @@ Exception::Exception(PreformattedMessage && msg, const Error code_)
incrementErrorMetrics(code);
}
Exception::Exception(const Error code_)
Exception::Exception(Error code_)
: Exception(code_, "Coordination error: {}", errorMessage(code_))
{
}

View File

@ -466,13 +466,13 @@ class Exception : public DB::Exception
{
private:
/// Delegate constructor, used to minimize repetition; last parameter used for overload resolution.
Exception(const std::string & msg, const Error code_, int); /// NOLINT
Exception(PreformattedMessage && msg, const Error code_);
Exception(const std::string & msg, Error code_, int); /// NOLINT
Exception(PreformattedMessage && msg, Error code_);
/// Message must be a compile-time constant
template <typename T>
requires std::is_convertible_v<T, String>
Exception(T && message, const Error code_) : DB::Exception(std::forward<T>(message), DB::ErrorCodes::KEEPER_EXCEPTION, /* remote_= */ false), code(code_)
Exception(T && message, Error code_) : DB::Exception(std::forward<T>(message), DB::ErrorCodes::KEEPER_EXCEPTION, /* remote_= */ false), code(code_)
{
incrementErrorMetrics(code);
}
@ -480,23 +480,23 @@ private:
static void incrementErrorMetrics(Error code_);
public:
explicit Exception(const Error code_); /// NOLINT
explicit Exception(Error code_); /// NOLINT
Exception(const Exception & exc);
template <typename... Args>
Exception(const Error code_, FormatStringHelper<Args...> fmt, Args &&... args)
Exception(Error code_, FormatStringHelper<Args...> fmt, Args &&... args)
: DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, std::move(fmt), std::forward<Args>(args)...)
, code(code_)
{
incrementErrorMetrics(code);
}
inline static Exception createDeprecated(const std::string & msg, const Error code_)
inline static Exception createDeprecated(const std::string & msg, Error code_)
{
return Exception(msg, code_, 0);
}
inline static Exception fromPath(const Error code_, const std::string & path)
inline static Exception fromPath(Error code_, const std::string & path)
{
return Exception(code_, "Coordination error: {}, path {}", errorMessage(code_), path);
}
@ -504,7 +504,7 @@ public:
/// Message must be a compile-time constant
template <typename T>
requires std::is_convertible_v<T, String>
inline static Exception fromMessage(const Error code_, T && message)
inline static Exception fromMessage(Error code_, T && message)
{
return Exception(std::forward<T>(message), code_);
}

View File

@ -19,14 +19,14 @@ namespace Poco { class Logger; }
using LogSeriesLimiterPtr = std::shared_ptr<LogSeriesLimiter>;
namespace
namespace impl
{
[[maybe_unused]] LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; }
[[maybe_unused]] LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); }
[[maybe_unused]] const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; }
[[maybe_unused]] std::unique_ptr<LogToStrImpl> getLoggerHelper(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
[[maybe_unused]] std::unique_ptr<LogFrequencyLimiterIml> getLoggerHelper(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }
[[maybe_unused]] LogSeriesLimiterPtr getLoggerHelper(LogSeriesLimiterPtr & logger) { return logger; }
[[maybe_unused]] inline LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; }
[[maybe_unused]] inline LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); }
[[maybe_unused]] inline const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; }
[[maybe_unused]] inline std::unique_ptr<LogToStrImpl> getLoggerHelper(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
[[maybe_unused]] inline std::unique_ptr<LogFrequencyLimiterIml> getLoggerHelper(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }
[[maybe_unused]] inline LogSeriesLimiterPtr getLoggerHelper(LogSeriesLimiterPtr & logger) { return logger; }
}
#define LOG_IMPL_FIRST_ARG(X, ...) X
@ -65,7 +65,7 @@ namespace
#define LOG_IMPL(logger, priority, PRIORITY, ...) do \
{ \
auto _logger = ::getLoggerHelper(logger); \
auto _logger = ::impl::getLoggerHelper(logger); \
const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
(DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \
if (!_is_clients_log && !_logger->is((PRIORITY))) \

View File

@ -123,17 +123,15 @@ protected:
std::string getServerUrl() const
{
return "http://" + server_data.socket->address().toString();
return "http://" + server_data.server->socket().address().toString();
}
void startServer()
{
server_data.reset();
server_data.params = new Poco::Net::HTTPServerParams();
server_data.socket = std::make_unique<Poco::Net::ServerSocket>(server_data.port);
server_data.handler_factory = new HTTPRequestHandlerFactory(slowdown_receive);
server_data.server = std::make_unique<Poco::Net::HTTPServer>(
server_data.handler_factory, *server_data.socket, server_data.params);
server_data.handler_factory, server_data.port);
server_data.server->start();
}
@ -155,8 +153,7 @@ protected:
{
// just some port to avoid collisions with others tests
UInt16 port = 9871;
Poco::Net::HTTPServerParams::Ptr params;
std::unique_ptr<Poco::Net::ServerSocket> socket;
HTTPRequestHandlerFactory::Ptr handler_factory;
std::unique_ptr<Poco::Net::HTTPServer> server;
@ -171,8 +168,6 @@ protected:
server = nullptr;
handler_factory = nullptr;
socket = nullptr;
params = nullptr;
}
~ServerData() {

View File

@ -374,7 +374,7 @@ void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::Abstr
if (!shared->keeper_dispatcher)
return;
shared->keeper_dispatcher->updateConfiguration(getConfigRef(), getMacros());
shared->keeper_dispatcher->updateConfiguration(config_, getMacros());
}
std::shared_ptr<zkutil::ZooKeeper> Context::getZooKeeper() const

View File

@ -36,7 +36,7 @@ void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_colum
void insertPostgreSQLValue(
IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
ExternalResultDescription::ValueType type, DataTypePtr data_type,
const std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx)
{
switch (type)
@ -170,7 +170,7 @@ void insertPostgreSQLValue(
void preparePostgreSQLArrayInfo(
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type)
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, DataTypePtr data_type)
{
const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get());
auto nested = array_type->getNestedType();

View File

@ -22,11 +22,11 @@ struct PostgreSQLArrayInfo
void insertPostgreSQLValue(
IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
ExternalResultDescription::ValueType type, DataTypePtr data_type,
const std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx);
void preparePostgreSQLArrayInfo(
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type);
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, DataTypePtr data_type);
void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_column);

View File

@ -15,6 +15,7 @@ namespace ErrorCodes
extern const int THERE_IS_NO_PROFILE;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
extern const int BAD_ARGUMENTS;
}
IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS)
@ -114,7 +115,11 @@ std::vector<String> Settings::getAllRegisteredNames() const
void Settings::set(std::string_view name, const Field & value)
{
if (name == "compatibility")
{
if (value.getType() != Field::Types::Which::String)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type of value for setting 'compatibility'. Expected String, got {}", value.getTypeName());
applyCompatibilitySetting(value.get<String>());
}
/// If we change setting that was changed by compatibility setting before
/// we should remove it from settings_changed_by_compatibility_setting,
/// otherwise the next time we will change compatibility setting

View File

@ -567,7 +567,7 @@ class IColumn;
M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
\
M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \
M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \
M(DefaultTableEngine, default_table_engine, DefaultTableEngine::MergeTree, "Default table engine used when ENGINE is not set in CREATE statement.",0) \
M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
@ -778,6 +778,7 @@ class IColumn;
M(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be downloaded by a single query", 0) \
M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \
M(UInt64, filesystem_cache_segments_batch_size, 20, "Limit on size of a single batch of file segments that a read buffer can request from cache. Too low value will lead to excessive requests to cache, too large may slow down eviction from cache", 0) \
M(UInt64, filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, 1000, "Wait time to lock cache for sapce reservation in filesystem cache", 0) \
\
M(Bool, use_page_cache_for_disks_without_file_cache, false, "Use userspace page cache for remote disks that don't have filesystem cache enabled.", 0) \
M(Bool, read_from_page_cache_if_exists_otherwise_bypass_cache, false, "Use userspace page cache in passive mode, similar to read_from_filesystem_cache_if_exists_otherwise_bypass_cache.", 0) \
@ -1192,6 +1193,7 @@ class IColumn;
FORMAT_FACTORY_SETTINGS(M, ALIAS) \
OBSOLETE_FORMAT_SETTINGS(M, ALIAS) \
/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS)
@ -1236,6 +1238,7 @@ private:
/*
* User-specified file format settings for File and URL engines.
*/
/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, LIST_OF_ALL_FORMAT_SETTINGS)
struct FormatFactorySettings : public BaseSettings<FormatFactorySettingsTraits>

View File

@ -90,9 +90,11 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"},
{"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"},
{"page_cache_inject_eviction", false, false, "Added userspace page cache"},
{"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"},
{"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"},
{"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."},
{"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"},
{"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"},
}},
{"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"},
{"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"},

View File

@ -4,6 +4,7 @@
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h>
#include <Core/Field.h>
#include <Parsers/IAST.h>
#include <Common/typeid_cast.h>
@ -56,6 +57,30 @@ bool DataTypeNullable::equals(const IDataType & rhs) const
return rhs.isNullable() && nested_data_type->equals(*static_cast<const DataTypeNullable &>(rhs).nested_data_type);
}
ColumnPtr DataTypeNullable::createColumnConst(size_t size, const Field & field) const
{
if (onlyNull())
{
auto column = createColumn();
column->insert(field);
return ColumnConst::create(std::move(column), size);
}
auto column = nested_data_type->createColumn();
bool is_null = field.isNull();
if (is_null)
nested_data_type->insertDefaultInto(*column);
else
column->insert(field);
auto null_mask = ColumnUInt8::create();
null_mask->getData().push_back(is_null ? 1 : 0);
auto res = ColumnNullable::create(std::move(column), std::move(null_mask));
return ColumnConst::create(std::move(res), size);
}
SerializationPtr DataTypeNullable::doGetDefaultSerialization() const
{
return std::make_shared<SerializationNullable>(nested_data_type->getDefaultSerialization());

View File

@ -41,6 +41,7 @@ public:
bool onlyNull() const override;
bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); }
bool canBePromoted() const override { return nested_data_type->canBePromoted(); }
ColumnPtr createColumnConst(size_t size, const Field & field) const override;
const DataTypePtr & getNestedType() const { return nested_data_type; }

View File

@ -423,7 +423,7 @@ MutableColumns CacheDictionary<dictionary_key_type>::aggregateColumnsInOrderOfKe
const DictionaryStorageFetchRequest & request,
const MutableColumns & fetched_columns,
const PaddedPODArray<KeyState> & key_index_to_state,
IColumn::Filter * const default_mask) const
IColumn::Filter * default_mask) const
{
MutableColumns aggregated_columns = request.makeAttributesResultColumns();
@ -473,7 +473,7 @@ MutableColumns CacheDictionary<dictionary_key_type>::aggregateColumns(
const PaddedPODArray<KeyState> & key_index_to_fetched_columns_from_storage_result,
const MutableColumns & fetched_columns_during_update,
const HashMap<KeyType, size_t> & found_keys_to_fetched_columns_during_update_index,
IColumn::Filter * const default_mask) const
IColumn::Filter * default_mask) const
{
/**
* Aggregation of columns fetched from storage and from source during update.

View File

@ -162,7 +162,7 @@ private:
const DictionaryStorageFetchRequest & request,
const MutableColumns & fetched_columns,
const PaddedPODArray<KeyState> & key_index_to_state,
IColumn::Filter * const default_mask = nullptr) const;
IColumn::Filter * default_mask = nullptr) const;
MutableColumns aggregateColumns(
const PaddedPODArray<KeyType> & keys,

View File

@ -14,7 +14,7 @@ class IRegionsHierarchyReader
public:
virtual bool readNext(RegionEntry & entry) = 0;
virtual ~IRegionsHierarchyReader() {}
virtual ~IRegionsHierarchyReader() = default;
};
using IRegionsHierarchyReaderPtr = std::unique_ptr<IRegionsHierarchyReader>;

View File

@ -568,7 +568,7 @@ bool RegExpTreeDictionary::setAttributesShortCircuit(
const String & data,
std::unordered_set<UInt64> & visited_nodes,
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
std::unordered_set<String> * const defaults) const
std::unordered_set<String> * defaults) const
{
if (visited_nodes.contains(id))
return attributes_to_set.attributesFull() == attributes.size();

View File

@ -210,7 +210,7 @@ private:
const String & data,
std::unordered_set<UInt64> & visited_nodes,
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
std::unordered_set<String> * const defaults) const;
std::unordered_set<String> * defaults) const;
struct RegexTreeNode;
using RegexTreeNodePtr = std::shared_ptr<RegexTreeNode>;

View File

@ -1,7 +1,27 @@
#include "DiskType.h"
#include <Poco/String.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
}
MetadataStorageType metadataTypeFromString(const String & type)
{
auto check_type = Poco::toLower(type);
if (check_type == "local")
return MetadataStorageType::Local;
if (check_type == "plain")
return MetadataStorageType::Plain;
if (check_type == "web")
return MetadataStorageType::StaticWeb;
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG,
"MetadataStorageFactory: unknown metadata storage type: {}", type);
}
bool DataSourceDescription::operator==(const DataSourceDescription & other) const
{
@ -14,4 +34,32 @@ bool DataSourceDescription::sameKind(const DataSourceDescription & other) const
== std::tie(other.type, other.object_storage_type, other.description);
}
std::string DataSourceDescription::toString() const
{
switch (type)
{
case DataSourceType::Local:
return "local";
case DataSourceType::RAM:
return "memory";
case DataSourceType::ObjectStorage:
{
switch (object_storage_type)
{
case ObjectStorageType::S3:
return "s3";
case ObjectStorageType::HDFS:
return "hdfs";
case ObjectStorageType::Azure:
return "azure_blob_storage";
case ObjectStorageType::Local:
return "local_blob_storage";
case ObjectStorageType::Web:
return "web";
case ObjectStorageType::None:
return "none";
}
}
}
}
}

View File

@ -17,7 +17,6 @@ enum class ObjectStorageType
{
None,
S3,
S3_Plain,
Azure,
HDFS,
Web,
@ -30,9 +29,11 @@ enum class MetadataStorageType
Local,
Plain,
StaticWeb,
Memory,
};
MetadataStorageType metadataTypeFromString(const String & type);
String toString(DataSourceType data_source_type);
struct DataSourceDescription
{
DataSourceType type;
@ -47,36 +48,7 @@ struct DataSourceDescription
bool operator==(const DataSourceDescription & other) const;
bool sameKind(const DataSourceDescription & other) const;
std::string toString() const
{
switch (type)
{
case DataSourceType::Local:
return "local";
case DataSourceType::RAM:
return "memory";
case DataSourceType::ObjectStorage:
{
switch (object_storage_type)
{
case ObjectStorageType::S3:
return "s3";
case ObjectStorageType::S3_Plain:
return "s3_plain";
case ObjectStorageType::HDFS:
return "hdfs";
case ObjectStorageType::Azure:
return "azure_blob_storage";
case ObjectStorageType::Local:
return "local_blob_storage";
case ObjectStorageType::Web:
return "web";
case ObjectStorageType::None:
return "none";
}
}
}
}
std::string toString() const;
};
}

View File

@ -637,7 +637,8 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size);
bool continue_predownload = file_segment.reserve(current_predownload_size);
bool continue_predownload = file_segment.reserve(
current_predownload_size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds);
if (continue_predownload)
{
LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, current_impl_buffer_size);
@ -992,7 +993,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
{
chassert(file_offset_of_buffer_end + size - 1 <= file_segment.range().right);
bool success = file_segment.reserve(size);
bool success = file_segment.reserve(size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds);
if (success)
{
chassert(file_segment.getCurrentWriteOffset() == static_cast<size_t>(implementation_buffer->getPosition()));

View File

@ -26,16 +26,18 @@ FileSegmentRangeWriter::FileSegmentRangeWriter(
FileCache * cache_,
const FileSegment::Key & key_,
const FileCacheUserInfo & user_,
size_t reserve_space_lock_wait_timeout_milliseconds_,
std::shared_ptr<FilesystemCacheLog> cache_log_,
const String & query_id_,
const String & source_path_)
: cache(cache_)
, key(key_)
, user(user_)
, reserve_space_lock_wait_timeout_milliseconds(reserve_space_lock_wait_timeout_milliseconds_)
, log(getLogger("FileSegmentRangeWriter"))
, cache_log(cache_log_)
, query_id(query_id_)
, source_path(source_path_)
, user(user_)
{
}
@ -89,7 +91,7 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset
size_t size_to_write = std::min(available_size, size);
bool reserved = file_segment->reserve(size_to_write);
bool reserved = file_segment->reserve(size_to_write, reserve_space_lock_wait_timeout_milliseconds);
if (!reserved)
{
appendFilesystemCacheLog(*file_segment);
@ -211,6 +213,7 @@ CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile(
, key(key_)
, query_id(query_id_)
, user(user_)
, reserve_space_lock_wait_timeout_milliseconds(settings_.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds)
, throw_on_error_from_cache(settings_.throw_on_error_from_cache)
, cache_log(!query_id_.empty() && settings_.enable_filesystem_cache_log ? cache_log_ : nullptr)
{
@ -251,7 +254,8 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size, bool t
if (!cache_writer)
{
cache_writer = std::make_unique<FileSegmentRangeWriter>(cache.get(), key, user, cache_log, query_id, source_path);
cache_writer = std::make_unique<FileSegmentRangeWriter>(
cache.get(), key, user, reserve_space_lock_wait_timeout_milliseconds, cache_log, query_id, source_path);
}
Stopwatch watch(CLOCK_MONOTONIC);

View File

@ -30,6 +30,7 @@ public:
FileCache * cache_,
const FileSegment::Key & key_,
const FileCacheUserInfo & user_,
size_t reserve_space_lock_wait_timeout_milliseconds_,
std::shared_ptr<FilesystemCacheLog> cache_log_,
const String & query_id_,
const String & source_path_);
@ -52,13 +53,14 @@ private:
void completeFileSegment();
FileCache * cache;
FileSegment::Key key;
const FileSegment::Key key;
const FileCacheUserInfo user;
const size_t reserve_space_lock_wait_timeout_milliseconds;
LoggerPtr log;
std::shared_ptr<FilesystemCacheLog> cache_log;
String query_id;
String source_path;
FileCacheUserInfo user;
const String query_id;
const String source_path;
FileSegmentsHolderPtr file_segments;
@ -99,11 +101,12 @@ private:
String source_path;
FileCacheKey key;
size_t current_download_offset = 0;
const String query_id;
const FileCacheUserInfo user;
const size_t reserve_space_lock_wait_timeout_milliseconds;
const bool throw_on_error_from_cache;
bool throw_on_error_from_cache;
size_t current_download_offset = 0;
bool cache_in_error_state_or_disabled = false;
std::unique_ptr<FileSegmentRangeWriter> cache_writer;

View File

@ -218,6 +218,7 @@ public:
virtual bool isReadOnly() const { return false; }
virtual bool isWriteOnce() const { return false; }
virtual bool isPlain() const { return false; }
virtual bool supportParallelWrite() const { return false; }

View File

@ -31,6 +31,8 @@ LocalObjectStorage::LocalObjectStorage(String key_prefix_)
description = *block_device_id;
else
description = "/";
fs::create_directories(key_prefix);
}
bool LocalObjectStorage::exists(const StoredObject & object) const
@ -106,13 +108,11 @@ std::unique_ptr<ReadBufferFromFileBase> LocalObjectStorage::readObject( /// NOLI
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const
{
const auto & path = object.remote_path;
if (!file_size)
file_size = tryGetSizeFromFilePath(path);
file_size = tryGetSizeFromFilePath(object.remote_path);
LOG_TEST(log, "Read object: {}", path);
return createReadBufferFromFileBase(path, patchSettings(read_settings), read_hint, file_size);
LOG_TEST(log, "Read object: {}", object.remote_path);
return createReadBufferFromFileBase(object.remote_path, patchSettings(read_settings), read_hint, file_size);
}
std::unique_ptr<WriteBufferFromFileBase> LocalObjectStorage::writeObject( /// NOLINT
@ -126,6 +126,11 @@ std::unique_ptr<WriteBufferFromFileBase> LocalObjectStorage::writeObject( /// NO
throw Exception(ErrorCodes::BAD_ARGUMENTS, "LocalObjectStorage doesn't support append to files");
LOG_TEST(log, "Write object: {}", object.remote_path);
/// Unlike real blob storage, in local fs we cannot create a file with non-existing prefix.
/// So let's create it.
fs::create_directories(fs::path(object.remote_path).parent_path());
return std::make_unique<WriteBufferFromFile>(object.remote_path, buf_size);
}
@ -157,9 +162,36 @@ void LocalObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
removeObjectIfExists(object);
}
ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & /* path */) const
ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Metadata is not supported for LocalObjectStorage");
ObjectMetadata object_metadata;
LOG_TEST(log, "Getting metadata for path: {}", path);
object_metadata.size_bytes = fs::file_size(path);
object_metadata.last_modified = Poco::Timestamp::fromEpochTime(
std::chrono::duration_cast<std::chrono::seconds>(fs::last_write_time(path).time_since_epoch()).count());
return object_metadata;
}
void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int /* max_keys */) const
{
for (const auto & entry : fs::directory_iterator(path))
{
if (entry.is_directory())
{
listObjects(entry.path(), children, 0);
continue;
}
auto metadata = getObjectMetadata(entry.path());
children.emplace_back(entry.path(), std::move(metadata));
}
}
bool LocalObjectStorage::existsOrHasAnyChild(const std::string & path) const
{
/// Unlike real object storage, existence of a prefix path can be checked by
/// just checking existence of this prefix directly, so simple exists is enough here.
return exists(StoredObject(path));
}
void LocalObjectStorage::copyObject( // NOLINT

View File

@ -58,6 +58,10 @@ public:
ObjectMetadata getObjectMetadata(const std::string & path) const override;
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
bool existsOrHasAnyChild(const std::string & path) const override;
void copyObject( /// NOLINT
const StoredObject & object_from,
const StoredObject & object_to,

View File

@ -32,6 +32,35 @@ void MetadataStorageFactory::registerMetadataStorageType(const std::string & met
}
}
std::string MetadataStorageFactory::getCompatibilityMetadataTypeHint(const ObjectStorageType & type)
{
switch (type)
{
case ObjectStorageType::S3:
case ObjectStorageType::HDFS:
case ObjectStorageType::Local:
case ObjectStorageType::Azure:
return "local";
case ObjectStorageType::Web:
return "web";
default:
return "";
}
}
std::string MetadataStorageFactory::getMetadataType(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const std::string & compatibility_type_hint)
{
if (compatibility_type_hint.empty() && !config.has(config_prefix + ".metadata_type"))
{
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Expected `metadata_type` in config");
}
return config.getString(config_prefix + ".metadata_type", compatibility_type_hint);
}
MetadataStoragePtr MetadataStorageFactory::create(
const std::string & name,
const Poco::Util::AbstractConfiguration & config,
@ -39,12 +68,7 @@ MetadataStoragePtr MetadataStorageFactory::create(
ObjectStoragePtr object_storage,
const std::string & compatibility_type_hint) const
{
if (compatibility_type_hint.empty() && !config.has(config_prefix + ".metadata_type"))
{
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Expected `metadata_type` in config");
}
const auto type = config.getString(config_prefix + ".metadata_type", compatibility_type_hint);
const auto type = getMetadataType(config, config_prefix, compatibility_type_hint);
const auto it = registry.find(type);
if (it == registry.end())

View File

@ -25,6 +25,13 @@ public:
ObjectStoragePtr object_storage,
const std::string & compatibility_type_hint) const;
static std::string getMetadataType(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const std::string & compatibility_type_hint = "");
static std::string getCompatibilityMetadataTypeHint(const ObjectStorageType & type);
private:
using Registry = std::unordered_map<String, Creator>;
Registry registry;

View File

@ -48,10 +48,7 @@ bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path
std::string directory = object_key.serialize();
if (!directory.ends_with('/'))
directory += '/';
RelativePathsWithMetadata files;
object_storage->listObjects(directory, files, 1);
return !files.empty();
return object_storage->existsOrHasAnyChild(directory);
}
uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) const

View File

@ -18,6 +18,8 @@
#include <Disks/ObjectStorages/Local/LocalObjectStorage.h>
#include <Disks/loadLocalDiskConfig.h>
#endif
#include <Disks/ObjectStorages/MetadataStorageFactory.h>
#include <Disks/ObjectStorages/PlainObjectStorage.h>
#include <Interpreters/Context.h>
#include <Common/Macros.h>
@ -32,6 +34,36 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
namespace
{
bool isPlainStorage(
ObjectStorageType type,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix)
{
auto compatibility_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(type);
auto metadata_type = MetadataStorageFactory::getMetadataType(config, config_prefix, compatibility_hint);
return metadataTypeFromString(metadata_type) == MetadataStorageType::Plain;
}
template <typename BaseObjectStorage, class ...Args>
ObjectStoragePtr createObjectStorage(
ObjectStorageType type,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Args && ...args)
{
if (isPlainStorage(type, config, config_prefix))
{
return std::make_shared<PlainObjectStorage<BaseObjectStorage>>(std::forward<Args>(args)...);
}
else
{
return std::make_shared<BaseObjectStorage>(std::forward<Args>(args)...);
}
}
}
ObjectStorageFactory & ObjectStorageFactory::instance()
{
static ObjectStorageFactory factory;
@ -127,14 +159,14 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory)
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
auto settings = getSettings(config, config_prefix, context);
auto client = getClient(config, config_prefix, context, *settings);
auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix);
auto key_generator = getKeyGenerator(uri, config, config_prefix);
auto object_storage = std::make_shared<S3ObjectStorage>(
std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name);
auto object_storage = createObjectStorage<S3ObjectStorage>(
ObjectStorageType::S3, config, config_prefix, std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name);
/// NOTE: should we still perform this check for clickhouse-disks?
if (!skip_access_check)
checkS3Capabilities(*object_storage, s3_capabilities, name);
checkS3Capabilities(*dynamic_cast<S3ObjectStorage *>(object_storage.get()), s3_capabilities, name);
return object_storage;
});
@ -163,14 +195,14 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory)
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
auto settings = getSettings(config, config_prefix, context);
auto client = getClient(config, config_prefix, context, *settings);
auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix);
auto key_generator = getKeyGenerator(uri, config, config_prefix);
auto object_storage = std::make_shared<S3PlainObjectStorage>(
auto object_storage = std::make_shared<PlainObjectStorage<S3ObjectStorage>>(
std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name);
/// NOTE: should we still perform this check for clickhouse-disks?
if (!skip_access_check)
checkS3Capabilities(*object_storage, s3_capabilities, name);
checkS3Capabilities(*dynamic_cast<S3ObjectStorage *>(object_storage.get()), s3_capabilities, name);
return object_storage;
});
@ -198,7 +230,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory)
context->getSettingsRef().hdfs_replication
);
return std::make_unique<HDFSObjectStorage>(uri, std::move(settings), config);
return createObjectStorage<HDFSObjectStorage>(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config);
});
}
#endif
@ -214,12 +246,11 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory)
bool /* skip_access_check */) -> ObjectStoragePtr
{
AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix);
return std::make_unique<AzureObjectStorage>(
name,
return createObjectStorage<AzureObjectStorage>(
ObjectStorageType::Azure, config, config_prefix, name,
getAzureBlobContainerClient(config, config_prefix),
getAzureBlobStorageSettings(config, config_prefix, context),
endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix);
};
factory.registerObjectStorageType("azure_blob_storage", creator);
factory.registerObjectStorageType("azure", creator);
@ -250,7 +281,7 @@ void registerWebObjectStorage(ObjectStorageFactory & factory)
ErrorCodes::BAD_ARGUMENTS, "Bad URI: `{}`. Error: {}", uri, e.what());
}
return std::make_shared<WebObjectStorage>(uri, context);
return createObjectStorage<WebObjectStorage>(ObjectStorageType::Web, config, config_prefix, uri, context);
});
}
@ -268,7 +299,7 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory)
loadDiskLocalConfig(name, config, config_prefix, context, object_key_prefix, keep_free_space_bytes);
/// keys are mapped to the fs, object_key_prefix is a directory also
fs::create_directories(object_key_prefix);
return std::make_shared<LocalObjectStorage>(object_key_prefix);
return createObjectStorage<LocalObjectStorage>(ObjectStorageType::Local, config, config_prefix, object_key_prefix);
};
factory.registerObjectStorageType("local_blob_storage", creator);

View File

@ -0,0 +1,35 @@
#pragma once
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <Common/ObjectStorageKeyGenerator.h>
namespace DB
{
/// Do not encode keys, store as-is, and do not require separate disk for metadata.
/// But because of this does not support renames/hardlinks/attrs/...
///
/// NOTE: This disk has excessive API calls.
template <typename BaseObjectStorage>
class PlainObjectStorage : public BaseObjectStorage
{
public:
template <class ...Args>
explicit PlainObjectStorage(Args && ...args)
: BaseObjectStorage(std::forward<Args>(args)...) {}
std::string getName() const override { return "" + BaseObjectStorage::getName(); }
/// Notes:
/// - supports BACKUP to this disk
/// - does not support INSERT into MergeTree table on this disk
bool isWriteOnce() const override { return true; }
bool isPlain() const override { return true; }
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override
{
return ObjectStorageKey::createAsRelative(BaseObjectStorage::getCommonKeyPrefix(), path);
}
};
}

Some files were not shown because too many files have changed in this diff Show More