mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Merge remote-tracking branch 'rschu1ze/master' into locate-mysql
This commit is contained in:
commit
1281acc295
195
.clang-tidy
195
.clang-tidy
@ -5,128 +5,127 @@
|
||||
# a) the new check is not controversial (this includes many checks in readability-* and google-*) or
|
||||
# b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*).
|
||||
|
||||
# TODO: Once clang(-tidy) 17 is the minimum, we can convert this list to YAML
|
||||
# See https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/ReleaseNotes.html#improvements-to-clang-tidy
|
||||
|
||||
# TODO Let clang-tidy check headers in further directories
|
||||
# --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$'
|
||||
HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$'
|
||||
|
||||
Checks: '*,
|
||||
-abseil-*,
|
||||
Checks: [
|
||||
'*',
|
||||
|
||||
-altera-*,
|
||||
'-abseil-*',
|
||||
|
||||
-android-*,
|
||||
'-altera-*',
|
||||
|
||||
-bugprone-assignment-in-if-condition,
|
||||
-bugprone-branch-clone,
|
||||
-bugprone-easily-swappable-parameters,
|
||||
-bugprone-exception-escape,
|
||||
-bugprone-implicit-widening-of-multiplication-result,
|
||||
-bugprone-narrowing-conversions,
|
||||
-bugprone-not-null-terminated-result,
|
||||
-bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
|
||||
-bugprone-unchecked-optional-access,
|
||||
'-android-*',
|
||||
|
||||
-cert-dcl16-c,
|
||||
-cert-dcl37-c,
|
||||
-cert-dcl51-cpp,
|
||||
-cert-err58-cpp,
|
||||
-cert-msc32-c,
|
||||
-cert-msc51-cpp,
|
||||
-cert-oop54-cpp,
|
||||
-cert-oop57-cpp,
|
||||
'-bugprone-assignment-in-if-condition',
|
||||
'-bugprone-branch-clone',
|
||||
'-bugprone-easily-swappable-parameters',
|
||||
'-bugprone-exception-escape',
|
||||
'-bugprone-implicit-widening-of-multiplication-result',
|
||||
'-bugprone-narrowing-conversions',
|
||||
'-bugprone-not-null-terminated-result',
|
||||
'-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
|
||||
'-bugprone-unchecked-optional-access',
|
||||
|
||||
-clang-analyzer-unix.Malloc,
|
||||
'-cert-dcl16-c',
|
||||
'-cert-dcl37-c',
|
||||
'-cert-dcl51-cpp',
|
||||
'-cert-err58-cpp',
|
||||
'-cert-msc32-c',
|
||||
'-cert-msc51-cpp',
|
||||
'-cert-oop54-cpp',
|
||||
'-cert-oop57-cpp',
|
||||
|
||||
-cppcoreguidelines-*, # impractical in a codebase as large as ClickHouse, also slow
|
||||
'-clang-analyzer-unix.Malloc',
|
||||
|
||||
-darwin-*,
|
||||
'-cppcoreguidelines-*', # impractical in a codebase as large as ClickHouse, also slow
|
||||
|
||||
-fuchsia-*,
|
||||
'-darwin-*',
|
||||
|
||||
-google-build-using-namespace,
|
||||
-google-readability-braces-around-statements,
|
||||
-google-readability-casting,
|
||||
-google-readability-function-size,
|
||||
-google-readability-namespace-comments,
|
||||
-google-readability-todo,
|
||||
'-fuchsia-*',
|
||||
|
||||
-hicpp-avoid-c-arrays,
|
||||
-hicpp-avoid-goto,
|
||||
-hicpp-braces-around-statements,
|
||||
-hicpp-explicit-conversions,
|
||||
-hicpp-function-size,
|
||||
-hicpp-member-init,
|
||||
-hicpp-move-const-arg,
|
||||
-hicpp-multiway-paths-covered,
|
||||
-hicpp-named-parameter,
|
||||
-hicpp-no-array-decay,
|
||||
-hicpp-no-assembler,
|
||||
-hicpp-no-malloc,
|
||||
-hicpp-signed-bitwise,
|
||||
-hicpp-special-member-functions,
|
||||
-hicpp-uppercase-literal-suffix,
|
||||
-hicpp-use-auto,
|
||||
-hicpp-use-emplace,
|
||||
-hicpp-vararg,
|
||||
'-google-build-using-namespace',
|
||||
'-google-readability-braces-around-statements',
|
||||
'-google-readability-casting',
|
||||
'-google-readability-function-size',
|
||||
'-google-readability-namespace-comments',
|
||||
'-google-readability-todo',
|
||||
|
||||
-linuxkernel-*,
|
||||
'-hicpp-avoid-c-arrays',
|
||||
'-hicpp-avoid-goto',
|
||||
'-hicpp-braces-around-statements',
|
||||
'-hicpp-explicit-conversions',
|
||||
'-hicpp-function-size',
|
||||
'-hicpp-member-init',
|
||||
'-hicpp-move-const-arg',
|
||||
'-hicpp-multiway-paths-covered',
|
||||
'-hicpp-named-parameter',
|
||||
'-hicpp-no-array-decay',
|
||||
'-hicpp-no-assembler',
|
||||
'-hicpp-no-malloc',
|
||||
'-hicpp-signed-bitwise',
|
||||
'-hicpp-special-member-functions',
|
||||
'-hicpp-uppercase-literal-suffix',
|
||||
'-hicpp-use-auto',
|
||||
'-hicpp-use-emplace',
|
||||
'-hicpp-vararg',
|
||||
|
||||
-llvm-*,
|
||||
'-linuxkernel-*',
|
||||
|
||||
-llvmlibc-*,
|
||||
'-llvm-*',
|
||||
|
||||
-openmp-*,
|
||||
'-llvmlibc-*',
|
||||
|
||||
-misc-const-correctness,
|
||||
-misc-include-cleaner, # useful but far too many occurrences
|
||||
-misc-no-recursion,
|
||||
-misc-non-private-member-variables-in-classes,
|
||||
-misc-confusable-identifiers, # useful but slooow
|
||||
-misc-use-anonymous-namespace,
|
||||
'-openmp-*',
|
||||
|
||||
-modernize-avoid-c-arrays,
|
||||
-modernize-concat-nested-namespaces,
|
||||
-modernize-macro-to-enum,
|
||||
-modernize-pass-by-value,
|
||||
-modernize-return-braced-init-list,
|
||||
-modernize-use-auto,
|
||||
-modernize-use-default-member-init,
|
||||
-modernize-use-emplace,
|
||||
-modernize-use-nodiscard,
|
||||
-modernize-use-override,
|
||||
-modernize-use-trailing-return-type,
|
||||
'-misc-const-correctness',
|
||||
'-misc-include-cleaner', # useful but far too many occurrences
|
||||
'-misc-no-recursion',
|
||||
'-misc-non-private-member-variables-in-classes',
|
||||
'-misc-confusable-identifiers', # useful but slooow
|
||||
'-misc-use-anonymous-namespace',
|
||||
|
||||
-performance-inefficient-string-concatenation,
|
||||
-performance-no-int-to-ptr,
|
||||
-performance-avoid-endl,
|
||||
-performance-unnecessary-value-param,
|
||||
'-modernize-avoid-c-arrays',
|
||||
'-modernize-concat-nested-namespaces',
|
||||
'-modernize-macro-to-enum',
|
||||
'-modernize-pass-by-value',
|
||||
'-modernize-return-braced-init-list',
|
||||
'-modernize-use-auto',
|
||||
'-modernize-use-default-member-init',
|
||||
'-modernize-use-emplace',
|
||||
'-modernize-use-nodiscard',
|
||||
'-modernize-use-override',
|
||||
'-modernize-use-trailing-return-type',
|
||||
|
||||
-portability-simd-intrinsics,
|
||||
'-performance-inefficient-string-concatenation',
|
||||
'-performance-no-int-to-ptr',
|
||||
'-performance-avoid-endl',
|
||||
'-performance-unnecessary-value-param',
|
||||
|
||||
-readability-avoid-unconditional-preprocessor-if,
|
||||
-readability-braces-around-statements,
|
||||
-readability-convert-member-functions-to-static,
|
||||
-readability-else-after-return,
|
||||
-readability-function-cognitive-complexity,
|
||||
-readability-function-size,
|
||||
-readability-identifier-length,
|
||||
-readability-identifier-naming, # useful but too slow
|
||||
-readability-implicit-bool-conversion,
|
||||
-readability-isolate-declaration,
|
||||
-readability-magic-numbers,
|
||||
-readability-named-parameter,
|
||||
-readability-redundant-declaration,
|
||||
-readability-simplify-boolean-expr,
|
||||
-readability-static-accessed-through-instance,
|
||||
-readability-suspicious-call-argument,
|
||||
-readability-uppercase-literal-suffix,
|
||||
-readability-use-anyofallof,
|
||||
'-portability-simd-intrinsics',
|
||||
|
||||
-zircon-*,
|
||||
'
|
||||
'-readability-avoid-unconditional-preprocessor-if',
|
||||
'-readability-braces-around-statements',
|
||||
'-readability-convert-member-functions-to-static',
|
||||
'-readability-else-after-return',
|
||||
'-readability-function-cognitive-complexity',
|
||||
'-readability-function-size',
|
||||
'-readability-identifier-length',
|
||||
'-readability-identifier-naming', # useful but too slow
|
||||
'-readability-implicit-bool-conversion',
|
||||
'-readability-isolate-declaration',
|
||||
'-readability-magic-numbers',
|
||||
'-readability-named-parameter',
|
||||
'-readability-redundant-declaration',
|
||||
'-readability-simplify-boolean-expr',
|
||||
'-readability-static-accessed-through-instance',
|
||||
'-readability-suspicious-call-argument',
|
||||
'-readability-uppercase-literal-suffix',
|
||||
'-readability-use-anyofallof',
|
||||
|
||||
'-zircon-*'
|
||||
]
|
||||
|
||||
WarningsAsErrors: '*'
|
||||
|
||||
|
@ -56,13 +56,13 @@ option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile t
|
||||
if (ENABLE_CHECK_HEAVY_BUILDS)
|
||||
# set DATA (since RSS does not work since 2.6.x+) to 5G
|
||||
set (RLIMIT_DATA 5000000000)
|
||||
# set VIRT (RLIMIT_AS) to 10G (DATA*10)
|
||||
# set VIRT (RLIMIT_AS) to 10G (DATA*2)
|
||||
set (RLIMIT_AS 10000000000)
|
||||
# set CPU time limit to 1000 seconds
|
||||
set (RLIMIT_CPU 1000)
|
||||
|
||||
# -fsanitize=memory and address are too heavy
|
||||
if (SANITIZE)
|
||||
if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE)
|
||||
set (RLIMIT_DATA 10000000000) # 10G
|
||||
endif()
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include "coverage.h"
|
||||
#include <sys/mman.h>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wreserved-identifier"
|
||||
#pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
|
||||
|
||||
/// WITH_COVERAGE enables the default implementation of code coverage,
|
||||
|
@ -59,8 +59,8 @@ using ComparatorWrapper = Comparator;
|
||||
|
||||
#endif
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wold-style-cast"
|
||||
|
||||
#include <miniselect/floyd_rivest_select.h>
|
||||
|
||||
@ -115,7 +115,7 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
|
||||
::partial_sort(first, middle, last, comparator());
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
template <typename RandomIt, typename Compare>
|
||||
void sort(RandomIt first, RandomIt last, Compare compare)
|
||||
|
2
contrib/libhdfs3
vendored
2
contrib/libhdfs3
vendored
@ -1 +1 @@
|
||||
Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103
|
||||
Subproject commit 0d04201c45359f0d0701fb1e8297d25eff7cfecf
|
@ -17,6 +17,8 @@
|
||||
#ifndef METROHASH_METROHASH_128_H
|
||||
#define METROHASH_METROHASH_128_H
|
||||
|
||||
// NOLINTBEGIN(readability-avoid-const-params-in-decls)
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
class MetroHash128
|
||||
@ -68,5 +70,6 @@ private:
|
||||
void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);
|
||||
void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);
|
||||
|
||||
// NOLINTEND(readability-avoid-const-params-in-decls)
|
||||
|
||||
#endif // #ifndef METROHASH_METROHASH_128_H
|
||||
|
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.2.1.2248"
|
||||
ARG VERSION="24.2.2.71"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -4,6 +4,9 @@ FROM clickhouse/fasttest:$FROM_TAG
|
||||
ENV CC=clang-${LLVM_VERSION}
|
||||
ENV CXX=clang++-${LLVM_VERSION}
|
||||
|
||||
# If the cctools is updated, then first build it in the CI, then update here in a different commit
|
||||
COPY --from=clickhouse/cctools:d9e3596e706b /cctools /cctools
|
||||
|
||||
# Rust toolchain and libraries
|
||||
ENV RUSTUP_HOME=/rust/rustup
|
||||
ENV CARGO_HOME=/rust/cargo
|
||||
@ -73,9 +76,6 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \
|
||||
"https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \
|
||||
&& chmod +x /usr/bin/clang-tidy-cache
|
||||
|
||||
# If the cctools is updated, then first build it in the CI, then update here in a different commit
|
||||
COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools
|
||||
|
||||
RUN mkdir /workdir && chmod 777 /workdir
|
||||
WORKDIR /workdir
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
# It's based on the assumption that we don't care about the cctools version so much
|
||||
# It even does not depend on the clickhouse/fasttest in the `docker/images.json`
|
||||
ARG FROM_TAG=latest
|
||||
FROM clickhouse/fasttest:$FROM_TAG
|
||||
FROM clickhouse/fasttest:$FROM_TAG as builder
|
||||
|
||||
ENV CC=clang-${LLVM_VERSION}
|
||||
ENV CXX=clang++-${LLVM_VERSION}
|
||||
@ -29,3 +29,6 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
|
||||
&& make install -j$(nproc) \
|
||||
&& cd ../.. \
|
||||
&& rm -rf cctools-port
|
||||
|
||||
FROM scratch
|
||||
COPY --from=builder /cctools /cctools
|
||||
|
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.2.1.2248"
|
||||
ARG VERSION="24.2.2.71"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="24.2.1.2248"
|
||||
ARG VERSION="24.2.2.71"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
|
64
docs/changelogs/v23.12.5.81-stable.md
Normal file
64
docs/changelogs/v23.12.5.81-stable.md
Normal file
@ -0,0 +1,64 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v23.12.5.81-stable (a0fbe3ae813) FIXME as compared to v23.12.4.15-stable (4233d111d20)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#60290](https://github.com/ClickHouse/ClickHouse/issues/60290): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Backported in [#60830](https://github.com/ClickHouse/ClickHouse/issues/60830): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#59883](https://github.com/ClickHouse/ClickHouse/issues/59883): If you want to run initdb scripts every time the ClickHouse container starts, you should initialize the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### CI Fix or Improvement (changelog entry is not required)
|
||||
|
||||
* Backported in [#60767](https://github.com/ClickHouse/ClickHouse/issues/60767): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60582](https://github.com/ClickHouse/ClickHouse/issues/60582): Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61041](https://github.com/ClickHouse/ClickHouse/issues/61041): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#61030](https://github.com/ClickHouse/ClickHouse/issues/61030): ... [#61022](https://github.com/ClickHouse/ClickHouse/pull/61022) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61224](https://github.com/ClickHouse/ClickHouse/issues/61224): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Backported in [#61190](https://github.com/ClickHouse/ClickHouse/issues/61190): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
|
||||
|
||||
#### NO CL ENTRY
|
||||
|
||||
* NO CL ENTRY: 'Revert "Backport [#59798](https://github.com/ClickHouse/ClickHouse/issues/59798) to 23.12: CI: do not reuse builds on release branches"'. [#59979](https://github.com/ClickHouse/ClickHouse/pull/59979) ([Max K.](https://github.com/maxknv)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)).
|
||||
* Make ZooKeeper actually sequentially consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)).
|
||||
* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
24
docs/changelogs/v23.3.21.26-lts.md
Normal file
24
docs/changelogs/v23.3.21.26-lts.md
Normal file
@ -0,0 +1,24 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v23.3.21.26-lts (d9672a3731f) FIXME as compared to v23.3.20.27-lts (cc974ba4f81)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix reading from sparse columns after restart [#49660](https://github.com/ClickHouse/ClickHouse/pull/49660) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
30
docs/changelogs/v23.8.11.28-lts.md
Normal file
30
docs/changelogs/v23.8.11.28-lts.md
Normal file
@ -0,0 +1,30 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v23.8.11.28-lts (31879d2ab4c) FIXME as compared to v23.8.10.43-lts (a278225bba9)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#60828](https://github.com/ClickHouse/ClickHouse/issues/60828): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### NO CL ENTRY
|
||||
|
||||
* NO CL ENTRY: 'Use the current branch test-utils to build cctools'. [#61276](https://github.com/ClickHouse/ClickHouse/pull/61276) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
26
docs/changelogs/v24.1.7.18-stable.md
Normal file
26
docs/changelogs/v24.1.7.18-stable.md
Normal file
@ -0,0 +1,26 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.1.7.18-stable (90925babd78) FIXME as compared to v24.1.6.52-stable (fa09f677bc9)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### CI Fix or Improvement (changelog entry is not required)
|
||||
|
||||
* Backported in [#61043](https://github.com/ClickHouse/ClickHouse/issues/61043): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#61168](https://github.com/ClickHouse/ClickHouse/issues/61168): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61192](https://github.com/ClickHouse/ClickHouse/issues/61192): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
55
docs/changelogs/v24.2.2.71-stable.md
Normal file
55
docs/changelogs/v24.2.2.71-stable.md
Normal file
@ -0,0 +1,55 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.2.2.71-stable (9293d361e72) FIXME as compared to v24.2.1.2248-stable (891689a4150)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#60834](https://github.com/ClickHouse/ClickHouse/issues/60834): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* PartsSplitter invalid ranges for the same part [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Try to avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
|
||||
* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### CI Fix or Improvement (changelog entry is not required)
|
||||
|
||||
* Backported in [#60758](https://github.com/ClickHouse/ClickHouse/issues/60758): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60706](https://github.com/ClickHouse/ClickHouse/issues/60706): Eliminates the need to provide input args to docker server jobs to clean yml files. [#60602](https://github.com/ClickHouse/ClickHouse/pull/60602) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61045](https://github.com/ClickHouse/ClickHouse/issues/61045): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60721](https://github.com/ClickHouse/ClickHouse/issues/60721): Fix build_report job so that it's defined by ci_config only (not yml file). [#60613](https://github.com/ClickHouse/ClickHouse/pull/60613) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60668](https://github.com/ClickHouse/ClickHouse/issues/60668): Do not await CI pending jobs on release branches; decrease wait timeout to fit into the GitHub job timeout. [#60652](https://github.com/ClickHouse/ClickHouse/pull/60652) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60863](https://github.com/ClickHouse/ClickHouse/issues/60863): Set limited number of builds for "special build check" report in backports. [#60850](https://github.com/ClickHouse/ClickHouse/pull/60850) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60946](https://github.com/ClickHouse/ClickHouse/issues/60946): ... [#60935](https://github.com/ClickHouse/ClickHouse/pull/60935) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60972](https://github.com/ClickHouse/ClickHouse/issues/60972): ... [#60952](https://github.com/ClickHouse/ClickHouse/pull/60952) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60980](https://github.com/ClickHouse/ClickHouse/issues/60980): ... [#60958](https://github.com/ClickHouse/ClickHouse/pull/60958) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61170](https://github.com/ClickHouse/ClickHouse/issues/61170): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61181](https://github.com/ClickHouse/ClickHouse/issues/61181): ... [#61172](https://github.com/ClickHouse/ClickHouse/pull/61172) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61228](https://github.com/ClickHouse/ClickHouse/issues/61228): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Backported in [#61194](https://github.com/ClickHouse/ClickHouse/issues/61194): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61244](https://github.com/ClickHouse/ClickHouse/issues/61244): ... [#61214](https://github.com/ClickHouse/ClickHouse/pull/61214) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61388](https://github.com/ClickHouse/ClickHouse/issues/61388):. [#61373](https://github.com/ClickHouse/ClickHouse/pull/61373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* CI: make workflow yml abstract [#60421](https://github.com/ClickHouse/ClickHouse/pull/60421) ([Max K.](https://github.com/maxknv)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* General sanity in function `seriesOutliersDetectTukey` [#60535](https://github.com/ClickHouse/ClickHouse/pull/60535) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
@ -18,8 +18,8 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
|
||||
name1 [type1],
|
||||
name2 [type2],
|
||||
...
|
||||
) ENGINE = RabbitMQ SETTINGS
|
||||
rabbitmq_host_port = 'host:port' [or rabbitmq_address = 'amqp(s)://guest:guest@localhost/vhost'],
|
||||
@ -198,6 +198,10 @@ Additional virtual columns when `kafka_handle_error_mode='stream'`:
|
||||
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully.
|
||||
|
||||
## Caveats {#caveats}
|
||||
|
||||
Even though you may specify [default column expressions](/docs/en/sql-reference/statements/create/table.md/#default_values) (such as `DEFAULT`, `MATERIALIZED`, `ALIAS`) in the table definition, these will be ignored. Instead, the columns will be filled with their respective default values for their types.
|
||||
|
||||
## Data formats support {#data-formats-support}
|
||||
|
||||
RabbitMQ engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse.
|
||||
|
@ -946,96 +946,6 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF
|
||||
|
||||
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting.
|
||||
|
||||
### Dynamic Storage
|
||||
|
||||
This example query shows how to attach a table stored at a URL and configure the
|
||||
remote storage within the query. The web storage is not configured in the ClickHouse
|
||||
configuration files; all the settings are in the CREATE/ATTACH query.
|
||||
|
||||
:::note
|
||||
The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
|
||||
:::
|
||||
|
||||
#### Example dynamic web storage
|
||||
|
||||
:::tip
|
||||
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver)
|
||||
:::
|
||||
|
||||
In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.
|
||||
|
||||
```sql
|
||||
# highlight-next-line
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
### Nested Dynamic Storage
|
||||
|
||||
This example query builds on the above dynamic disk configuration and shows how to
|
||||
use a local disk to cache data from a table stored at a URL. Neither the cache disk
|
||||
nor the web storage is configured in the ClickHouse configuration files; both are
|
||||
configured in the CREATE/ATTACH query settings.
|
||||
|
||||
In the settings highlighted below notice that the disk of `type=web` is nested within
|
||||
the disk of `type=cache`.
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=cache,
|
||||
max_size='1Gi',
|
||||
path='/var/lib/clickhouse/custom_disk_cache/',
|
||||
disk=disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
)
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
### Details {#details}
|
||||
|
||||
In the case of `MergeTree` tables, data is getting to disk in different ways:
|
||||
@ -1064,13 +974,11 @@ During this time, they are not moved to other volumes or disks. Therefore, until
|
||||
|
||||
User can assign new big parts to different disks of a [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures) volume in a balanced way using the [min_bytes_to_rebalance_partition_over_jbod](/docs/en/operations/settings/merge-tree-settings.md/#min-bytes-to-rebalance-partition-over-jbod) setting.
|
||||
|
||||
## Using S3 for Data Storage {#table_engine-mergetree-s3}
|
||||
## Using External Storage for Data Storage {#table_engine-mergetree-s3}
|
||||
|
||||
:::note
|
||||
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
|
||||
:::
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. See [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage) for more details.
|
||||
|
||||
`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
|
||||
Example for [S3](https://aws.amazon.com/s3/) as external storage using a disk with type `s3`.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
@ -1112,253 +1020,12 @@ Configuration markup:
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Also see [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage).
|
||||
|
||||
:::note cache configuration
|
||||
ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions.
|
||||
:::
|
||||
|
||||
### Configuring the S3 disk
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
|
||||
- `access_key_id` — S3 access key id.
|
||||
- `secret_access_key` — S3 secret access key.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `region` — S3 region name.
|
||||
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
|
||||
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
|
||||
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
|
||||
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
|
||||
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
|
||||
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
|
||||
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
|
||||
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
|
||||
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
|
||||
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
|
||||
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
|
||||
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
|
||||
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
|
||||
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
|
||||
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
|
||||
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `key_template` — Defines the format in which the object keys are generated. By default, ClickHouse takes the `root path` from the `endpoint` option and adds a randomly generated suffix. That suffix is a directory with 3 random symbols and a file name with 29 random symbols. With this option you have full control over how the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of the object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only a subset of the syntax is supported. Check if your preferred format is supported before using this option. The disk isn't initialized if ClickHouse is unable to generate a key from the value of `key_template`. It requires the feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key) to be enabled. It forbids declaring the `root path` in the `endpoint` option. It requires the option `key_compatibility_prefix` to be defined.
|
||||
- `key_compatibility_prefix` — This option is required when the option `key_template` is in use. In order to be able to read the object keys which were stored in the metadata files with a metadata version lower than `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.
|
||||
|
||||
### Configuring the cache
|
||||
|
||||
This is the cache configuration from above:
|
||||
```xml
|
||||
<s3_cache>
|
||||
<type>cache</type>
|
||||
<disk>s3</disk>
|
||||
<path>/var/lib/clickhouse/disks/s3_cache/</path>
|
||||
<max_size>10Gi</max_size>
|
||||
</s3_cache>
|
||||
```
|
||||
|
||||
These parameters define the cache layer:
|
||||
- `type` — If a disk is of type `cache` it caches mark and index files in memory.
|
||||
- `disk` — The name of the disk that will be cached.
|
||||
|
||||
Cache parameters:
|
||||
- `path` — The path where metadata for the cache is stored.
|
||||
- `max_size` — The size (amount of disk space) that the cache can grow to.
|
||||
|
||||
:::tip
|
||||
There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details.
|
||||
:::
|
||||
|
||||
S3 disk can be configured as `main` or `cold` storage:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
|
||||
<access_key_id>your_access_key_id</access_key_id>
|
||||
<secret_access_key>your_secret_access_key</secret_access_key>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3_main>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3_main>
|
||||
<s3_cold>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>default</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>s3</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
<move_factor>0.2</move_factor>
|
||||
</s3_cold>
|
||||
</policies>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
With the `cold` option, data can be moved to S3 when the free space on the local disk becomes smaller than `move_factor * disk_size`, or by a TTL move rule.
|
||||
|
||||
## Using Azure Blob Storage for Data Storage {#table_engine-mergetree-azure-blob-storage}
|
||||
|
||||
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
|
||||
|
||||
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<blob_storage_disk>
|
||||
<type>azure_blob_storage</type>
|
||||
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
|
||||
<container_name>container</container_name>
|
||||
<account_name>account</account_name>
|
||||
<account_key>pass123</account_key>
|
||||
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
|
||||
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</blob_storage_disk>
|
||||
</disks>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Connection parameters:
|
||||
* `endpoint` — AzureBlobStorage endpoint URL with container & prefix. Optionally can contain account_name if the authentication method used needs it. (`http://account.blob.core.windows.net:{port}/[account_name]{container_name}/{data_prefix}`) or these parameters can be provided separately using storage_account_url, account_name & container. For specifying prefix, endpoint should be used.
|
||||
* `endpoint_contains_account_name` - This flag is used to specify if endpoint contains account_name as it is only needed for certain authentication methods. (Default : true)
|
||||
* `storage_account_url` - Required if endpoint is not specified, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
|
||||
* `container_name` - Target container name, defaults to `default-container`.
|
||||
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
|
||||
|
||||
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
|
||||
* `connection_string` - For authentication using a connection string.
|
||||
* `account_name` and `account_key` - For authentication using Shared Key.
|
||||
|
||||
Limit parameters (mainly for internal usage):
|
||||
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
|
||||
* `min_bytes_for_seek` - Limits the size of a seekable region.
|
||||
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
|
||||
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
|
||||
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
|
||||
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
|
||||
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
|
||||
|
||||
:::note Zero-copy replication is not ready for production
|
||||
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
|
||||
:::
|
||||
|
||||
## HDFS storage {#hdfs-storage}
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `hdfs`
|
||||
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<hdfs>
|
||||
<type>hdfs</type>
|
||||
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
|
||||
<skip_access_check>true</skip_access_check>
|
||||
</hdfs>
|
||||
<hdd>
|
||||
<type>local</type>
|
||||
<path>/</path>
|
||||
</hdd>
|
||||
</disks>
|
||||
<policies>
|
||||
<hdfs>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>hdfs</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>hdd</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
</hdfs>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Web storage (read-only) {#web-storage}
|
||||
|
||||
Web storage can be used for read-only purposes. An example use is for hosting sample
|
||||
data, or for migrating data.
|
||||
|
||||
:::tip
|
||||
Storage can also be configured temporarily within a query, if a web dataset is not expected
|
||||
to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the
|
||||
configuration file.
|
||||
:::
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `web`
|
||||
- the data is hosted at `http://nginx:80/test1/`
|
||||
- a cache on local storage is used
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>http://nginx:80/test1/</endpoint>
|
||||
</web>
|
||||
<cached_web>
|
||||
<type>cache</type>
|
||||
<disk>web</disk>
|
||||
<path>cached_web_cache/</path>
|
||||
<max_size>100000000</max_size>
|
||||
</cached_web>
|
||||
</disks>
|
||||
<policies>
|
||||
<web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</web>
|
||||
<cached_web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cached_web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</cached_web>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_part` — Name of a part.
|
||||
|
@ -10,10 +10,14 @@ The embeddings and the metadata are stored in separate files in the raw data. A
|
||||
converts them to CSV and imports them into ClickHouse. You can use the following `download.sh` script for that:
|
||||
|
||||
```bash
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata
|
||||
python3 process.py ${1} # merge files and convert to CSV
|
||||
number=${1}
|
||||
if [[ $number == '' ]]; then
|
||||
number=1
|
||||
fi;
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${number}.npy # download image embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${number}.npy # download text embedding
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${number}.parquet # download metadata
|
||||
python3 process.py $number # merge files and convert to CSV
|
||||
```
|
||||
Script `process.py` is defined as follows:
|
||||
|
||||
|
@ -5,26 +5,416 @@ sidebar_label: "External Disks for Storing Data"
|
||||
title: "External Disks for Storing Data"
|
||||
---
|
||||
|
||||
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon S3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)).
|
||||
Data processed in ClickHouse is usually stored in the local file system — on the same machine as the ClickHouse server. That requires large-capacity disks, which can be quite expensive. To avoid that, you can store the data remotely. Several storage types are supported:
|
||||
1. [Amazon S3](https://aws.amazon.com/s3/) object storage.
|
||||
2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)).
|
||||
3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs).
|
||||
|
||||
To work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
|
||||
:::note ClickHouse also has support for external table engines, which are different from the external storage options described on this page: they allow reading data stored in some general file format (like Parquet), while this page describes storage configuration for ClickHouse `MergeTree` family or `Log` family tables.
|
||||
1. to work with data stored on `Amazon S3` disks, use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine.
|
||||
2. to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
|
||||
3. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine.
|
||||
:::
|
||||
|
||||
To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver).
|
||||
## Configuring external storage {#configuring-external-storage}
|
||||
|
||||
## Configuring HDFS {#configuring-hdfs}
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly.
|
||||
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
|
||||
Disk configuration requires:
|
||||
1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`.
|
||||
2. Configuration of a specific external storage type.
|
||||
|
||||
Configuration markup:
|
||||
Starting from ClickHouse version 24.1, it is possible to use a new configuration option.
|
||||
It requires specifying:
|
||||
1. `type` equal to `object_storage`
|
||||
2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`.
|
||||
Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web`.
|
||||
Usage of the `plain` metadata type is described in the [plain storage section](/docs/en/operations/storing-data.md/#storing-data-on-webserver). The `web` metadata type can be used only with the `web` object storage type. The `local` metadata type stores metadata files locally (each metadata file contains a mapping to files in object storage and some additional meta information about them).
|
||||
|
||||
E.g. configuration option
|
||||
``` xml
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3>
|
||||
```
|
||||
|
||||
is equal to configuration (from `24.1`):
|
||||
``` xml
|
||||
<s3>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>local</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3>
|
||||
```
|
||||
|
||||
Configuration
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>s3_plain</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
is equal to
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>plain</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
Example of full storage configuration will look like:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Starting with ClickHouse version 24.1, it can also look like:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<s3>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>local</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
In order to make a specific kind of storage a default option for all `MergeTree` tables add the following section to configuration file:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<merge_tree>
|
||||
<storage_policy>s3</storage_policy>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
If you want to apply a specific storage policy only to a specific table, you can define it in the settings while creating the table:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (a Int32, b String)
|
||||
ENGINE = MergeTree() ORDER BY a
|
||||
SETTINGS storage_policy = 's3';
|
||||
```
|
||||
|
||||
You can also use `disk` instead of `storage_policy`. In this case it is not required to have a `storage_policy` section in the configuration file; a `disk` section is enough.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (a Int32, b String)
|
||||
ENGINE = MergeTree() ORDER BY a
|
||||
SETTINGS disk = 's3';
|
||||
```
|
||||
|
||||
## Dynamic Configuration {#dynamic-configuration}
|
||||
|
||||
It is also possible to specify a storage configuration without a predefined disk in a configuration file; instead, the disk can be configured in the `CREATE`/`ATTACH` query settings.
|
||||
|
||||
The following example query shows how to attach a table stored at a URL and configure the remote (web) storage within the query itself.
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
The example below adds cache to external storage.
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=cache,
|
||||
max_size='1Gi',
|
||||
path='/var/lib/clickhouse/custom_disk_cache/',
|
||||
disk=disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
)
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
In the settings highlighted above notice that the disk of `type=web` is nested within
|
||||
the disk of `type=cache`.
|
||||
|
||||
:::note
|
||||
The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
|
||||
:::
|
||||
|
||||
A combination of config-based configuration and sql-defined configuration is also possible:
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=cache,
|
||||
max_size='1Gi',
|
||||
path='/var/lib/clickhouse/custom_disk_cache/',
|
||||
disk=disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
)
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
where `web` is a disk defined in the server configuration file:
|
||||
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/</endpoint>
|
||||
</web>
|
||||
</disks>
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
### Using S3 Storage {#s3-storage}
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
|
||||
- `access_key_id` — S3 access key id.
|
||||
- `secret_access_key` — S3 secret access key.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `region` — S3 region name.
|
||||
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
|
||||
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
|
||||
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
|
||||
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
|
||||
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
|
||||
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
|
||||
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
|
||||
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
|
||||
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
|
||||
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
|
||||
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
|
||||
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
|
||||
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
|
||||
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
|
||||
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
|
||||
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `key_template` — Define the format with which the object keys are generated. By default, ClickHouse takes `root path` from `endpoint` option and adds a randomly generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have full control over how the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if ClickHouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`.
|
||||
- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the object keys which were stored in the metadata files with the metadata version lower than `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.
|
||||
|
||||
:::note
|
||||
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
|
||||
:::
|
||||
|
||||
### Using Plain Storage {#plain-storage}
|
||||
|
||||
In `22.10` a new disk type `s3_plain` was introduced, which provides a write-once storage. Configuration parameters are the same as for `s3` disk type.
|
||||
Unlike `s3` disk type, it stores data as is, i.e. instead of randomly-generated blob names, it uses normal file names (the same way as ClickHouse stores files on local disk) and does not store any metadata locally, i.e. the metadata is derived from data on `s3`.
|
||||
|
||||
This disk type allows keeping a static version of the table, as it does not allow executing merges on the existing data and does not allow inserting new data.
|
||||
A use case for this disk type is to create backups on it, which can be done via `BACKUP TABLE data TO Disk('plain_disk_name', 'backup_name')`. Afterwards you can do `RESTORE TABLE data AS data_restored FROM Disk('plain_disk_name', 'backup_name')` or using `ATTACH TABLE data (...) ENGINE = MergeTree() SETTINGS disk = 'plain_disk_name'`.
|
||||
|
||||
Configuration:
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>s3_plain</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
Starting from `24.1` it is possible to configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using `plain` metadata type.
|
||||
|
||||
Configuration:
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>azure</object_storage_type>
|
||||
<metadata_type>plain</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_environment_credentials>1</use_environment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
### Using Azure Blob Storage {#azure-blob-storage}
|
||||
|
||||
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
|
||||
|
||||
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<blob_storage_disk>
|
||||
<type>azure_blob_storage</type>
|
||||
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
|
||||
<container_name>container</container_name>
|
||||
<account_name>account</account_name>
|
||||
<account_key>pass123</account_key>
|
||||
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
|
||||
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</blob_storage_disk>
|
||||
</disks>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Connection parameters:
|
||||
* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
|
||||
* `container_name` - Target container name, defaults to `default-container`.
|
||||
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
|
||||
|
||||
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
|
||||
* `connection_string` - For authentication using a connection string.
|
||||
* `account_name` and `account_key` - For authentication using Shared Key.
|
||||
|
||||
Limit parameters (mainly for internal usage):
|
||||
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
|
||||
* `min_bytes_for_seek` - Limits the size of a seekable region.
|
||||
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
|
||||
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
|
||||
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
|
||||
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
|
||||
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
|
||||
|
||||
:::note Zero-copy replication is not ready for production
|
||||
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
|
||||
:::
|
||||
|
||||
## Using HDFS storage {#hdfs-storage}
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `hdfs`
|
||||
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<hdfs>
|
||||
<type>hdfs</type>
|
||||
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
|
||||
<skip_access_check>true</skip_access_check>
|
||||
</hdfs>
|
||||
<hdd>
|
||||
<type>local</type>
|
||||
<path>/</path>
|
||||
</hdd>
|
||||
</disks>
|
||||
<policies>
|
||||
<hdfs>
|
||||
@ -32,26 +422,17 @@ Configuration markup:
|
||||
<main>
|
||||
<disk>hdfs</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>hdd</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
</hdfs>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
|
||||
<merge_tree>
|
||||
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.
|
||||
|
||||
## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system}
|
||||
### Using Data Encryption {#encrypted-virtual-file-system}
|
||||
|
||||
You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#hdfs-storage) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one.
|
||||
|
||||
@ -112,7 +493,7 @@ Example of disk configuration:
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Using local cache {#using-local-cache}
|
||||
### Using local cache {#using-local-cache}
|
||||
|
||||
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
|
||||
For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
|
||||
@ -275,23 +656,92 @@ Cache profile events:
|
||||
|
||||
- `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds`
|
||||
|
||||
## Using in-memory cache (userspace page cache) {#userspace-page-cache}
|
||||
|
||||
The File Cache described above stores cached data in local files. Alternatively, object-store-based disks can be configured to use "Userspace Page Cache", which is RAM-only. Userspace page cache is recommended only if file cache can't be used for some reason, e.g. if the machine doesn't have a local disk at all. Note that file cache effectively uses RAM for caching too, since the OS caches contents of local files.
|
||||
|
||||
To enable userspace page cache for disks that don't use file cache, use setting `use_page_cache_for_disks_without_file_cache`.
|
||||
|
||||
By default, on Linux, the userspace page cache will use all available memory, similar to the OS page cache. In tools like `top` and `ps`, the clickhouse server process will typically show resident set size near 100% of the machine's RAM - this is normal, and most of this memory is actually reclaimable by the OS on memory pressure (`MADV_FREE`). This behavior can be disabled with server setting `page_cache_use_madv_free = 0`, making the userspace page cache just use a fixed amount of memory `page_cache_size` with no special interaction with the OS. On Mac OS, `page_cache_use_madv_free` is always disabled as it doesn't have lazy `MADV_FREE`.
|
||||
|
||||
Unfortunately, `page_cache_use_madv_free` makes it difficult to tell if the server is close to running out of memory, since the RSS metric becomes useless. Async metric `UnreclaimableRSS` shows the amount of physical memory used by the server, excluding the memory reclaimable by the OS: `select value from system.asynchronous_metrics where metric = 'UnreclaimableRSS'`. Use it for monitoring instead of RSS. This metric is only available if `page_cache_use_madv_free` is enabled.
|
||||
|
||||
## Storing Data on Web Server {#storing-data-on-webserver}
|
||||
|
||||
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
|
||||
### Using static Web storage (read-only) {#web-storage}
|
||||
|
||||
This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md).
|
||||
Web storage can be used for read-only purposes. An example use is for hosting sample data, or for migrating data.
|
||||
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
|
||||
|
||||
Web server storage is supported only for the [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) engine families. To access the data stored on a `web` disk, use the [storage_policy](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#terms) setting when executing the query. For example, `ATTACH TABLE table_web UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'`.
|
||||
In this sample configuration:
|
||||
- the disk is of type `web`
|
||||
- the data is hosted at `http://nginx:80/test1/`
|
||||
- a cache on local storage is used
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>http://nginx:80/test1/</endpoint>
|
||||
</web>
|
||||
<cached_web>
|
||||
<type>cache</type>
|
||||
<disk>web</disk>
|
||||
<path>cached_web_cache/</path>
|
||||
<max_size>100000000</max_size>
|
||||
</cached_web>
|
||||
</disks>
|
||||
<policies>
|
||||
<web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</web>
|
||||
<cached_web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cached_web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</cached_web>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
:::tip
|
||||
Storage can also be configured temporarily within a query, if a web dataset is not expected
|
||||
to be used routinely, see [dynamic configuration](#dynamic-configuration) and skip editing the
|
||||
configuration file.
|
||||
:::
|
||||
|
||||
:::tip
|
||||
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver)
|
||||
:::
|
||||
|
||||
In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.
|
||||
|
||||
```sql
|
||||
# highlight-next-line
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
A ready test case. You need to add this configuration to config:
|
||||
|
||||
@ -487,7 +937,7 @@ If URL is not reachable on disk load when the server is starting up tables, then
|
||||
Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read.
|
||||
|
||||
|
||||
## Zero-copy Replication (not ready for production) {#zero-copy}
|
||||
### Zero-copy Replication (not ready for production) {#zero-copy}
|
||||
|
||||
Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.
|
||||
|
||||
|
@ -63,8 +63,8 @@ covarPopStable(x, y)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x`: The first data column. [String literal](../syntax#syntax-string-literal)
|
||||
- `y`: The second data column. [Expression](../syntax#syntax-expressions)
|
||||
- `x`: The first data column. [String literal](../../syntax#syntax-string-literal)
|
||||
- `y`: The second data column. [Expression](../../syntax#syntax-expressions)
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -8,7 +8,7 @@ This page contains information on the `varSamp` and `varSampStable` ClickHouse f
|
||||
|
||||
## varSamp
|
||||
|
||||
Calculate the sample variance of a data set.
|
||||
Calculate the sample variance of a data set.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -18,7 +18,7 @@ varSamp(expr)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../syntax#syntax-expressions)
|
||||
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions)
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -78,7 +78,7 @@ varSampStable(expr)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../syntax#syntax-expressions)
|
||||
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions)
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -36,9 +36,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t
|
||||
|
||||
The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter.
|
||||
|
||||
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
|
||||
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings-formats.md#date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
|
||||
|
||||
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting.
|
||||
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format) setting.
|
||||
|
||||
## Examples
|
||||
|
||||
@ -147,8 +147,8 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse
|
||||
- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md)
|
||||
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
|
||||
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
|
||||
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format)
|
||||
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format)
|
||||
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format)
|
||||
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format)
|
||||
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
|
||||
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
|
||||
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
|
||||
|
@ -27,9 +27,9 @@ DateTime([timezone])
|
||||
|
||||
Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`.
|
||||
|
||||
ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime).
|
||||
ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings-formats.md#date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime).
|
||||
|
||||
При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_input_format).
|
||||
При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format).
|
||||
|
||||
## Примеры {#primery}
|
||||
|
||||
@ -119,8 +119,8 @@ FROM dt
|
||||
- [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md)
|
||||
- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
|
||||
- [Функции для работы с массивами](../../sql-reference/functions/array-functions.md)
|
||||
- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format)
|
||||
- [Настройка `date_time_output_format`](../../operations/settings/index.md)
|
||||
- [Настройка `date_time_input_format`](../../operations/settings/settings-formats.md#date_time_input_format)
|
||||
- [Настройка `date_time_output_format`](../../operations/settings/settings-formats.md#date_time_output_format)
|
||||
- [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
|
||||
- [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone)
|
||||
- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)
|
||||
|
@ -1392,8 +1392,8 @@ void Client::readArguments(
|
||||
}
|
||||
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseClient(int argc, char ** argv)
|
||||
{
|
||||
|
@ -109,8 +109,8 @@ static std::vector<std::string> extractFromConfig(
|
||||
return {configuration->getString(key)};
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseExtractFromConfig(int argc, char ** argv)
|
||||
{
|
||||
|
@ -70,8 +70,8 @@ void skipSpacesAndComments(const char*& pos, const char* end, bool print_comment
|
||||
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
extern const char * auto_time_zones[];
|
||||
|
||||
|
@ -959,8 +959,8 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
|
||||
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseLocal(int argc, char ** argv)
|
||||
{
|
||||
|
@ -1204,8 +1204,8 @@ public:
|
||||
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseObfuscator(int argc, char ** argv)
|
||||
try
|
||||
|
@ -133,20 +133,20 @@ public:
|
||||
/// This function also enables custom prefixes to be used.
|
||||
void setCustomSettingsPrefixes(const Strings & prefixes);
|
||||
void setCustomSettingsPrefixes(const String & comma_separated_prefixes);
|
||||
bool isSettingNameAllowed(const std::string_view name) const;
|
||||
void checkSettingNameIsAllowed(const std::string_view name) const;
|
||||
bool isSettingNameAllowed(std::string_view name) const;
|
||||
void checkSettingNameIsAllowed(std::string_view name) const;
|
||||
|
||||
/// Allows implicit user creation without password (by default it's allowed).
|
||||
/// In other words, allow 'CREATE USER' queries without 'IDENTIFIED WITH' clause.
|
||||
void setImplicitNoPasswordAllowed(const bool allow_implicit_no_password_);
|
||||
void setImplicitNoPasswordAllowed(bool allow_implicit_no_password_);
|
||||
bool isImplicitNoPasswordAllowed() const;
|
||||
|
||||
/// Allows users without password (by default it's allowed).
|
||||
void setNoPasswordAllowed(const bool allow_no_password_);
|
||||
void setNoPasswordAllowed(bool allow_no_password_);
|
||||
bool isNoPasswordAllowed() const;
|
||||
|
||||
/// Allows users with plaintext password (by default it's allowed).
|
||||
void setPlaintextPasswordAllowed(const bool allow_plaintext_password_);
|
||||
void setPlaintextPasswordAllowed(bool allow_plaintext_password_);
|
||||
bool isPlaintextPasswordAllowed() const;
|
||||
|
||||
/// Default password type when the user does not specify it.
|
||||
|
@ -616,7 +616,7 @@ UUID IAccessStorage::generateRandomID()
|
||||
}
|
||||
|
||||
|
||||
void IAccessStorage::clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const LoggerPtr log_)
|
||||
void IAccessStorage::clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_)
|
||||
{
|
||||
std::unordered_map<UUID, size_t> positions_by_id;
|
||||
std::unordered_map<std::string_view, size_t> positions_by_type_and_name[static_cast<size_t>(AccessEntityType::MAX)];
|
||||
|
@ -228,7 +228,7 @@ protected:
|
||||
static UUID generateRandomID();
|
||||
LoggerPtr getLogger() const;
|
||||
static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); }
|
||||
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const LoggerPtr log_);
|
||||
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_);
|
||||
[[noreturn]] void throwNotFound(const UUID & id) const;
|
||||
[[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const;
|
||||
[[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type);
|
||||
|
@ -47,9 +47,7 @@ private:
|
||||
|
||||
std::unique_ptr<MemoryChunk> prev;
|
||||
|
||||
MemoryChunk()
|
||||
{
|
||||
}
|
||||
MemoryChunk() = default;
|
||||
|
||||
void swap(MemoryChunk & other)
|
||||
{
|
||||
|
@ -297,7 +297,7 @@ void DNSResolver::setDisableCacheFlag(bool is_disabled)
|
||||
impl->disable_cache = is_disabled;
|
||||
}
|
||||
|
||||
void DNSResolver::setCacheMaxEntries(const UInt64 cache_max_entries)
|
||||
void DNSResolver::setCacheMaxEntries(UInt64 cache_max_entries)
|
||||
{
|
||||
impl->cache_address.setMaxSizeInBytes(cache_max_entries);
|
||||
impl->cache_host.setMaxSizeInBytes(cache_max_entries);
|
||||
|
@ -56,7 +56,7 @@ public:
|
||||
void setDisableCacheFlag(bool is_disabled = true);
|
||||
|
||||
/// Set a limit of entries in cache
|
||||
void setCacheMaxEntries(const UInt64 cache_max_entries);
|
||||
void setCacheMaxEntries(UInt64 cache_max_entries);
|
||||
|
||||
/// Drops all caches
|
||||
void dropCache();
|
||||
|
@ -255,7 +255,7 @@ private:
|
||||
|
||||
static LUTIndex toLUTIndex(ExtendedDayNum d)
|
||||
{
|
||||
return normalizeLUTIndex(static_cast<Int64>(d + daynum_offset_epoch));
|
||||
return normalizeLUTIndex(static_cast<Int64>(d + daynum_offset_epoch)); /// NOLINT
|
||||
}
|
||||
|
||||
LUTIndex toLUTIndex(Time t) const
|
||||
|
@ -41,9 +41,9 @@ public:
|
||||
}
|
||||
|
||||
/// There is no copy constructor because only one MultiVersion should own the same object.
|
||||
MultiVersion(MultiVersion && src) { *this = std::move(src); }
|
||||
MultiVersion(MultiVersion && src) { *this = std::move(src); } /// NOLINT
|
||||
|
||||
MultiVersion & operator=(MultiVersion && src)
|
||||
MultiVersion & operator=(MultiVersion && src) /// NOLINT
|
||||
{
|
||||
if (this != &src)
|
||||
{
|
||||
|
@ -25,7 +25,7 @@
|
||||
*/
|
||||
template <typename T, typename U>
|
||||
constexpr bool memcpy_can_be_used_for_assignment = std::is_same_v<T, U>
|
||||
|| (std::is_integral_v<T> && std::is_integral_v<U> && sizeof(T) == sizeof(U));
|
||||
|| (std::is_integral_v<T> && std::is_integral_v<U> && sizeof(T) == sizeof(U)); /// NOLINT(misc-redundant-expression)
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -558,7 +558,7 @@ public:
|
||||
}
|
||||
|
||||
template <typename... TAllocatorParams>
|
||||
void swap(PODArray & rhs, TAllocatorParams &&... allocator_params)
|
||||
void swap(PODArray & rhs, TAllocatorParams &&... allocator_params) /// NOLINT(performance-noexcept-swap)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
this->unprotect();
|
||||
@ -756,7 +756,7 @@ public:
|
||||
};
|
||||
|
||||
template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
|
||||
void swap(PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & rhs)
|
||||
void swap(PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & rhs) /// NOLINT
|
||||
{
|
||||
lhs.swap(rhs);
|
||||
}
|
||||
|
@ -2,13 +2,13 @@
|
||||
# if USE_SSH
|
||||
# include <stdexcept>
|
||||
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wreserved-macro-identifier"
|
||||
# pragma GCC diagnostic ignored "-Wreserved-identifier"
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wreserved-macro-identifier"
|
||||
# pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
|
||||
# include <libssh/libssh.h>
|
||||
|
||||
# pragma GCC diagnostic pop
|
||||
# pragma clang diagnostic pop
|
||||
|
||||
namespace
|
||||
{
|
||||
|
@ -149,7 +149,7 @@ public:
|
||||
|
||||
/// Pad the remainder, which is missing up to an 8-byte word.
|
||||
current_word = 0;
|
||||
switch (end - data)
|
||||
switch (end - data) /// NOLINT(bugprone-switch-missing-default-case)
|
||||
{
|
||||
case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]];
|
||||
case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]];
|
||||
|
@ -16,7 +16,7 @@ class MergeTreeTransaction;
|
||||
/// or transaction object is not needed and not passed intentionally.
|
||||
#ifndef NO_TRANSACTION_PTR
|
||||
#define NO_TRANSACTION_PTR std::shared_ptr<MergeTreeTransaction>(nullptr)
|
||||
#define NO_TRANSACTION_RAW static_cast<MergeTreeTransaction *>(nullptr)
|
||||
#define NO_TRANSACTION_RAW static_cast<MergeTreeTransaction *>(nullptr) /// NOLINT(bugprone-macro-parentheses)
|
||||
#endif
|
||||
|
||||
/// Commit Sequence Number
|
||||
|
@ -23,7 +23,7 @@ namespace ProfileEvents
|
||||
namespace Coordination
|
||||
{
|
||||
|
||||
void Exception::incrementErrorMetrics(const Error code_)
|
||||
void Exception::incrementErrorMetrics(Error code_)
|
||||
{
|
||||
if (Coordination::isUserError(code_))
|
||||
ProfileEvents::increment(ProfileEvents::ZooKeeperUserExceptions);
|
||||
@ -33,14 +33,14 @@ void Exception::incrementErrorMetrics(const Error code_)
|
||||
ProfileEvents::increment(ProfileEvents::ZooKeeperOtherExceptions);
|
||||
}
|
||||
|
||||
Exception::Exception(const std::string & msg, const Error code_, int)
|
||||
Exception::Exception(const std::string & msg, Error code_, int)
|
||||
: DB::Exception(msg, DB::ErrorCodes::KEEPER_EXCEPTION)
|
||||
, code(code_)
|
||||
{
|
||||
incrementErrorMetrics(code);
|
||||
}
|
||||
|
||||
Exception::Exception(PreformattedMessage && msg, const Error code_)
|
||||
Exception::Exception(PreformattedMessage && msg, Error code_)
|
||||
: DB::Exception(std::move(msg), DB::ErrorCodes::KEEPER_EXCEPTION)
|
||||
, code(code_)
|
||||
{
|
||||
@ -48,7 +48,7 @@ Exception::Exception(PreformattedMessage && msg, const Error code_)
|
||||
incrementErrorMetrics(code);
|
||||
}
|
||||
|
||||
Exception::Exception(const Error code_)
|
||||
Exception::Exception(Error code_)
|
||||
: Exception(code_, "Coordination error: {}", errorMessage(code_))
|
||||
{
|
||||
}
|
||||
|
@ -466,13 +466,13 @@ class Exception : public DB::Exception
|
||||
{
|
||||
private:
|
||||
/// Delegate constructor, used to minimize repetition; last parameter used for overload resolution.
|
||||
Exception(const std::string & msg, const Error code_, int); /// NOLINT
|
||||
Exception(PreformattedMessage && msg, const Error code_);
|
||||
Exception(const std::string & msg, Error code_, int); /// NOLINT
|
||||
Exception(PreformattedMessage && msg, Error code_);
|
||||
|
||||
/// Message must be a compile-time constant
|
||||
template <typename T>
|
||||
requires std::is_convertible_v<T, String>
|
||||
Exception(T && message, const Error code_) : DB::Exception(std::forward<T>(message), DB::ErrorCodes::KEEPER_EXCEPTION, /* remote_= */ false), code(code_)
|
||||
Exception(T && message, Error code_) : DB::Exception(std::forward<T>(message), DB::ErrorCodes::KEEPER_EXCEPTION, /* remote_= */ false), code(code_)
|
||||
{
|
||||
incrementErrorMetrics(code);
|
||||
}
|
||||
@ -480,23 +480,23 @@ private:
|
||||
static void incrementErrorMetrics(Error code_);
|
||||
|
||||
public:
|
||||
explicit Exception(const Error code_); /// NOLINT
|
||||
explicit Exception(Error code_); /// NOLINT
|
||||
Exception(const Exception & exc);
|
||||
|
||||
template <typename... Args>
|
||||
Exception(const Error code_, FormatStringHelper<Args...> fmt, Args &&... args)
|
||||
Exception(Error code_, FormatStringHelper<Args...> fmt, Args &&... args)
|
||||
: DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, std::move(fmt), std::forward<Args>(args)...)
|
||||
, code(code_)
|
||||
{
|
||||
incrementErrorMetrics(code);
|
||||
}
|
||||
|
||||
inline static Exception createDeprecated(const std::string & msg, const Error code_)
|
||||
inline static Exception createDeprecated(const std::string & msg, Error code_)
|
||||
{
|
||||
return Exception(msg, code_, 0);
|
||||
}
|
||||
|
||||
inline static Exception fromPath(const Error code_, const std::string & path)
|
||||
inline static Exception fromPath(Error code_, const std::string & path)
|
||||
{
|
||||
return Exception(code_, "Coordination error: {}, path {}", errorMessage(code_), path);
|
||||
}
|
||||
@ -504,7 +504,7 @@ public:
|
||||
/// Message must be a compile-time constant
|
||||
template <typename T>
|
||||
requires std::is_convertible_v<T, String>
|
||||
inline static Exception fromMessage(const Error code_, T && message)
|
||||
inline static Exception fromMessage(Error code_, T && message)
|
||||
{
|
||||
return Exception(std::forward<T>(message), code_);
|
||||
}
|
||||
|
@ -19,14 +19,14 @@ namespace Poco { class Logger; }
|
||||
|
||||
using LogSeriesLimiterPtr = std::shared_ptr<LogSeriesLimiter>;
|
||||
|
||||
namespace
|
||||
namespace impl
|
||||
{
|
||||
[[maybe_unused]] LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; }
|
||||
[[maybe_unused]] LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); }
|
||||
[[maybe_unused]] const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; }
|
||||
[[maybe_unused]] std::unique_ptr<LogToStrImpl> getLoggerHelper(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
|
||||
[[maybe_unused]] std::unique_ptr<LogFrequencyLimiterIml> getLoggerHelper(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }
|
||||
[[maybe_unused]] LogSeriesLimiterPtr getLoggerHelper(LogSeriesLimiterPtr & logger) { return logger; }
|
||||
[[maybe_unused]] inline LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; }
|
||||
[[maybe_unused]] inline LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); }
|
||||
[[maybe_unused]] inline const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; }
|
||||
[[maybe_unused]] inline std::unique_ptr<LogToStrImpl> getLoggerHelper(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
|
||||
[[maybe_unused]] inline std::unique_ptr<LogFrequencyLimiterIml> getLoggerHelper(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }
|
||||
[[maybe_unused]] inline LogSeriesLimiterPtr getLoggerHelper(LogSeriesLimiterPtr & logger) { return logger; }
|
||||
}
|
||||
|
||||
#define LOG_IMPL_FIRST_ARG(X, ...) X
|
||||
@ -65,7 +65,7 @@ namespace
|
||||
|
||||
#define LOG_IMPL(logger, priority, PRIORITY, ...) do \
|
||||
{ \
|
||||
auto _logger = ::getLoggerHelper(logger); \
|
||||
auto _logger = ::impl::getLoggerHelper(logger); \
|
||||
const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
|
||||
(DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \
|
||||
if (!_is_clients_log && !_logger->is((PRIORITY))) \
|
||||
|
@ -36,7 +36,7 @@ void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_colum
|
||||
|
||||
void insertPostgreSQLValue(
|
||||
IColumn & column, std::string_view value,
|
||||
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
|
||||
ExternalResultDescription::ValueType type, DataTypePtr data_type,
|
||||
const std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx)
|
||||
{
|
||||
switch (type)
|
||||
@ -170,7 +170,7 @@ void insertPostgreSQLValue(
|
||||
|
||||
|
||||
void preparePostgreSQLArrayInfo(
|
||||
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type)
|
||||
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, DataTypePtr data_type)
|
||||
{
|
||||
const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get());
|
||||
auto nested = array_type->getNestedType();
|
||||
|
@ -22,11 +22,11 @@ struct PostgreSQLArrayInfo
|
||||
|
||||
void insertPostgreSQLValue(
|
||||
IColumn & column, std::string_view value,
|
||||
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
|
||||
ExternalResultDescription::ValueType type, DataTypePtr data_type,
|
||||
const std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx);
|
||||
|
||||
void preparePostgreSQLArrayInfo(
|
||||
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type);
|
||||
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, DataTypePtr data_type);
|
||||
|
||||
void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_column);
|
||||
|
||||
|
@ -568,7 +568,7 @@ class IColumn;
|
||||
M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
|
||||
\
|
||||
M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \
|
||||
M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \
|
||||
M(DefaultTableEngine, default_table_engine, DefaultTableEngine::MergeTree, "Default table engine used when ENGINE is not set in CREATE statement.",0) \
|
||||
M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \
|
||||
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
|
||||
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
|
||||
@ -1194,6 +1194,7 @@ class IColumn;
|
||||
FORMAT_FACTORY_SETTINGS(M, ALIAS) \
|
||||
OBSOLETE_FORMAT_SETTINGS(M, ALIAS) \
|
||||
|
||||
/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
|
||||
DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS)
|
||||
|
||||
|
||||
@ -1238,6 +1239,7 @@ private:
|
||||
/*
|
||||
* User-specified file format settings for File and URL engines.
|
||||
*/
|
||||
/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
|
||||
DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, LIST_OF_ALL_FORMAT_SETTINGS)
|
||||
|
||||
struct FormatFactorySettings : public BaseSettings<FormatFactorySettingsTraits>
|
||||
|
@ -90,6 +90,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"},
|
||||
{"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"},
|
||||
{"page_cache_inject_eviction", false, false, "Added userspace page cache"},
|
||||
{"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"},
|
||||
{"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"},
|
||||
{"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."},
|
||||
{"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"},
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <DataTypes/Serializations/SerializationNullable.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
@ -56,6 +57,30 @@ bool DataTypeNullable::equals(const IDataType & rhs) const
|
||||
return rhs.isNullable() && nested_data_type->equals(*static_cast<const DataTypeNullable &>(rhs).nested_data_type);
|
||||
}
|
||||
|
||||
/// Creates a constant column for this Nullable type from a single `field`.
/// The one-row column that is built here is wrapped with ColumnConst::create(..., size).
/// A NULL field is represented as the nested type's default value plus a null-mask entry of 1.
ColumnPtr DataTypeNullable::createColumnConst(size_t size, const Field & field) const
|
||||
{
|
||||
/// When onlyNull() is true, take the simple path: insert the field into a column
/// obtained from createColumn() and wrap it directly.
if (onlyNull())
|
||||
{
|
||||
auto column = createColumn();
|
||||
column->insert(field);
|
||||
return ColumnConst::create(std::move(column), size);
|
||||
}
|
||||
|
||||
/// Otherwise assemble the nullable column by hand, starting from a column of the nested type.
auto column = nested_data_type->createColumn();
|
||||
bool is_null = field.isNull();
|
||||
|
||||
/// The nested column receives either a default value (for NULL) or the field itself,
/// so it always ends up with exactly one inserted value.
if (is_null)
|
||||
nested_data_type->insertDefaultInto(*column);
|
||||
else
|
||||
column->insert(field);
|
||||
|
||||
/// One-entry null mask matching the single nested value: 1 for NULL, 0 otherwise.
auto null_mask = ColumnUInt8::create();
|
||||
null_mask->getData().push_back(is_null ? 1 : 0);
|
||||
|
||||
/// Combine nested values and mask into a ColumnNullable, then wrap it as a constant column.
auto res = ColumnNullable::create(std::move(column), std::move(null_mask));
|
||||
return ColumnConst::create(std::move(res), size);
|
||||
}
|
||||
|
||||
SerializationPtr DataTypeNullable::doGetDefaultSerialization() const
|
||||
{
|
||||
return std::make_shared<SerializationNullable>(nested_data_type->getDefaultSerialization());
|
||||
|
@ -41,6 +41,7 @@ public:
|
||||
bool onlyNull() const override;
|
||||
bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); }
|
||||
bool canBePromoted() const override { return nested_data_type->canBePromoted(); }
|
||||
ColumnPtr createColumnConst(size_t size, const Field & field) const override;
|
||||
|
||||
const DataTypePtr & getNestedType() const { return nested_data_type; }
|
||||
|
||||
|
@ -423,7 +423,7 @@ MutableColumns CacheDictionary<dictionary_key_type>::aggregateColumnsInOrderOfKe
|
||||
const DictionaryStorageFetchRequest & request,
|
||||
const MutableColumns & fetched_columns,
|
||||
const PaddedPODArray<KeyState> & key_index_to_state,
|
||||
IColumn::Filter * const default_mask) const
|
||||
IColumn::Filter * default_mask) const
|
||||
{
|
||||
MutableColumns aggregated_columns = request.makeAttributesResultColumns();
|
||||
|
||||
@ -473,7 +473,7 @@ MutableColumns CacheDictionary<dictionary_key_type>::aggregateColumns(
|
||||
const PaddedPODArray<KeyState> & key_index_to_fetched_columns_from_storage_result,
|
||||
const MutableColumns & fetched_columns_during_update,
|
||||
const HashMap<KeyType, size_t> & found_keys_to_fetched_columns_during_update_index,
|
||||
IColumn::Filter * const default_mask) const
|
||||
IColumn::Filter * default_mask) const
|
||||
{
|
||||
/**
|
||||
* Aggregation of columns fetched from storage and from source during update.
|
||||
|
@ -162,7 +162,7 @@ private:
|
||||
const DictionaryStorageFetchRequest & request,
|
||||
const MutableColumns & fetched_columns,
|
||||
const PaddedPODArray<KeyState> & key_index_to_state,
|
||||
IColumn::Filter * const default_mask = nullptr) const;
|
||||
IColumn::Filter * default_mask = nullptr) const;
|
||||
|
||||
MutableColumns aggregateColumns(
|
||||
const PaddedPODArray<KeyType> & keys,
|
||||
|
@ -14,7 +14,7 @@ class IRegionsHierarchyReader
|
||||
public:
|
||||
virtual bool readNext(RegionEntry & entry) = 0;
|
||||
|
||||
virtual ~IRegionsHierarchyReader() {}
|
||||
virtual ~IRegionsHierarchyReader() = default;
|
||||
};
|
||||
|
||||
using IRegionsHierarchyReaderPtr = std::unique_ptr<IRegionsHierarchyReader>;
|
||||
|
@ -568,7 +568,7 @@ bool RegExpTreeDictionary::setAttributesShortCircuit(
|
||||
const String & data,
|
||||
std::unordered_set<UInt64> & visited_nodes,
|
||||
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
|
||||
std::unordered_set<String> * const defaults) const
|
||||
std::unordered_set<String> * defaults) const
|
||||
{
|
||||
if (visited_nodes.contains(id))
|
||||
return attributes_to_set.attributesFull() == attributes.size();
|
||||
|
@ -210,7 +210,7 @@ private:
|
||||
const String & data,
|
||||
std::unordered_set<UInt64> & visited_nodes,
|
||||
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
|
||||
std::unordered_set<String> * const defaults) const;
|
||||
std::unordered_set<String> * defaults) const;
|
||||
|
||||
struct RegexTreeNode;
|
||||
using RegexTreeNodePtr = std::shared_ptr<RegexTreeNode>;
|
||||
|
@ -140,9 +140,9 @@ struct NumericArraySource : public ArraySourceImpl<NumericArraySource<T>>
|
||||
|
||||
|
||||
/// The methods can be virtual or not depending on the template parameter. See IStringSource.
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
#pragma GCC diagnostic ignored "-Wsuggest-destructor-override"
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wsuggest-override"
|
||||
#pragma clang diagnostic ignored "-Wsuggest-destructor-override"
|
||||
|
||||
template <typename Base>
|
||||
struct ConstSource : public Base
|
||||
@ -231,7 +231,7 @@ struct ConstSource : public Base
|
||||
}
|
||||
};
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
struct StringSource
|
||||
{
|
||||
|
@ -13,10 +13,6 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
# include <Core/ValuesWithType.h>
|
||||
#endif
|
||||
|
||||
/// This file contains user interface for functions.
|
||||
|
||||
namespace llvm
|
||||
|
@ -63,6 +63,7 @@ enum class RemoteFSReadMethod
|
||||
class MMappedFileCache;
|
||||
class PageCache;
|
||||
|
||||
/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
|
||||
struct ReadSettings
|
||||
{
|
||||
/// Method to use reading from local filesystem.
|
||||
|
@ -905,7 +905,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
|
||||
const InsertDataPtr & data,
|
||||
const Block & header,
|
||||
const ContextPtr & insert_context,
|
||||
const LoggerPtr logger,
|
||||
LoggerPtr logger,
|
||||
LogFunc && add_to_async_insert_log)
|
||||
{
|
||||
size_t total_rows = 0;
|
||||
|
@ -265,7 +265,7 @@ private:
|
||||
const InsertDataPtr & data,
|
||||
const Block & header,
|
||||
const ContextPtr & insert_context,
|
||||
const LoggerPtr logger,
|
||||
LoggerPtr logger,
|
||||
LogFunc && add_to_async_insert_log);
|
||||
|
||||
template <typename LogFunc>
|
||||
|
@ -330,7 +330,7 @@ protected:
|
||||
return *this;
|
||||
}
|
||||
|
||||
void swap(QueryAccessInfo & rhs)
|
||||
void swap(QueryAccessInfo & rhs) noexcept
|
||||
{
|
||||
std::swap(databases, rhs.databases);
|
||||
std::swap(tables, rhs.tables);
|
||||
@ -680,7 +680,7 @@ public:
|
||||
void addSpecialScalar(const String & name, const Block & block);
|
||||
|
||||
const QueryAccessInfo & getQueryAccessInfo() const { return *getQueryAccessInfoPtr(); }
|
||||
const QueryAccessInfoPtr getQueryAccessInfoPtr() const { return query_access_info; }
|
||||
QueryAccessInfoPtr getQueryAccessInfoPtr() const { return query_access_info; }
|
||||
void setQueryAccessInfo(QueryAccessInfoPtr other) { query_access_info = other; }
|
||||
|
||||
void addQueryAccessInfo(
|
||||
|
@ -23,7 +23,7 @@ struct ExternalLoadableLifetime
|
||||
UInt64 max_sec = 0;
|
||||
|
||||
ExternalLoadableLifetime(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
|
||||
ExternalLoadableLifetime() {}
|
||||
ExternalLoadableLifetime() = default;
|
||||
};
|
||||
|
||||
/// Get delay before trying to load again after error.
|
||||
|
@ -151,7 +151,7 @@ Block InterpreterInsertQuery::getSampleBlock(
|
||||
names.emplace_back(std::move(current_name));
|
||||
}
|
||||
|
||||
return getSampleBlock(names, table, metadata_snapshot, allow_materialized);
|
||||
return getSampleBlockImpl(names, table, metadata_snapshot, no_destination, allow_materialized);
|
||||
}
|
||||
|
||||
std::optional<Names> InterpreterInsertQuery::getInsertColumnNames() const
|
||||
@ -173,13 +173,18 @@ std::optional<Names> InterpreterInsertQuery::getInsertColumnNames() const
|
||||
return names;
|
||||
}
|
||||
|
||||
Block InterpreterInsertQuery::getSampleBlock(
|
||||
Block InterpreterInsertQuery::getSampleBlockImpl(
|
||||
const Names & names,
|
||||
const StoragePtr & table,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
bool no_destination,
|
||||
bool allow_materialized)
|
||||
{
|
||||
Block table_sample_physical = metadata_snapshot->getSampleBlock();
|
||||
Block table_sample_virtuals;
|
||||
if (no_destination)
|
||||
table_sample_virtuals = table->getVirtualsHeader();
|
||||
|
||||
Block table_sample_insertable = metadata_snapshot->getSampleBlockInsertable();
|
||||
Block res;
|
||||
for (const auto & current_name : names)
|
||||
@ -194,13 +199,19 @@ Block InterpreterInsertQuery::getSampleBlock(
|
||||
if (table_sample_physical.has(current_name))
|
||||
{
|
||||
if (!allow_materialized)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.",
|
||||
current_name);
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column", current_name);
|
||||
res.insert(ColumnWithTypeAndName(table_sample_physical.getByName(current_name).type, current_name));
|
||||
}
|
||||
else /// The table does not have a column with that name
|
||||
else if (table_sample_virtuals.has(current_name))
|
||||
{
|
||||
res.insert(ColumnWithTypeAndName(table_sample_virtuals.getByName(current_name).type, current_name));
|
||||
}
|
||||
else
|
||||
{
|
||||
/// The table does not have a column with that name
|
||||
throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "No such column {} in table {}",
|
||||
current_name, table->getStorageID().getNameForLogs());
|
||||
}
|
||||
}
|
||||
else
|
||||
res.insert(ColumnWithTypeAndName(table_sample_insertable.getByName(current_name).type, current_name));
|
||||
@ -276,7 +287,7 @@ Chain InterpreterInsertQuery::buildChain(
|
||||
if (!running_group)
|
||||
running_group = std::make_shared<ThreadGroup>(getContext());
|
||||
|
||||
auto sample = getSampleBlock(columns, table, metadata_snapshot, allow_materialized);
|
||||
auto sample = getSampleBlockImpl(columns, table, metadata_snapshot, no_destination, allow_materialized);
|
||||
if (check_access)
|
||||
getContext()->checkAccess(AccessType::INSERT, table->getStorageID(), sample.getNames());
|
||||
|
||||
|
@ -69,7 +69,7 @@ public:
|
||||
bool shouldAddSquashingFroStorage(const StoragePtr & table) const;
|
||||
|
||||
private:
|
||||
static Block getSampleBlock(const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, bool allow_materialized);
|
||||
static Block getSampleBlockImpl(const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, bool no_destination, bool allow_materialized);
|
||||
|
||||
ASTPtr query_ptr;
|
||||
const bool allow_materialized;
|
||||
|
@ -318,7 +318,7 @@ public:
|
||||
~ProcessListEntry();
|
||||
|
||||
QueryStatusPtr getQueryStatus() { return *it; }
|
||||
const QueryStatusPtr getQueryStatus() const { return *it; }
|
||||
QueryStatusPtr getQueryStatus() const { return *it; }
|
||||
};
|
||||
|
||||
|
||||
|
@ -1082,6 +1082,11 @@ void addBuildSubqueriesForSetsStepIfNeeded(
|
||||
{
|
||||
auto query_tree = subquery->detachQueryTree();
|
||||
auto subquery_options = select_query_options.subquery();
|
||||
/// I don't know if this is a good decision,
|
||||
/// But for now it is done in the same way as in old analyzer.
|
||||
/// This would not ignore limits for subqueries (affects mutations only).
|
||||
/// See test_build_sets_from_multiple_threads-analyzer.
|
||||
subquery_options.ignore_limits = false;
|
||||
Planner subquery_planner(
|
||||
query_tree,
|
||||
subquery_options,
|
||||
|
@ -59,7 +59,7 @@ public:
|
||||
|
||||
Chunk clone() const;
|
||||
|
||||
void swap(Chunk & other)
|
||||
void swap(Chunk & other) noexcept
|
||||
{
|
||||
columns.swap(other.columns);
|
||||
chunk_info.swap(other.chunk_info);
|
||||
|
@ -126,7 +126,7 @@ static void postprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::Co
|
||||
|
||||
|
||||
AggregatingSortedAlgorithm::SimpleAggregateDescription::SimpleAggregateDescription(
|
||||
AggregateFunctionPtr function_, const size_t column_number_,
|
||||
AggregateFunctionPtr function_, size_t column_number_,
|
||||
DataTypePtr nested_type_, DataTypePtr real_type_)
|
||||
: function(std::move(function_)), column_number(column_number_)
|
||||
, nested_type(std::move(nested_type_)), real_type(std::move(real_type_))
|
||||
|
@ -110,7 +110,7 @@ protected:
|
||||
return result;
|
||||
}
|
||||
|
||||
uintptr_t ALWAYS_INLINE swap(std::atomic<Data *> & value, std::uintptr_t flags, std::uintptr_t mask)
|
||||
uintptr_t ALWAYS_INLINE swap(std::atomic<Data *> & value, std::uintptr_t flags, std::uintptr_t mask) /// NOLINT
|
||||
{
|
||||
Data * expected = nullptr;
|
||||
Data * desired = getPtr(flags | getUInt(data));
|
||||
|
@ -6,8 +6,8 @@ namespace DB
|
||||
TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm(
|
||||
const TTLExpressions & ttl_expressions_,
|
||||
const TTLDescription & description_,
|
||||
const TTLUpdateField ttl_update_field_,
|
||||
const String ttl_update_key_,
|
||||
TTLUpdateField ttl_update_field_,
|
||||
String ttl_update_key_,
|
||||
const TTLInfo & old_ttl_info_,
|
||||
time_t current_time_,
|
||||
bool force_)
|
||||
|
@ -22,8 +22,8 @@ public:
|
||||
TTLUpdateInfoAlgorithm(
|
||||
const TTLExpressions & ttl_expressions_,
|
||||
const TTLDescription & description_,
|
||||
const TTLUpdateField ttl_update_field_,
|
||||
const String ttl_update_key_,
|
||||
TTLUpdateField ttl_update_field_,
|
||||
String ttl_update_key_,
|
||||
const TTLInfo & old_ttl_info_,
|
||||
time_t current_time_, bool force_
|
||||
);
|
||||
|
@ -11,6 +11,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_SETTING;
|
||||
extern const int INVALID_SETTING_VALUE;
|
||||
}
|
||||
|
||||
IMPLEMENT_SETTINGS_TRAITS(FileLogSettingsTraits, LIST_OF_FILELOG_SETTINGS)
|
||||
@ -36,6 +37,11 @@ void FileLogSettings::loadFromQuery(ASTStorage & storage_def)
|
||||
settings_ast->is_standalone = false;
|
||||
storage_def.set(storage_def.settings, settings_ast);
|
||||
}
|
||||
|
||||
/// Check that the batch size is not too high (the same sanity check is applied to the max_block_size setting).
|
||||
constexpr UInt64 max_sane_block_rows_size = 4294967296; // 2^32
|
||||
if (poll_max_batch_size > max_sane_block_rows_size)
|
||||
throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Sanity check: 'poll_max_batch_size' value is too high ({})", poll_max_batch_size);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -11,11 +11,21 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static std::pair<Block, Block> getHeaders(const StorageSnapshotPtr & storage_snapshot)
|
||||
static std::pair<Block, Block> getHeaders(const StorageSnapshotPtr & storage_snapshot, const Names & column_names)
|
||||
{
|
||||
auto all_columns_header = storage_snapshot->metadata->getSampleBlock();
|
||||
|
||||
auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized();
|
||||
auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock();
|
||||
|
||||
for (const auto & column_name : column_names)
|
||||
{
|
||||
if (non_virtual_header.has(column_name) || virtual_header.has(column_name))
|
||||
continue;
|
||||
const auto & column = all_columns_header.getByName(column_name);
|
||||
non_virtual_header.insert(column);
|
||||
}
|
||||
|
||||
return {non_virtual_header, virtual_header};
|
||||
}
|
||||
|
||||
@ -40,7 +50,7 @@ RabbitMQSource::RabbitMQSource(
|
||||
: RabbitMQSource(
|
||||
storage_,
|
||||
storage_snapshot_,
|
||||
getHeaders(storage_snapshot_),
|
||||
getHeaders(storage_snapshot_, columns),
|
||||
context_,
|
||||
columns,
|
||||
max_block_size_,
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Processors/Executors/CompletedPipelineExecutor.h>
|
||||
#include <Processors/Executors/PushingPipelineExecutor.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
@ -133,6 +134,9 @@ StorageRabbitMQ::StorageRabbitMQ(
|
||||
if (configuration.secure)
|
||||
SSL_library_init();
|
||||
|
||||
if (!columns_.getMaterialized().empty() || !columns_.getAliases().empty() || !columns_.getDefaults().empty() || !columns_.getEphemeral().empty())
|
||||
context_->addWarningMessage("RabbitMQ table engine doesn't support ALIAS, DEFAULT or MATERIALIZED columns. They will be ignored and filled with default values");
|
||||
|
||||
StorageInMemoryMetadata storage_metadata;
|
||||
storage_metadata.setColumns(columns_);
|
||||
setInMemoryMetadata(storage_metadata);
|
||||
@ -1055,18 +1059,7 @@ bool StorageRabbitMQ::tryStreamToViews()
|
||||
if (!table)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs());
|
||||
|
||||
// Create an INSERT query for streaming data
|
||||
auto insert = std::make_shared<ASTInsertQuery>();
|
||||
insert->table_id = table_id;
|
||||
|
||||
// Only insert into dependent views and expect that input blocks contain virtual columns
|
||||
InterpreterInsertQuery interpreter(insert, rabbitmq_context, false, true, true);
|
||||
auto block_io = interpreter.execute();
|
||||
|
||||
auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext());
|
||||
auto column_names = block_io.pipeline.getHeader().getNames();
|
||||
auto sample_block = storage_snapshot->getSampleBlockForColumns(column_names);
|
||||
|
||||
auto block_size = getMaxBlockSize();
|
||||
|
||||
// Create a stream for each consumer and join them in a union stream
|
||||
@ -1082,13 +1075,29 @@ bool StorageRabbitMQ::tryStreamToViews()
|
||||
for (size_t i = 0; i < num_created_consumers; ++i)
|
||||
{
|
||||
auto source = std::make_shared<RabbitMQSource>(
|
||||
*this, storage_snapshot, rabbitmq_context, column_names, block_size,
|
||||
max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode, false);
|
||||
*this, storage_snapshot, rabbitmq_context, Names{}, block_size,
|
||||
max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode);
|
||||
|
||||
sources.emplace_back(source);
|
||||
pipes.emplace_back(source);
|
||||
}
|
||||
|
||||
// Create an INSERT query for streaming data
|
||||
auto insert = std::make_shared<ASTInsertQuery>();
|
||||
insert->table_id = table_id;
|
||||
if (!sources.empty())
|
||||
{
|
||||
auto column_list = std::make_shared<ASTExpressionList>();
|
||||
const auto & header = sources[0]->getPort().getHeader();
|
||||
for (const auto & column : header)
|
||||
column_list->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
|
||||
insert->columns = std::move(column_list);
|
||||
}
|
||||
|
||||
// Only insert into dependent views and expect that input blocks contain virtual columns
|
||||
InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true);
|
||||
auto block_io = interpreter.execute();
|
||||
|
||||
block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes)));
|
||||
|
||||
std::atomic_size_t rows = 0;
|
||||
|
@ -72,8 +72,8 @@ struct StorageInMemoryMetadata
|
||||
StorageInMemoryMetadata(const StorageInMemoryMetadata & other);
|
||||
StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other);
|
||||
|
||||
StorageInMemoryMetadata(StorageInMemoryMetadata && other) = default;
|
||||
StorageInMemoryMetadata & operator=(StorageInMemoryMetadata && other) = default;
|
||||
StorageInMemoryMetadata(StorageInMemoryMetadata && other) = default; /// NOLINT
|
||||
StorageInMemoryMetadata & operator=(StorageInMemoryMetadata && other) = default; /// NOLINT
|
||||
|
||||
/// NOTE: Thread unsafe part. You should not modify the same StorageInMemoryMetadata
|
||||
/// structure from different threads. It should be used as MultiVersion
|
||||
|
@ -1,4 +1,3 @@
|
||||
test_build_sets_from_multiple_threads/test.py::test_set
|
||||
test_concurrent_backups_s3/test.py::test_concurrent_backups
|
||||
test_distributed_type_object/test.py::test_distributed_type_object
|
||||
test_merge_table_over_distributed/test.py::test_global_in
|
||||
|
@ -7,7 +7,6 @@
|
||||
01584_distributed_buffer_cannot_find_column
|
||||
01624_soft_constraints
|
||||
01747_join_view_filter_dictionary
|
||||
01761_cast_to_enum_nullable
|
||||
01925_join_materialized_columns
|
||||
02354_annoy
|
||||
# Check after constants refactoring
|
||||
|
@ -18,8 +18,10 @@ from github.GithubObject import NotSet
|
||||
from github.IssueComment import IssueComment
|
||||
from github.Repository import Repository
|
||||
|
||||
from ci_config import REQUIRED_CHECKS, CHECK_DESCRIPTIONS, CheckDescription
|
||||
from env_helper import GITHUB_JOB_URL, GITHUB_REPOSITORY, TEMP_PATH
|
||||
# isort: on
|
||||
|
||||
from ci_config import CHECK_DESCRIPTIONS, REQUIRED_CHECKS, CheckDescription
|
||||
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL, TEMP_PATH
|
||||
from pr_info import SKIP_MERGEABLE_CHECK_LABEL, PRInfo
|
||||
from report import (
|
||||
ERROR,
|
||||
@ -259,6 +261,12 @@ def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str:
|
||||
|
||||
result = [comment_body]
|
||||
|
||||
if visible_table_rows:
|
||||
visible_table_rows.sort()
|
||||
result.append(table_header)
|
||||
result.extend(visible_table_rows)
|
||||
result.append(table_footer)
|
||||
|
||||
if hidden_table_rows:
|
||||
hidden_table_rows.sort()
|
||||
result.append(details_header)
|
||||
@ -267,12 +275,6 @@ def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str:
|
||||
result.append(table_footer)
|
||||
result.append(details_footer)
|
||||
|
||||
if visible_table_rows:
|
||||
visible_table_rows.sort()
|
||||
result.append(table_header)
|
||||
result.extend(visible_table_rows)
|
||||
result.append(table_footer)
|
||||
|
||||
return "".join(result)
|
||||
|
||||
|
||||
@ -427,7 +429,7 @@ def set_mergeable_check(
|
||||
context=MERGEABLE_NAME,
|
||||
description=format_description(description),
|
||||
state=state,
|
||||
target_url=GITHUB_JOB_URL(),
|
||||
target_url=GITHUB_RUN_URL,
|
||||
)
|
||||
|
||||
|
||||
|
@ -138,13 +138,15 @@ check_spot_instance_is_old() {
|
||||
check_proceed_spot_termination() {
|
||||
# The function checks whether a spot instance termination is scheduled and, if so, proceeds with it
|
||||
# The event for spot instance termination
|
||||
local FORCE
|
||||
FORCE=${1:-}
|
||||
if TERMINATION_DATA=$(curl -s --fail http://169.254.169.254/latest/meta-data/spot/instance-action); then
|
||||
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-instance-termination-notices.html#instance-action-metadata
|
||||
_action=$(jq '.action' -r <<< "$TERMINATION_DATA")
|
||||
_time=$(jq '.time | fromdate' <<< "$TERMINATION_DATA")
|
||||
_until_action=$((_time - $(date +%s)))
|
||||
echo "Received the '$_action' event that will be effective in $_until_action seconds"
|
||||
if (( _until_action <= 30 )); then
|
||||
if (( _until_action <= 30 )) || [ "$FORCE" == "force" ]; then
|
||||
echo "The action $_action will be done in $_until_action, killing the runner and exit"
|
||||
local runner_pid
|
||||
runner_pid=$(pgrep Runner.Listener)
|
||||
@ -309,7 +311,7 @@ while true; do
|
||||
echo "Checking if the instance suppose to terminate"
|
||||
no_terminating_metadata || terminate_on_event
|
||||
check_spot_instance_is_old && terminate_and_exit
|
||||
check_proceed_spot_termination
|
||||
check_proceed_spot_termination force
|
||||
|
||||
echo "Going to configure runner"
|
||||
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token "$(get_runner_token)" \
|
||||
@ -319,7 +321,7 @@ while true; do
|
||||
echo "Another one check to avoid race between runner and infrastructure"
|
||||
no_terminating_metadata || terminate_on_event
|
||||
check_spot_instance_is_old && terminate_and_exit
|
||||
check_proceed_spot_termination
|
||||
check_proceed_spot_termination force
|
||||
|
||||
echo "Run"
|
||||
sudo -u ubuntu \
|
||||
|
@ -0,0 +1,12 @@
|
||||
<clickhouse>
|
||||
<remote_servers>
|
||||
<localhost_cluster>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>localhost</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</localhost_cluster>
|
||||
</remote_servers>
|
||||
</clickhouse>
|
@ -3,7 +3,9 @@ from helpers.cluster import ClickHouseCluster
|
||||
import logging
|
||||
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
node = cluster.add_instance("node", main_configs=["configs/overrides.xml"])
|
||||
node = cluster.add_instance(
|
||||
"node", main_configs=["configs/overrides.xml", "configs/clusters.xml"]
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
@ -23,7 +25,7 @@ def test_distibuted_settings(start_cluster):
|
||||
node.query(
|
||||
"""
|
||||
CREATE TABLE data_1 (key Int) ENGINE Memory();
|
||||
CREATE TABLE dist_1 as data_1 ENGINE Distributed(default, default, data_1) SETTINGS flush_on_detach = true;
|
||||
CREATE TABLE dist_1 as data_1 ENGINE Distributed(localhost_cluster, default, data_1) SETTINGS flush_on_detach = true;
|
||||
SYSTEM STOP DISTRIBUTED SENDS dist_1;
|
||||
INSERT INTO dist_1 SETTINGS prefer_localhost_replica=0 VALUES (1);
|
||||
DETACH TABLE dist_1;
|
||||
@ -36,7 +38,7 @@ def test_distibuted_settings(start_cluster):
|
||||
node.query(
|
||||
"""
|
||||
CREATE TABLE data_2 (key Int) ENGINE Memory();
|
||||
CREATE TABLE dist_2 as data_2 ENGINE Distributed(default, default, data_2);
|
||||
CREATE TABLE dist_2 as data_2 ENGINE Distributed(localhost_cluster, default, data_2);
|
||||
SYSTEM STOP DISTRIBUTED SENDS dist_2;
|
||||
INSERT INTO dist_2 SETTINGS prefer_localhost_replica=0 VALUES (2);
|
||||
DETACH TABLE dist_2;
|
||||
|
@ -53,13 +53,13 @@ instance3 = cluster.add_instance(
|
||||
# Helpers
|
||||
|
||||
|
||||
def rabbitmq_check_result(result, check=False, ref_file="test_rabbitmq_json.reference"):
|
||||
fpath = p.join(p.dirname(__file__), ref_file)
|
||||
with open(fpath) as reference:
|
||||
if check:
|
||||
assert TSV(result) == TSV(reference)
|
||||
else:
|
||||
return TSV(result) == TSV(reference)
|
||||
def rabbitmq_check_result(result, check=False, reference=None):
|
||||
if reference is None:
|
||||
reference = "\n".join([f"{i}\t{i}" for i in range(50)])
|
||||
if check:
|
||||
assert TSV(result) == TSV(reference)
|
||||
else:
|
||||
return TSV(result) == TSV(reference)
|
||||
|
||||
|
||||
def wait_rabbitmq_to_start(rabbitmq_docker_id, cookie, timeout=180):
|
||||
@ -133,9 +133,10 @@ def test_rabbitmq_select(rabbitmq_cluster, secure):
|
||||
if secure:
|
||||
port = cluster.rabbitmq_secure_port
|
||||
|
||||
# MATERIALIZED and ALIAS columns are not supported in RabbitMQ engine, but we can test that it does not fail
|
||||
instance.query(
|
||||
"""
|
||||
CREATE TABLE test.rabbitmq (key UInt64, value UInt64)
|
||||
CREATE TABLE test.rabbitmq (key UInt64, value UInt64, value2 ALIAS value + 1, value3 MATERIALIZED value + 1)
|
||||
ENGINE = RabbitMQ
|
||||
SETTINGS rabbitmq_host_port = '{}:{}',
|
||||
rabbitmq_exchange_name = 'select',
|
||||
@ -148,6 +149,11 @@ def test_rabbitmq_select(rabbitmq_cluster, secure):
|
||||
)
|
||||
)
|
||||
|
||||
assert (
|
||||
"RabbitMQ table engine doesn\\'t support ALIAS, DEFAULT or MATERIALIZED columns"
|
||||
in instance.query("SELECT * FROM system.warnings")
|
||||
)
|
||||
|
||||
credentials = pika.PlainCredentials("root", "clickhouse")
|
||||
parameters = pika.ConnectionParameters(
|
||||
rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials
|
||||
@ -379,7 +385,7 @@ def test_rabbitmq_macros(rabbitmq_cluster):
|
||||
def test_rabbitmq_materialized_view(rabbitmq_cluster):
|
||||
instance.query(
|
||||
"""
|
||||
CREATE TABLE test.rabbitmq (key UInt64, value UInt64)
|
||||
CREATE TABLE test.rabbitmq (key UInt64, value UInt64, dt1 DateTime MATERIALIZED now(), value2 ALIAS value + 1)
|
||||
ENGINE = RabbitMQ
|
||||
SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
|
||||
rabbitmq_exchange_name = 'mv',
|
||||
@ -484,9 +490,11 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster):
|
||||
"""
|
||||
DROP TABLE IF EXISTS test.view1;
|
||||
DROP TABLE IF EXISTS test.view2;
|
||||
DROP TABLE IF EXISTS test.view3;
|
||||
DROP TABLE IF EXISTS test.consumer1;
|
||||
DROP TABLE IF EXISTS test.consumer2;
|
||||
CREATE TABLE test.rabbitmq (key UInt64, value UInt64)
|
||||
DROP TABLE IF EXISTS test.consumer3;
|
||||
CREATE TABLE test.rabbitmq (key UInt64, value UInt64, value2 ALIAS value + 1, value3 MATERIALIZED value + 1, value4 DEFAULT 1)
|
||||
ENGINE = RabbitMQ
|
||||
SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
|
||||
rabbitmq_exchange_name = 'mmv',
|
||||
@ -497,13 +505,18 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster):
|
||||
CREATE TABLE test.view1 (key UInt64, value UInt64)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY key;
|
||||
CREATE TABLE test.view2 (key UInt64, value UInt64)
|
||||
CREATE TABLE test.view2 (key UInt64, value UInt64, value2 UInt64, value3 UInt64, value4 UInt64)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY key;
|
||||
CREATE TABLE test.view3 (key UInt64)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY key;
|
||||
CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS
|
||||
SELECT * FROM test.rabbitmq;
|
||||
CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS
|
||||
SELECT * FROM test.rabbitmq;
|
||||
CREATE MATERIALIZED VIEW test.consumer3 TO test.view3 AS
|
||||
SELECT * FROM test.rabbitmq;
|
||||
"""
|
||||
)
|
||||
|
||||
@ -514,7 +527,7 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster):
|
||||
connection = pika.BlockingConnection(parameters)
|
||||
channel = connection.channel()
|
||||
|
||||
instance.wait_for_log_line("Started streaming to 2 attached views")
|
||||
instance.wait_for_log_line("Started streaming to 3 attached views")
|
||||
|
||||
messages = []
|
||||
for i in range(50):
|
||||
@ -522,24 +535,43 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster):
|
||||
for message in messages:
|
||||
channel.basic_publish(exchange="mmv", routing_key="", body=message)
|
||||
|
||||
while True:
|
||||
is_check_passed = False
|
||||
deadline = time.monotonic() + 60
|
||||
while time.monotonic() < deadline:
|
||||
result1 = instance.query("SELECT * FROM test.view1 ORDER BY key")
|
||||
result2 = instance.query("SELECT * FROM test.view2 ORDER BY key")
|
||||
if rabbitmq_check_result(result1) and rabbitmq_check_result(result2):
|
||||
result3 = instance.query("SELECT * FROM test.view3 ORDER BY key")
|
||||
# Note that for view2 the result is `i i 0 0 0` rather than the expected `i i i+1 i+1 1`, because ALIAS/MATERIALIZED/DEFAULT columns are not supported by the RabbitMQ engine
|
||||
# We only check that at least it does not fail
|
||||
if (
|
||||
rabbitmq_check_result(result1)
|
||||
and rabbitmq_check_result(
|
||||
result2, reference="\n".join([f"{i}\t{i}\t0\t0\t0" for i in range(50)])
|
||||
)
|
||||
and rabbitmq_check_result(
|
||||
result3, reference="\n".join([str(i) for i in range(50)])
|
||||
)
|
||||
):
|
||||
is_check_passed = True
|
||||
break
|
||||
time.sleep(0.1)
|
||||
|
||||
assert (
|
||||
is_check_passed
|
||||
), f"References are not equal to results, result1: {result1}, result2: {result2}, result3: {result3}"
|
||||
|
||||
instance.query(
|
||||
"""
|
||||
DROP TABLE test.consumer1;
|
||||
DROP TABLE test.consumer2;
|
||||
DROP TABLE test.consumer3;
|
||||
DROP TABLE test.view1;
|
||||
DROP TABLE test.view2;
|
||||
DROP TABLE test.view3;
|
||||
"""
|
||||
)
|
||||
|
||||
connection.close()
|
||||
rabbitmq_check_result(result1, True)
|
||||
rabbitmq_check_result(result2, True)
|
||||
|
||||
|
||||
def test_rabbitmq_big_message(rabbitmq_cluster):
|
||||
|
@ -1,50 +0,0 @@
|
||||
0 0
|
||||
1 1
|
||||
2 2
|
||||
3 3
|
||||
4 4
|
||||
5 5
|
||||
6 6
|
||||
7 7
|
||||
8 8
|
||||
9 9
|
||||
10 10
|
||||
11 11
|
||||
12 12
|
||||
13 13
|
||||
14 14
|
||||
15 15
|
||||
16 16
|
||||
17 17
|
||||
18 18
|
||||
19 19
|
||||
20 20
|
||||
21 21
|
||||
22 22
|
||||
23 23
|
||||
24 24
|
||||
25 25
|
||||
26 26
|
||||
27 27
|
||||
28 28
|
||||
29 29
|
||||
30 30
|
||||
31 31
|
||||
32 32
|
||||
33 33
|
||||
34 34
|
||||
35 35
|
||||
36 36
|
||||
37 37
|
||||
38 38
|
||||
39 39
|
||||
40 40
|
||||
41 41
|
||||
42 42
|
||||
43 43
|
||||
44 44
|
||||
45 45
|
||||
46 46
|
||||
47 47
|
||||
48 48
|
||||
49 49
|
@ -1,9 +1,10 @@
|
||||
#!/usr/bin/expect -f
|
||||
# Tags: no-fasttest
|
||||
# Tag no-fasttest: 180 seconds running
|
||||
|
||||
log_user 0
|
||||
|
||||
# In some places `-timeout 1` is used so that expect does not always wait for the whole timeout
|
||||
set timeout 60
|
||||
|
||||
match_max 100000
|
||||
|
||||
if ![info exists env(CLICKHOUSE_PORT_TCP)] {set env(CLICKHOUSE_PORT_TCP) 9000}
|
||||
@ -13,11 +14,11 @@ expect ":) "
|
||||
|
||||
# Make a query
|
||||
send -- "SELECT 1\r"
|
||||
expect ":-] "
|
||||
expect -timeout 1 ":-] "
|
||||
send -- "-- xxx\r"
|
||||
expect ":-] "
|
||||
expect -timeout 1 ":-] "
|
||||
send -- ", 2\r"
|
||||
expect ":-] "
|
||||
expect -timeout 1 ":-] "
|
||||
send -- ";\r"
|
||||
|
||||
expect "│ 1 │ 2 │"
|
||||
|
@ -1,3 +1,5 @@
|
||||
SET default_table_engine = 'None';
|
||||
|
||||
CREATE TABLE table_02184 (x UInt8); --{serverError 119}
|
||||
SET default_table_engine = 'Log';
|
||||
CREATE TABLE table_02184 (x UInt8);
|
||||
|
@ -0,0 +1,2 @@
|
||||
create table test (number UInt64) engine=FileLog('./user_files/data.jsonl', 'JSONEachRow') settings poll_max_batch_size=18446744073709; -- {serverError INVALID_SETTING_VALUE}
|
||||
|
@ -1,9 +1,13 @@
|
||||
v24.2.2.71-stable 2024-03-15
|
||||
v24.2.1.2248-stable 2024-02-29
|
||||
v24.1.7.18-stable 2024-03-15
|
||||
v24.1.6.52-stable 2024-03-07
|
||||
v24.1.5.6-stable 2024-02-14
|
||||
v24.1.4.20-stable 2024-02-14
|
||||
v24.1.3.31-stable 2024-02-09
|
||||
v24.1.2.5-stable 2024-02-02
|
||||
v24.1.1.2048-stable 2024-01-30
|
||||
v23.12.5.81-stable 2024-03-15
|
||||
v23.12.4.15-stable 2024-02-09
|
||||
v23.12.3.40-stable 2024-02-02
|
||||
v23.12.2.59-stable 2024-01-05
|
||||
@ -25,6 +29,7 @@ v23.9.4.11-stable 2023-11-08
|
||||
v23.9.3.12-stable 2023-10-31
|
||||
v23.9.2.56-stable 2023-10-19
|
||||
v23.9.1.1854-stable 2023-09-29
|
||||
v23.8.11.28-lts 2024-03-15
|
||||
v23.8.10.43-lts 2024-03-05
|
||||
v23.8.9.54-lts 2024-01-05
|
||||
v23.8.8.20-lts 2023-11-25
|
||||
@ -55,6 +60,7 @@ v23.4.4.16-stable 2023-06-17
|
||||
v23.4.3.48-stable 2023-06-12
|
||||
v23.4.2.11-stable 2023-05-02
|
||||
v23.4.1.1943-stable 2023-04-27
|
||||
v23.3.21.26-lts 2024-03-15
|
||||
v23.3.20.27-lts 2024-03-05
|
||||
v23.3.19.32-lts 2024-01-05
|
||||
v23.3.18.15-lts 2023-11-25
|
||||
|
|
Loading…
Reference in New Issue
Block a user