diff --git a/.clang-tidy b/.clang-tidy index 0dacf813c7e..13c1b116ead 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -5,128 +5,128 @@ # a) the new check is not controversial (this includes many checks in readability-* and google-*) or # b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*). -# TODO: Once clang(-tidy) 17 is the minimum, we can convert this list to YAML -# See https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/ReleaseNotes.html#improvements-to-clang-tidy +HeaderFilterRegex: '^.*/(base|src|programs|utils)/.*(h|hpp)$' -# TODO Let clang-tidy check headers in further directories -# --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$' -HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$' +Checks: [ + '*', -Checks: '*, - -abseil-*, + '-abseil-*', - -altera-*, + '-altera-*', - -android-*, + '-android-*', - -bugprone-assignment-in-if-condition, - -bugprone-branch-clone, - -bugprone-easily-swappable-parameters, - -bugprone-exception-escape, - -bugprone-implicit-widening-of-multiplication-result, - -bugprone-narrowing-conversions, - -bugprone-not-null-terminated-result, - -bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged - -bugprone-unchecked-optional-access, + '-bugprone-assignment-in-if-condition', + '-bugprone-branch-clone', + '-bugprone-easily-swappable-parameters', + '-bugprone-exception-escape', + '-bugprone-forward-declaration-namespace', + '-bugprone-implicit-widening-of-multiplication-result', + '-bugprone-narrowing-conversions', + '-bugprone-not-null-terminated-result', + '-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged + '-bugprone-unchecked-optional-access', - -cert-dcl16-c, - -cert-dcl37-c, - -cert-dcl51-cpp, - -cert-err58-cpp, - -cert-msc32-c, - -cert-msc51-cpp, - 
-cert-oop54-cpp, - -cert-oop57-cpp, + '-cert-dcl16-c', + '-cert-dcl37-c', + '-cert-dcl51-cpp', + '-cert-err58-cpp', + '-cert-msc32-c', + '-cert-msc51-cpp', + '-cert-oop54-cpp', + '-cert-oop57-cpp', - -clang-analyzer-unix.Malloc, + '-clang-analyzer-optin.performance.Padding', - -cppcoreguidelines-*, # impractical in a codebase as large as ClickHouse, also slow + '-clang-analyzer-unix.Malloc', - -darwin-*, + '-cppcoreguidelines-*', # impractical in a codebase as large as ClickHouse, also slow - -fuchsia-*, + '-darwin-*', - -google-build-using-namespace, - -google-readability-braces-around-statements, - -google-readability-casting, - -google-readability-function-size, - -google-readability-namespace-comments, - -google-readability-todo, + '-fuchsia-*', - -hicpp-avoid-c-arrays, - -hicpp-avoid-goto, - -hicpp-braces-around-statements, - -hicpp-explicit-conversions, - -hicpp-function-size, - -hicpp-member-init, - -hicpp-move-const-arg, - -hicpp-multiway-paths-covered, - -hicpp-named-parameter, - -hicpp-no-array-decay, - -hicpp-no-assembler, - -hicpp-no-malloc, - -hicpp-signed-bitwise, - -hicpp-special-member-functions, - -hicpp-uppercase-literal-suffix, - -hicpp-use-auto, - -hicpp-use-emplace, - -hicpp-vararg, + '-google-build-using-namespace', + '-google-readability-braces-around-statements', + '-google-readability-casting', + '-google-readability-function-size', + '-google-readability-namespace-comments', + '-google-readability-todo', - -linuxkernel-*, + '-hicpp-avoid-c-arrays', + '-hicpp-avoid-goto', + '-hicpp-braces-around-statements', + '-hicpp-explicit-conversions', + '-hicpp-function-size', + '-hicpp-member-init', + '-hicpp-move-const-arg', + '-hicpp-multiway-paths-covered', + '-hicpp-named-parameter', + '-hicpp-no-array-decay', + '-hicpp-no-assembler', + '-hicpp-no-malloc', + '-hicpp-signed-bitwise', + '-hicpp-special-member-functions', + '-hicpp-uppercase-literal-suffix', + '-hicpp-use-auto', + '-hicpp-use-emplace', + '-hicpp-vararg', - -llvm-*, + 
'-linuxkernel-*', - -llvmlibc-*, + '-llvm-*', - -openmp-*, + '-llvmlibc-*', - -misc-const-correctness, - -misc-include-cleaner, # useful but far too many occurrences - -misc-no-recursion, - -misc-non-private-member-variables-in-classes, - -misc-confusable-identifiers, # useful but slooow - -misc-use-anonymous-namespace, + '-openmp-*', - -modernize-avoid-c-arrays, - -modernize-concat-nested-namespaces, - -modernize-macro-to-enum, - -modernize-pass-by-value, - -modernize-return-braced-init-list, - -modernize-use-auto, - -modernize-use-default-member-init, - -modernize-use-emplace, - -modernize-use-nodiscard, - -modernize-use-override, - -modernize-use-trailing-return-type, + '-misc-const-correctness', + '-misc-include-cleaner', # useful but far too many occurrences + '-misc-no-recursion', + '-misc-non-private-member-variables-in-classes', + '-misc-confusable-identifiers', # useful but slooo + '-misc-use-anonymous-namespace', - -performance-inefficient-string-concatenation, - -performance-no-int-to-ptr, - -performance-avoid-endl, - -performance-unnecessary-value-param, + '-modernize-avoid-c-arrays', + '-modernize-concat-nested-namespaces', + '-modernize-macro-to-enum', + '-modernize-pass-by-value', + '-modernize-return-braced-init-list', + '-modernize-use-auto', + '-modernize-use-default-member-init', + '-modernize-use-emplace', + '-modernize-use-nodiscard', + '-modernize-use-override', + '-modernize-use-trailing-return-type', - -portability-simd-intrinsics, + '-performance-inefficient-string-concatenation', + '-performance-no-int-to-ptr', + '-performance-avoid-endl', + '-performance-unnecessary-value-param', - -readability-avoid-unconditional-preprocessor-if, - -readability-braces-around-statements, - -readability-convert-member-functions-to-static, - -readability-else-after-return, - -readability-function-cognitive-complexity, - -readability-function-size, - -readability-identifier-length, - -readability-identifier-naming, # useful but too slow - 
-readability-implicit-bool-conversion, - -readability-isolate-declaration, - -readability-magic-numbers, - -readability-named-parameter, - -readability-redundant-declaration, - -readability-simplify-boolean-expr, - -readability-static-accessed-through-instance, - -readability-suspicious-call-argument, - -readability-uppercase-literal-suffix, - -readability-use-anyofallof, + '-portability-simd-intrinsics', - -zircon-*, -' + '-readability-avoid-unconditional-preprocessor-if', + '-readability-braces-around-statements', + '-readability-convert-member-functions-to-static', + '-readability-else-after-return', + '-readability-function-cognitive-complexity', + '-readability-function-size', + '-readability-identifier-length', + '-readability-identifier-naming', # useful but too slow + '-readability-implicit-bool-conversion', + '-readability-isolate-declaration', + '-readability-magic-numbers', + '-readability-named-parameter', + '-readability-redundant-declaration', + '-readability-simplify-boolean-expr', + '-readability-static-accessed-through-instance', + '-readability-suspicious-call-argument', + '-readability-uppercase-literal-suffix', + '-readability-use-anyofallof', + + '-zircon-*' +] WarningsAsErrors: '*' diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 93ac2be19b4..515236bb826 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -45,62 +45,3 @@ jobs: with: data: "${{ needs.RunConfig.outputs.data }}" set_latest: true - SonarCloud: - runs-on: [self-hosted, builder] - env: - SONAR_SCANNER_VERSION: 4.8.0.2856 - SONAR_SERVER_URL: "https://sonarcloud.io" - BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed - CC: clang-17 - CXX: clang++-17 - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis - filter: tree:0 - 
submodules: true - - name: Set up JDK 11 - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Download and set up sonar-scanner - env: - SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip - run: | - mkdir -p "$HOME/.sonar" - curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}" - unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/" - echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH" - - name: Download and set up build-wrapper - env: - BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip - run: | - curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}" - unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/" - echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH" - - name: Set Up Build Tools - run: | - sudo apt-get update - sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm - sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" - - name: Run build-wrapper - run: | - mkdir build - cd build - cmake .. - cd .. 
- build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ - - name: Run sonar-scanner - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} - run: | - sonar-scanner \ - --define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \ - --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ - --define sonar.projectKey="ClickHouse_ClickHouse" \ - --define sonar.organization="clickhouse-java" \ - --define sonar.cfamily.cpp23.enabled=true \ - --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c065219f980..2dddde9aa14 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -172,6 +172,7 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + python3 merge_pr.py --check-approved ############################################################################################# diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml index d2fe6f5dbe7..80d78d93e1b 100644 --- a/.github/workflows/reusable_build.yml +++ b/.github/workflows/reusable_build.yml @@ -43,8 +43,7 @@ jobs: runs-on: [self-hosted, '${{inputs.runner_type}}'] steps: - name: Check out repository code - # WIP: temporary try commit with limited perallelization of checkout - uses: ClickHouse/checkout@0be3f7b3098bae494d3ef5d29d2e0676fb606232 + uses: ClickHouse/checkout@v1 with: clear-repository: true ref: ${{ fromJson(inputs.data).git_ref }} diff --git a/CMakeLists.txt b/CMakeLists.txt index b55e9810361..8c4e16eace2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,13 +56,13 @@ option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile t if (ENABLE_CHECK_HEAVY_BUILDS) # set DATA (since RSS does not work since 2.6.x+) to 5G set (RLIMIT_DATA 5000000000) - # set VIRT (RLIMIT_AS) to 10G (DATA*10) + # set VIRT 
(RLIMIT_AS) to 10G (DATA*2) set (RLIMIT_AS 10000000000) # set CPU time limit to 1000 seconds set (RLIMIT_CPU 1000) - # -fsanitize=memory is too heavy - if (SANITIZE STREQUAL "memory") + # -fsanitize=memory and address are too heavy + if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE) set (RLIMIT_DATA 10000000000) # 10G endif() @@ -110,11 +110,6 @@ endif() # - sanitize.cmake add_library(global-libs INTERFACE) -# We don't want to instrument everything with fuzzer, but only specific targets (see below), -# also, since we build our own llvm, we specifically don't want to instrument -# libFuzzer library itself - it would result in infinite recursion -#include (cmake/fuzzer.cmake) - include (cmake/sanitize.cmake) option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON) @@ -554,7 +549,9 @@ if (ENABLE_RUST) endif() endif() -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64)) +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" + AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND NOT ENABLE_FUZZING + AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64)) set(CHECK_LARGE_OBJECT_SIZES_DEFAULT ON) else () set(CHECK_LARGE_OBJECT_SIZES_DEFAULT OFF) @@ -577,10 +574,7 @@ if (FUZZER) if (NOT(target_type STREQUAL "INTERFACE_LIBRARY" OR target_type STREQUAL "UTILITY")) target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link") endif() - # clickhouse fuzzer isn't working correctly - # initial PR https://github.com/ClickHouse/ClickHouse/pull/27526 - #if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse") - if (target_type STREQUAL "EXECUTABLE" AND target MATCHES ".+_fuzzer") + if (target_type STREQUAL "EXECUTABLE" AND (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse")) message(STATUS "${target} instrumented with fuzzer") target_link_libraries(${target} PUBLIC ch_contrib::fuzzer) # Add to fuzzers bundle diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index 
99b897c4571..d96b3ea1e9a 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -1,7 +1,7 @@ #include "coverage.h" #include -#pragma GCC diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wreserved-identifier" /// WITH_COVERAGE enables the default implementation of code coverage, diff --git a/base/base/defines.h b/base/base/defines.h index 1f02748633d..627c50c27d2 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -108,16 +108,22 @@ { [[noreturn]] void abortOnFailedAssertion(const String & description); } - #define chassert(x) do { static_cast(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0) + #define chassert_1(x, ...) do { static_cast(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0) + #define chassert_2(x, comment, ...) do { static_cast(x) ? void(0) : ::DB::abortOnFailedAssertion(comment); } while (0) #define UNREACHABLE() abort() // clang-format off #else /// Here sizeof() trick is used to suppress unused warning for result, /// since simple "(void)x" will evaluate the expression, while /// "sizeof(!(x))" will not. - #define chassert(x) (void)sizeof(!(x)) + #define chassert_1(x, ...) (void)sizeof(!(x)) + #define chassert_2(x, comment, ...) (void)sizeof(!(x)) #define UNREACHABLE() __builtin_unreachable() #endif + #define CHASSERT_DISPATCH(_1,_2, N,...) N(_1, _2) + #define CHASSERT_INVOKE(tuple) CHASSERT_DISPATCH tuple + #define chassert(...) CHASSERT_INVOKE((__VA_ARGS__, chassert_2, chassert_1)) + #endif /// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers. diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index 0311238caed..3d01e301f45 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -50,9 +50,6 @@ std::optional getCgroupsV2MemoryLimit() } -/** Returns the size of physical memory (RAM) in bytes. 
- * Returns 0 on unsupported platform - */ uint64_t getMemoryAmountOrZero() { int64_t num_pages = sysconf(_SC_PHYS_PAGES); diff --git a/base/base/getMemoryAmount.h b/base/base/getMemoryAmount.h index 7ebd92a8bcf..37ee0ebe7c6 100644 --- a/base/base/getMemoryAmount.h +++ b/base/base/getMemoryAmount.h @@ -2,11 +2,10 @@ #include -/** Returns the size of physical memory (RAM) in bytes. - * Returns 0 on unsupported platform or if it cannot determine the size of physical memory. - */ +/// Returns the size in bytes of physical memory (RAM) available to the process. The value can +/// be smaller than the total available RAM available to the system due to cgroups settings. +/// Returns 0 on unsupported platform or if it cannot determine the size of physical memory. uint64_t getMemoryAmountOrZero(); -/** Throws exception if it cannot determine the size of physical memory. - */ +/// Throws exception if it cannot determine the size of physical memory. uint64_t getMemoryAmount(); diff --git a/base/base/sort.h b/base/base/sort.h index 99bf8a0830e..e46c388d185 100644 --- a/base/base/sort.h +++ b/base/base/sort.h @@ -59,8 +59,8 @@ using ComparatorWrapper = Comparator; #endif -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" #include @@ -115,7 +115,7 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last) ::partial_sort(first, middle, last, comparator()); } -#pragma GCC diagnostic pop +#pragma clang diagnostic pop template void sort(RandomIt first, RandomIt last, Compare compare) diff --git a/base/poco/Net/include/Poco/Net/HTTPRequestHandlerFactory.h b/base/poco/Net/include/Poco/Net/HTTPRequestHandlerFactory.h index 029d936769b..9bc35b7ff70 100644 --- a/base/poco/Net/include/Poco/Net/HTTPRequestHandlerFactory.h +++ b/base/poco/Net/include/Poco/Net/HTTPRequestHandlerFactory.h @@ -30,7 +30,6 @@ namespace Net class HTTPServerRequest; - class HTTPServerResponse; 
class HTTPRequestHandler; diff --git a/base/poco/Net/src/TCPServerDispatcher.cpp b/base/poco/Net/src/TCPServerDispatcher.cpp index 20a1ffe1b4f..7f9f9a20ee7 100644 --- a/base/poco/Net/src/TCPServerDispatcher.cpp +++ b/base/poco/Net/src/TCPServerDispatcher.cpp @@ -93,7 +93,7 @@ void TCPServerDispatcher::release() void TCPServerDispatcher::run() { - AutoPtr guard(this, true); // ensure object stays alive + AutoPtr guard(this); // ensure object stays alive int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds(); @@ -149,11 +149,13 @@ void TCPServerDispatcher::enqueue(const StreamSocket& socket) { try { + this->duplicate(); _threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName); ++_currentThreads; } catch (Poco::Exception& exc) { + this->release(); ++_refusedConnections; std::cerr << "Got exception while starting thread for connection. Error code: " << exc.code() << ", message: '" << exc.displayText() << "'" << std::endl; diff --git a/cmake/fuzzer.cmake b/cmake/fuzzer.cmake deleted file mode 100644 index dd0c4b080fe..00000000000 --- a/cmake/fuzzer.cmake +++ /dev/null @@ -1,17 +0,0 @@ -# see ./CMakeLists.txt for variable declaration -if (FUZZER) - if (FUZZER STREQUAL "libfuzzer") - # NOTE: Eldar Zaitov decided to name it "libfuzzer" instead of "fuzzer" to keep in mind another possible fuzzer backends. - # NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them - # (tests) have entry point for fuzzer and it's not checked. 
- set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1") - - # NOTE: oss-fuzz can change LIB_FUZZING_ENGINE variable - if (NOT LIB_FUZZING_ENGINE) - set (LIB_FUZZING_ENGINE "-fsanitize=fuzzer") - endif () - else () - message (FATAL_ERROR "Unknown fuzzer type: ${FUZZER}") - endif () -endif() diff --git a/contrib/curl b/contrib/curl index 5ce164e0e92..1a05e833f8f 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit 5ce164e0e9290c96eb7d502173426c0a135ec008 +Subproject commit 1a05e833f8f7140628b27882b10525fd9ec4b873 diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index b9598e60167..0d04201c453 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103 +Subproject commit 0d04201c45359f0d0701fb1e8297d25eff7cfecf diff --git a/contrib/libmetrohash/src/metrohash128.h b/contrib/libmetrohash/src/metrohash128.h index 2dbb6ca5a8a..f507c917caf 100644 --- a/contrib/libmetrohash/src/metrohash128.h +++ b/contrib/libmetrohash/src/metrohash128.h @@ -17,6 +17,8 @@ #ifndef METROHASH_METROHASH_128_H #define METROHASH_METROHASH_128_H +// NOLINTBEGIN(readability-avoid-const-params-in-decls) + #include class MetroHash128 @@ -68,5 +70,6 @@ private: void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +// NOLINTEND(readability-avoid-const-params-in-decls) #endif // #ifndef METROHASH_METROHASH_128_H diff --git a/contrib/libmetrohash/src/metrohash64.cpp b/contrib/libmetrohash/src/metrohash64.cpp index 7b5ec7f1a42..6ff64027292 100644 --- a/contrib/libmetrohash/src/metrohash64.cpp +++ b/contrib/libmetrohash/src/metrohash64.cpp @@ -26,13 +26,13 @@ const uint8_t MetroHash64::test_seed_1[8] = { 0x3B, 0x0D, 0x48, 0x1C, 0xF4, 0x -MetroHash64::MetroHash64(const uint64_t 
seed) +MetroHash64::MetroHash64(uint64_t seed) { Initialize(seed); } -void MetroHash64::Initialize(const uint64_t seed) +void MetroHash64::Initialize(uint64_t seed) { vseed = (static_cast(seed) + k2) * k0; @@ -47,7 +47,7 @@ void MetroHash64::Initialize(const uint64_t seed) } -void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length) +void MetroHash64::Update(const uint8_t * const buffer, uint64_t length) { const uint8_t * ptr = reinterpret_cast(buffer); const uint8_t * const end = ptr + length; @@ -62,7 +62,7 @@ void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length) memcpy(input.b + (bytes % 32), ptr, static_cast(fill)); ptr += fill; bytes += fill; - + // input buffer is still partially filled if ((bytes % 32) != 0) return; @@ -72,7 +72,7 @@ void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length) state.v[2] += read_u64(&input.b[16]) * k2; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; state.v[3] += read_u64(&input.b[24]) * k3; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; } - + // bulk update bytes += static_cast(end - ptr); while (ptr <= (end - 32)) @@ -83,14 +83,14 @@ void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length) state.v[2] += read_u64(ptr) * k2; ptr += 8; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; state.v[3] += read_u64(ptr) * k3; ptr += 8; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; } - + // store remaining bytes in input buffer if (ptr < end) memcpy(input.b, ptr, static_cast(end - ptr)); } -void MetroHash64::Finalize(uint8_t * const hash) +void MetroHash64::Finalize(uint8_t * hash) { // finalize bulk loop, if used if (bytes >= 32) @@ -102,11 +102,11 @@ void MetroHash64::Finalize(uint8_t * const hash) state.v[0] = vseed + (state.v[0] ^ state.v[1]); } - + // process any bytes remaining in the input buffer const uint8_t * ptr = reinterpret_cast(input.b); const uint8_t * const end = ptr + (bytes % 32); - + if ((end - ptr) >= 16) 
{ state.v[1] = state.v[0] + (read_u64(ptr) * k2); ptr += 8; state.v[1] = rotate_right(state.v[1],29) * k3; @@ -139,7 +139,7 @@ void MetroHash64::Finalize(uint8_t * const hash) state.v[0] += read_u8 (ptr) * k3; state.v[0] ^= rotate_right(state.v[0], 37) * k1; } - + state.v[0] ^= rotate_right(state.v[0], 28); state.v[0] *= k0; state.v[0] ^= rotate_right(state.v[0], 29); @@ -152,7 +152,7 @@ void MetroHash64::Finalize(uint8_t * const hash) } -void MetroHash64::Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed) +void MetroHash64::Hash(const uint8_t * buffer, uint64_t length, uint8_t * const hash, uint64_t seed) { const uint8_t * ptr = reinterpret_cast(buffer); const uint8_t * const end = ptr + length; @@ -238,7 +238,7 @@ bool MetroHash64::ImplementationVerified() // verify incremental implementation MetroHash64 metro; - + metro.Initialize(0); metro.Update(reinterpret_cast(MetroHash64::test_string), strlen(MetroHash64::test_string)); metro.Finalize(hash); @@ -262,9 +262,9 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o const uint8_t * ptr = reinterpret_cast(key); const uint8_t * const end = ptr + len; - + uint64_t hash = ((static_cast(seed) + k2) * k0) + len; - + if (len >= 32) { uint64_t v[4]; @@ -272,7 +272,7 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o v[1] = hash; v[2] = hash; v[3] = hash; - + do { v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; @@ -288,7 +288,7 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0; hash += v[0] ^ v[1]; } - + if ((end - ptr) >= 16) { uint64_t v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1; @@ -297,32 +297,32 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o v1 ^= rotate_right(v1 * k3, 35) + v0; hash += v1; } - + if ((end - ptr) >= 8) { hash += 
read_u64(ptr) * k3; ptr += 8; hash ^= rotate_right(hash, 33) * k1; - + } - + if ((end - ptr) >= 4) { hash += read_u32(ptr) * k3; ptr += 4; hash ^= rotate_right(hash, 15) * k1; } - + if ((end - ptr) >= 2) { hash += read_u16(ptr) * k3; ptr += 2; hash ^= rotate_right(hash, 13) * k1; } - + if ((end - ptr) >= 1) { hash += read_u8 (ptr) * k3; hash ^= rotate_right(hash, 25) * k1; } - + hash ^= rotate_right(hash, 33); hash *= k0; hash ^= rotate_right(hash, 33); @@ -336,13 +336,13 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o static const uint64_t k0 = 0xD6D018F5; static const uint64_t k1 = 0xA2AA033B; static const uint64_t k2 = 0x62992FC1; - static const uint64_t k3 = 0x30BC5B29; + static const uint64_t k3 = 0x30BC5B29; const uint8_t * ptr = reinterpret_cast(key); const uint8_t * const end = ptr + len; - + uint64_t hash = ((static_cast(seed) + k2) * k0) + len; - + if (len >= 32) { uint64_t v[4]; @@ -350,7 +350,7 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o v[1] = hash; v[2] = hash; v[3] = hash; - + do { v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; @@ -366,7 +366,7 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 30) * k0; hash += v[0] ^ v[1]; } - + if ((end - ptr) >= 16) { uint64_t v0 = hash + (read_u64(ptr) * k2); ptr += 8; v0 = rotate_right(v0,29) * k3; @@ -375,31 +375,31 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o v1 ^= rotate_right(v1 * k3, 34) + v0; hash += v1; } - + if ((end - ptr) >= 8) { hash += read_u64(ptr) * k3; ptr += 8; hash ^= rotate_right(hash, 36) * k1; } - + if ((end - ptr) >= 4) { hash += read_u32(ptr) * k3; ptr += 4; hash ^= rotate_right(hash, 15) * k1; } - + if ((end - ptr) >= 2) { hash += read_u16(ptr) * k3; ptr += 2; hash ^= rotate_right(hash, 15) * k1; } - + if ((end - ptr) >= 1) { hash += read_u8 (ptr) * k3; hash ^= 
rotate_right(hash, 23) * k1; } - + hash ^= rotate_right(hash, 28); hash *= k0; hash ^= rotate_right(hash, 29); diff --git a/contrib/libmetrohash/src/metrohash64.h b/contrib/libmetrohash/src/metrohash64.h index 911e54e6863..7003a1848be 100644 --- a/contrib/libmetrohash/src/metrohash64.h +++ b/contrib/libmetrohash/src/metrohash64.h @@ -25,24 +25,24 @@ public: static const uint32_t bits = 64; // Constructor initializes the same as Initialize() - explicit MetroHash64(const uint64_t seed=0); + explicit MetroHash64(uint64_t seed=0); // Initializes internal state for new hash with optional seed - void Initialize(const uint64_t seed=0); + void Initialize(uint64_t seed=0); // Update the hash state with a string of bytes. If the length // is sufficiently long, the implementation switches to a bulk // hashing algorithm directly on the argument buffer for speed. - void Update(const uint8_t * buffer, const uint64_t length); + void Update(const uint8_t * buffer, uint64_t length); // Constructs the final hash and writes it to the argument buffer. // After a hash is finalized, this instance must be Initialized()-ed // again or the behavior of Update() and Finalize() is undefined. - void Finalize(uint8_t * const hash); + void Finalize(uint8_t * hash); // A non-incremental function implementation. This can be significantly // faster than the incremental implementation for some usage patterns. - static void Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed=0); + static void Hash(const uint8_t * buffer, uint64_t length, uint8_t * hash, uint64_t seed=0); // Does implementation correctly execute test vectors? 
static bool ImplementationVerified(); diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 2f42854a972..17eee6d4287 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/packager/README.md b/docker/packager/README.md index e0b7f38ea58..3604e8585a4 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -28,7 +28,6 @@ lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-clang -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-client -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-compressor -> clickhouse -lrwxrwxrwx 1 root root 10 clickhouse-copier -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-extract-from-config -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-format -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-lld -> clickhouse diff --git a/docker/packager/binary-builder/Dockerfile b/docker/packager/binary-builder/Dockerfile index 96c90403187..c9442accd7e 100644 --- a/docker/packager/binary-builder/Dockerfile +++ b/docker/packager/binary-builder/Dockerfile @@ -4,6 +4,9 @@ FROM clickhouse/fasttest:$FROM_TAG ENV CC=clang-${LLVM_VERSION} ENV CXX=clang++-${LLVM_VERSION} +# If the cctools is updated, then first build it in the CI, then update here in a different commit +COPY --from=clickhouse/cctools:d9e3596e706b /cctools /cctools + # Rust toolchain and libraries ENV RUSTUP_HOME=/rust/rustup ENV CARGO_HOME=/rust/cargo @@ -73,9 +76,6 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \ "https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \ && chmod +x /usr/bin/clang-tidy-cache -# If the cctools is updated, then first build 
it in the CI, then update here in a different commit -COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools - RUN mkdir /workdir && chmod 777 /workdir WORKDIR /workdir diff --git a/docker/packager/cctools/Dockerfile b/docker/packager/cctools/Dockerfile index 1b8c675a5c5..d986c6a3c86 100644 --- a/docker/packager/cctools/Dockerfile +++ b/docker/packager/cctools/Dockerfile @@ -2,7 +2,7 @@ # It's based on the assumption that we don't care of the cctools version so much # It event does not depend on the clickhouse/fasttest in the `docker/images.json` ARG FROM_TAG=latest -FROM clickhouse/fasttest:$FROM_TAG +FROM clickhouse/fasttest:$FROM_TAG as builder ENV CC=clang-${LLVM_VERSION} ENV CXX=clang++-${LLVM_VERSION} @@ -29,3 +29,6 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \ && make install -j$(nproc) \ && cd ../.. \ && rm -rf cctools-port + +FROM scratch +COPY --from=builder /cctools /cctools diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 7bd777de5b9..bd5fa313adc 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 03d01cfd5d7..256dcdc029f 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG PACKAGES="clickhouse-client 
clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 55229e893de..2317f84e0cb 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -33,6 +33,9 @@ ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_m ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' +# for external_symbolizer_path +RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer + RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 ENV LC_ALL en_US.UTF-8 diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 62cdcc3f830..912ff191e57 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -14,7 +14,6 @@ RUN apt-get update \ libclang-${LLVM_VERSION}-dev \ libclang-rt-${LLVM_VERSION}-dev \ lld-${LLVM_VERSION} \ - llvm-${LLVM_VERSION} \ llvm-${LLVM_VERSION}-dev \ lsof \ ninja-build \ @@ -37,8 +36,6 @@ RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 # This symlink is required by gcc to find the lld linker RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld -# for external_symbolizer_path -RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer # FIXME: workaround for "The imported target "merge-fdata" references the file" error # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index dc1b35b3a21..63471c288dd 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -343,7 +343,7 @@ quit # which is confusing. 
task_exit_code=$fuzzer_exit_code echo "failure" > status.txt - echo "Achtung!" > description.txt + echo "Let op!" > description.txt echo "Fuzzer went wrong with error code: ($fuzzer_exit_code). Its process died somehow when the server stayed alive. The server log probably won't tell you much so try to find information in other files." >>description.txt { rg -ao "Found error:.*" fuzzer.log || rg -ao "Exception:.*" fuzzer.log; } | tail -1 >>description.txt fi diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 98da5988ad5..18ac909df54 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -33,12 +33,12 @@ fi echo "Using cache policy: $cache_policy" -if [ "$cache_policy" = "SLRU" ]; then - sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ - | sed "s|LRU|SLRU|" \ - > /etc/clickhouse-server/config.d/storage_conf.xml.tmp - mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml -fi +#if [ "$cache_policy" = "SLRU" ]; then +# sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ +# | sed "s|LRU|SLRU|" \ +# > /etc/clickhouse-server/config.d/storage_conf.xml.tmp +# mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +#fi if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then # It is not needed, we will explicitly create tables on s3. diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index dc181339786..f4de7677012 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -61,6 +61,18 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml + #todo: remove these after 24.3 released. 
+ sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \ + | sed "s|azure|azure_blob_storage|" \ + > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp + sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml + + #todo: remove these after 24.3 released. + sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|local|local_blob_storage|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp + sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml + function remove_keeper_config() { sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ @@ -77,7 +89,7 @@ fi if [ "$NUM_TRIES" -gt "1" ]; then export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 - export THREAD_FUZZER_SLEEP_TIME_US=100000 + export THREAD_FUZZER_SLEEP_TIME_US_MAX=100000 export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 @@ -88,10 +100,10 @@ if [ "$NUM_TRIES" -gt "1" ]; then export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US_MAX=10000 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US_MAX=10000 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US_MAX=10000 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US_MAX=10000 mkdir -p /var/run/clickhouse-server # simplest way to forward env variables to 
server diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 621a6ced7f6..ea7e3aece1d 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -27,7 +27,7 @@ install_packages package_folder # and find more potential issues. export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 -export THREAD_FUZZER_SLEEP_TIME_US=100000 +export THREAD_FUZZER_SLEEP_TIME_US_MAX=100000 export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 @@ -38,11 +38,11 @@ export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 -export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 +export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US_MAX=10000 -export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 -export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 -export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 +export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US_MAX=10000 +export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US_MAX=10000 +export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US_MAX=10000 export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01 export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01 diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 4f2dc9df849..5446adf3793 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -26,6 +26,8 @@ RUN apt-get update \ && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ && echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \ /etc/apt/sources.list \ + && apt-get update \ 
+ && apt-get install --yes --no-install-recommends --verbose-versions llvm-${LLVM_VERSION} \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* diff --git a/docs/changelogs/v23.12.5.81-stable.md b/docs/changelogs/v23.12.5.81-stable.md new file mode 100644 index 00000000000..0a0acd97d58 --- /dev/null +++ b/docs/changelogs/v23.12.5.81-stable.md @@ -0,0 +1,64 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.12.5.81-stable (a0fbe3ae813) FIXME as compared to v23.12.4.15-stable (4233d111d20) + +#### Improvement +* Backported in [#60290](https://github.com/ClickHouse/ClickHouse/issues/60290): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#60830](https://github.com/ClickHouse/ClickHouse/issues/60830): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)). + +#### Build/Testing/Packaging Improvement +* Backported in [#59883](https://github.com/ClickHouse/ClickHouse/issues/59883): If you want to run initdb scripts every time when ClickHouse container is starting you should initialize environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)). +* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). +* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)). +* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). +* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). +* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)). +* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. 
Jurado Moreno](https://github.com/Beetelbrox)). +* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). +* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#60767](https://github.com/ClickHouse/ClickHouse/issues/60767): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#60582](https://github.com/ClickHouse/ClickHouse/issues/60582): Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)). +* Backported in [#61041](https://github.com/ClickHouse/ClickHouse/issues/61041): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#61030](https://github.com/ClickHouse/ClickHouse/issues/61030): ... [#61022](https://github.com/ClickHouse/ClickHouse/pull/61022) ([Max K.](https://github.com/maxknv)). 
+* Backported in [#61224](https://github.com/ClickHouse/ClickHouse/issues/61224): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)). +* Backported in [#61190](https://github.com/ClickHouse/ClickHouse/issues/61190): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#59798](https://github.com/ClickHouse/ClickHouse/issues/59798) to 23.12: CI: do not reuse builds on release branches"'. [#59979](https://github.com/ClickHouse/ClickHouse/pull/59979) ([Max K.](https://github.com/maxknv)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)). +* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)). +* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)). +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)). +* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.3.21.26-lts.md b/docs/changelogs/v23.3.21.26-lts.md new file mode 100644 index 00000000000..b0f059c4907 --- /dev/null +++ b/docs/changelogs/v23.3.21.26-lts.md @@ -0,0 +1,24 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.3.21.26-lts (d9672a3731f) FIXME as compared to v23.3.20.27-lts (cc974ba4f81) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix reading from sparse columns after restart [#49660](https://github.com/ClickHouse/ClickHouse/pull/49660) ([Anton Popov](https://github.com/CurtizJ)). +* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)). +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v23.8.11.28-lts.md b/docs/changelogs/v23.8.11.28-lts.md new file mode 100644 index 00000000000..acc284caa72 --- /dev/null +++ b/docs/changelogs/v23.8.11.28-lts.md @@ -0,0 +1,30 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.8.11.28-lts (31879d2ab4c) FIXME as compared to v23.8.10.43-lts (a278225bba9) + +#### Improvement +* Backported in [#60828](https://github.com/ClickHouse/ClickHouse/issues/60828): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Use the current branch test-utils to build cctools'. [#61276](https://github.com/ClickHouse/ClickHouse/pull/61276) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)). +* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)). +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v24.1.7.18-stable.md b/docs/changelogs/v24.1.7.18-stable.md new file mode 100644 index 00000000000..603a83a67be --- /dev/null +++ b/docs/changelogs/v24.1.7.18-stable.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.1.7.18-stable (90925babd78) FIXME as compared to v24.1.6.52-stable (fa09f677bc9) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). +* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). +* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#61043](https://github.com/ClickHouse/ClickHouse/issues/61043): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Backported in [#61168](https://github.com/ClickHouse/ClickHouse/issues/61168): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)). +* Backported in [#61192](https://github.com/ClickHouse/ClickHouse/issues/61192): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v24.2.2.71-stable.md b/docs/changelogs/v24.2.2.71-stable.md new file mode 100644 index 00000000000..b9aa5be626b --- /dev/null +++ b/docs/changelogs/v24.2.2.71-stable.md @@ -0,0 +1,55 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.2.2.71-stable (9293d361e72) FIXME as compared to v24.2.1.2248-stable (891689a4150) + +#### Improvement +* Backported in [#60834](https://github.com/ClickHouse/ClickHouse/issues/60834): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* PartsSplitter invalid ranges for the same part [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)). +* Try to avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). +* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)). +* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)). +* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). +* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)). +* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). 
+ +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#60758](https://github.com/ClickHouse/ClickHouse/issues/60758): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#60706](https://github.com/ClickHouse/ClickHouse/issues/60706): Eliminates the need to provide input args to docker server jobs to clean yml files. [#60602](https://github.com/ClickHouse/ClickHouse/pull/60602) ([Max K.](https://github.com/maxknv)). +* Backported in [#61045](https://github.com/ClickHouse/ClickHouse/issues/61045): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#60721](https://github.com/ClickHouse/ClickHouse/issues/60721): Fix build_report job so that it's defined by ci_config only (not yml file). [#60613](https://github.com/ClickHouse/ClickHouse/pull/60613) ([Max K.](https://github.com/maxknv)). +* Backported in [#60668](https://github.com/ClickHouse/ClickHouse/issues/60668): Do not await ci pending jobs on release branches decrease wait timeout to fit into gh job timeout. [#60652](https://github.com/ClickHouse/ClickHouse/pull/60652) ([Max K.](https://github.com/maxknv)). +* Backported in [#60863](https://github.com/ClickHouse/ClickHouse/issues/60863): Set limited number of builds for "special build check" report in backports. [#60850](https://github.com/ClickHouse/ClickHouse/pull/60850) ([Max K.](https://github.com/maxknv)). +* Backported in [#60946](https://github.com/ClickHouse/ClickHouse/issues/60946): ... [#60935](https://github.com/ClickHouse/ClickHouse/pull/60935) ([Max K.](https://github.com/maxknv)). +* Backported in [#60972](https://github.com/ClickHouse/ClickHouse/issues/60972): ... 
[#60952](https://github.com/ClickHouse/ClickHouse/pull/60952) ([Max K.](https://github.com/maxknv)). +* Backported in [#60980](https://github.com/ClickHouse/ClickHouse/issues/60980): ... [#60958](https://github.com/ClickHouse/ClickHouse/pull/60958) ([Max K.](https://github.com/maxknv)). +* Backported in [#61170](https://github.com/ClickHouse/ClickHouse/issues/61170): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)). +* Backported in [#61181](https://github.com/ClickHouse/ClickHouse/issues/61181): ... [#61172](https://github.com/ClickHouse/ClickHouse/pull/61172) ([Max K.](https://github.com/maxknv)). +* Backported in [#61228](https://github.com/ClickHouse/ClickHouse/issues/61228): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)). +* Backported in [#61194](https://github.com/ClickHouse/ClickHouse/issues/61194): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)). +* Backported in [#61244](https://github.com/ClickHouse/ClickHouse/issues/61244): ... [#61214](https://github.com/ClickHouse/ClickHouse/pull/61214) ([Max K.](https://github.com/maxknv)). +* Backported in [#61388](https://github.com/ClickHouse/ClickHouse/issues/61388):. [#61373](https://github.com/ClickHouse/ClickHouse/pull/61373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* CI: make workflow yml abstract [#60421](https://github.com/ClickHouse/ClickHouse/pull/60421) ([Max K.](https://github.com/maxknv)). +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)). 
+* General sanity in function `seriesOutliersDetectTukey` [#60535](https://github.com/ClickHouse/ClickHouse/pull/60535) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/en/engines/table-engines/integrations/jdbc.md b/docs/en/engines/table-engines/integrations/jdbc.md index a4a1e2a31ae..16ed01fecb5 100644 --- a/docs/en/engines/table-engines/integrations/jdbc.md +++ b/docs/en/engines/table-engines/integrations/jdbc.md @@ -6,6 +6,11 @@ sidebar_label: JDBC # JDBC +:::note +clickhouse-jdbc-bridge contains experimental code and is no longer supported. It may contain reliability issues and security vulnerabilities. Use it at your own risk. +ClickHouse recommends using the built-in table functions, which provide a better alternative for ad-hoc querying scenarios (Postgres, MySQL, MongoDB, etc.). +::: + Allows ClickHouse to connect to external databases via [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity). To implement the JDBC connection, ClickHouse uses the separate program [clickhouse-jdbc-bridge](https://github.com/ClickHouse/clickhouse-jdbc-bridge) that should run as a daemon. diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 0f3fef3d6fb..a4d0cf78066 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -18,8 +18,8 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + name1 [type1], + name2 [type2], ...
) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'host:port' [or rabbitmq_address = 'amqp(s)://guest:guest@localhost/vhost'], @@ -198,6 +198,10 @@ Additional virtual columns when `kafka_handle_error_mode='stream'`: Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully. +## Caveats {#caveats} + +Even though you may specify [default column expressions](/docs/en/sql-reference/statements/create/table.md/#default_values) (such as `DEFAULT`, `MATERIALIZED`, `ALIAS`) in the table definition, these will be ignored. Instead, the columns will be filled with their respective default values for their types. + ## Data formats support {#data-formats-support} RabbitMQ engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 58717b33aef..29672541d9d 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -946,96 +946,6 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting. -### Dynamic Storage - -This example query shows how to attach a table stored at a URL and configure the -remote storage within the query. The web storage is not configured in the ClickHouse -configuration files; all the settings are in the CREATE/ATTACH query. - -:::note -The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. 
Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk. -::: - -#### Example dynamic web storage - -:::tip -A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver) -::: - -In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content. - -```sql -# highlight-next-line -ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' -( - price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) -) -ENGINE = MergeTree -ORDER BY (postcode1, postcode2, addr1, addr2) - # highlight-start - SETTINGS disk = disk( - type=web, - endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' - ); - # highlight-end -``` - -### Nested Dynamic Storage - -This example query builds on the above dynamic disk configuration and shows how to -use a local disk to cache data from a table stored at a URL. Neither the cache disk -nor the web storage is configured in the ClickHouse configuration files; both are -configured in the CREATE/ATTACH query settings. - -In the settings highlighted below notice that the disk of `type=web` is nested within -the disk of `type=cache`. 
- -```sql -ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' -( - price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) -) -ENGINE = MergeTree -ORDER BY (postcode1, postcode2, addr1, addr2) - # highlight-start - SETTINGS disk = disk( - type=cache, - max_size='1Gi', - path='/var/lib/clickhouse/custom_disk_cache/', - disk=disk( - type=web, - endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' - ) - ); - # highlight-end -``` - ### Details {#details} In the case of `MergeTree` tables, data is getting to disk in different ways: @@ -1064,13 +974,11 @@ During this time, they are not moved to other volumes or disks. Therefore, until User can assign new big parts to different disks of a [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures) volume in a balanced way using the [min_bytes_to_rebalance_partition_over_jbod](/docs/en/operations/settings/merge-tree-settings.md/#min-bytes-to-rebalance-partition-over-jbod) setting. -## Using S3 for Data Storage {#table_engine-mergetree-s3} +## Using External Storage for Data Storage {#table_engine-mergetree-s3} -:::note -Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs). -::: +[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) family table engines can store data to `S3`, `AzureBlobStorage`, and `HDFS` using disks of type `s3`, `azure_blob_storage`, and `hdfs`, respectively.
See [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage) for more details. -`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`. +Example for [S3](https://aws.amazon.com/s3/) as external storage using a disk with type `s3`. Configuration markup: ``` xml @@ -1112,253 +1020,12 @@ Configuration markup: ``` +Also see [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage). + :::note cache configuration ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions. ::: -### Configuring the S3 disk - -Required parameters: - -- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data. -- `access_key_id` — S3 access key id. -- `secret_access_key` — S3 secret access key. - -Optional parameters: - -- `region` — S3 region name. -- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs. -- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`. -- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`. -- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`. -- `proxy` — Proxy configuration for S3 endpoint. 
Each `uri` element inside `proxy` block should contain a proxy URL. -- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`. -- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`. -- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`. -- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`. -- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`. -- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`. -- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. -- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. -- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. -- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional. -- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional. -- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting). -- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). 
-- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. -- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). -- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. -- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). -- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). -- `key_template` — Define the format with which the object keys are generated. By default, Clickhouse takes `root path` from `endpoint` option and adds random generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have a full control how to the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`. 
-- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the objects keys which were stored in the metadata files with the metadata version lower that `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here. - -### Configuring the cache - -This is the cache configuration from above: -```xml - - cache - s3 - /var/lib/clickhouse/disks/s3_cache/ - 10Gi - -``` - -These parameters define the cache layer: -- `type` — If a disk is of type `cache` it caches mark and index files in memory. -- `disk` — The name of the disk that will be cached. - -Cache parameters: -- `path` — The path where metadata for the cache is stored. -- `max_size` — The size (amount of disk space) that the cache can grow to. - -:::tip -There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details. -::: - -S3 disk can be configured as `main` or `cold` storage: -``` xml - - ... - - - s3 - https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/ - your_access_key_id - your_secret_access_key - - - - - -
- s3 -
-
-
- - -
- default -
- - s3 - -
- 0.2 -
-
- ... -
-``` - -In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule. - -## Using Azure Blob Storage for Data Storage {#table_engine-mergetree-azure-blob-storage} - -`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`. - -As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented. - -Configuration markup: -``` xml - - ... - - - azure_blob_storage - http://account.blob.core.windows.net - container - account - pass123 - /var/lib/clickhouse/disks/blob_storage_disk/ - /var/lib/clickhouse/disks/blob_storage_disk/cache/ - false - - - ... - -``` - -Connection parameters: -* `endpoint` — AzureBlobStorage endpoint URL with container & prefix. Optionally can contain account_name if the authentication method used needs it. (`http://account.blob.core.windows.net:{port}/[account_name]{container_name}/{data_prefix}`) or these parameters can be provided separately using storage_account_url, account_name & container. For specifying prefix, endpoint should be used. -* `endpoint_contains_account_name` - This flag is used to specify if endpoint contains account_name as it is only needed for certain authentication methods. (Default : true) -* `storage_account_url` - Required if endpoint is not specified, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`. -* `container_name` - Target container name, defaults to `default-container`. 
-* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet. - -Authentication parameters (the disk will try all available methods **and** Managed Identity Credential): -* `connection_string` - For authentication using a connection string. -* `account_name` and `account_key` - For authentication using Shared Key. - -Limit parameters (mainly for internal usage): -* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage. -* `min_bytes_for_seek` - Limits the size of a seekable region. -* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage. -* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage. -* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated. -* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. - -Other parameters: -* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks//`. -* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`. -* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). -* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). 
- -Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). - -:::note Zero-copy replication is not ready for production -Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. -::: - -## HDFS storage {#hdfs-storage} - -In this sample configuration: -- the disk is of type `hdfs` -- the data is hosted at `hdfs://hdfs1:9000/clickhouse/` - -```xml - - - - - hdfs - hdfs://hdfs1:9000/clickhouse/ - true - - - local - / - - - - - -
- hdfs -
- - hdd - -
-
-
-
-
-``` - -## Web storage (read-only) {#web-storage} - -Web storage can be used for read-only purposes. An example use is for hosting sample -data, or for migrating data. - -:::tip -Storage can also be configured temporarily within a query, if a web dataset is not expected -to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the -configuration file. -::: - -In this sample configuration: -- the disk is of type `web` -- the data is hosted at `http://nginx:80/test1/` -- a cache on local storage is used - -```xml - - - - - web - http://nginx:80/test1/ - - - cache - web - cached_web_cache/ - 100000000 - - - - - -
- web -
-
-
- - -
- cached_web -
-
-
-
-
-
-``` - ## Virtual Columns {#virtual-columns} - `_part` — Name of a part. diff --git a/docs/en/engines/table-engines/special/memory.md b/docs/en/engines/table-engines/special/memory.md index 0d552a69804..19b5c798a76 100644 --- a/docs/en/engines/table-engines/special/memory.md +++ b/docs/en/engines/table-engines/special/memory.md @@ -21,3 +21,79 @@ When restarting a server, data disappears from the table and the table becomes e Normally, using this table engine is not justified. However, it can be used for tests, and for tasks where maximum speed is required on a relatively small number of rows (up to approximately 100,000,000). The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing `GLOBAL IN` (see the section “IN operators”). + +Upper and lower bounds can be specified to limit Memory engine table size, effectively allowing it to act as a circular buffer (see [Engine Parameters](#engine-parameters)). + +## Engine Parameters {#engine-parameters} + +- `min_bytes_to_keep` — Minimum bytes to keep when memory table is size-capped. + - Default value: `0` + - Requires `max_bytes_to_keep` +- `max_bytes_to_keep` — Maximum bytes to keep within memory table where oldest rows are deleted on each insertion (i.e circular buffer). Max bytes can exceed the stated limit if the oldest batch of rows to remove falls under the `min_bytes_to_keep` limit when adding a large block. + - Default value: `0` +- `min_rows_to_keep` — Minimum rows to keep when memory table is size-capped. + - Default value: `0` + - Requires `max_rows_to_keep` +- `max_rows_to_keep` — Maximum rows to keep within memory table where oldest rows are deleted on each insertion (i.e circular buffer). Max rows can exceed the stated limit if the oldest batch of rows to remove falls under the `min_rows_to_keep` limit when adding a large block. 
+ - Default value: `0` + +## Usage {#usage} + + +**Initialize settings** +``` sql +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000; +``` + +**Note:** Both `bytes` and `rows` capping parameters can be set at the same time, however, the lower bounds of `max` and `min` will be adhered to. + +## Examples {#examples} +``` sql +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 4096, max_bytes_to_keep = 16384; + +/* 1. testing oldest block doesn't get deleted due to min-threshold - 3000 rows */ +INSERT INTO memory SELECT * FROM numbers(0, 1600); -- 8'192 bytes + +/* 2. adding block that doesn't get deleted */ +INSERT INTO memory SELECT * FROM numbers(1000, 100); -- 1'024 bytes + +/* 3. testing oldest block gets deleted - 9216 bytes - 1100 */ +INSERT INTO memory SELECT * FROM numbers(9000, 1000); -- 8'192 bytes + +/* 4. checking a very large block overrides all */ +INSERT INTO memory SELECT * FROM numbers(9000, 10000); -- 65'536 bytes + +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +``` + +``` text +┌─total_bytes─┬─total_rows─┐ +│ 65536 │ 10000 │ +└─────────────┴────────────┘ +``` + +also, for rows: + +``` sql +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 4000, max_rows_to_keep = 10000; + +/* 1. testing oldest block doesn't get deleted due to min-threshold - 3000 rows */ +INSERT INTO memory SELECT * FROM numbers(0, 1600); -- 1'600 rows + +/* 2. adding block that doesn't get deleted */ +INSERT INTO memory SELECT * FROM numbers(1000, 100); -- 100 rows + +/* 3. testing oldest block gets deleted - 9216 bytes - 1100 */ +INSERT INTO memory SELECT * FROM numbers(9000, 1000); -- 1'000 rows + +/* 4. 
checking a very large block overrides all */ +INSERT INTO memory SELECT * FROM numbers(9000, 10000); -- 10'000 rows + +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +``` + +``` text +┌─total_bytes─┬─total_rows─┐ +│ 65536 │ 10000 │ +└─────────────┴────────────┘ +``` diff --git a/docs/en/getting-started/example-datasets/criteo.md b/docs/en/getting-started/example-datasets/criteo.md index a2e0fda0cb0..4becdb50731 100644 --- a/docs/en/getting-started/example-datasets/criteo.md +++ b/docs/en/getting-started/example-datasets/criteo.md @@ -55,7 +55,7 @@ CREATE TABLE criteo_log ( ) ENGINE = Log; ``` -Download the data: +Insert the data: ``` bash $ for i in {00..23}; do echo $i; zcat datasets/criteo/day_${i#0}.gz | sed -r 's/^/2000-01-'${i/00/24}'\t/' | clickhouse-client --host=example-perftest01j --query="INSERT INTO criteo_log FORMAT TabSeparated"; done diff --git a/docs/en/getting-started/example-datasets/laion.md b/docs/en/getting-started/example-datasets/laion.md index 0dbaceffc13..327c1796d11 100644 --- a/docs/en/getting-started/example-datasets/laion.md +++ b/docs/en/getting-started/example-datasets/laion.md @@ -10,10 +10,14 @@ The embeddings and the metadata are stored in separate files in the raw data. A converts them to CSV and imports them into ClickHouse. 
You can use the following `download.sh` script for that: ```bash -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata -python3 process.py ${1} # merge files and convert to CSV +number=${1} +if [[ $number == '' ]]; then + number=1 +fi; +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${number}.npy # download image embedding +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${number}.npy # download text embedding +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${number}.parquet # download metadata +python3 process.py $number # merge files and convert to CSV ``` Script `process.py` is defined as follows: diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index cac75fdc45a..516a6d54248 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -248,6 +248,9 @@ Some of the files might not download fully. 
Check the file sizes and re-download ``` bash $ curl -O https://datasets.clickhouse.com/trips_mergetree/partitions/trips_mergetree.tar +# Validate the checksum +$ md5sum trips_mergetree.tar +# Checksum should be equal to: f3b8d469b41d9a82da064ded7245d12c $ tar xvf trips_mergetree.tar -C /var/lib/clickhouse # path to ClickHouse data directory $ # check permissions of unpacked data, fix if required $ sudo service clickhouse-server restart diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 234420de374..ca689ef7995 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -78,8 +78,8 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun #### Setup the Debian repository ``` bash -sudo apt-get install -y apt-transport-https ca-certificates dirmngr -sudo gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754 +sudo apt-get install -y apt-transport-https ca-certificates curl gnupg +curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 2d9bf2a2ee8..8639af468c2 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -170,7 +170,7 @@ RESTORE TABLE test.table PARTITIONS '2', '3' ### Backups as tar archives -Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported. +Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported. 
Write a backup as a tar: ``` @@ -444,10 +444,6 @@ Often data that is ingested into ClickHouse is delivered through some sort of pe Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they might not be the best choice for serving live queries. A possible solution is to create additional replicas with this kind of filesystem and exclude them from the [Distributed](../engines/table-engines/special/distributed.md) tables that are used for `SELECT` queries. Snapshots on such replicas will be out of reach of any queries that modify data. As a bonus, these replicas might have special hardware configurations with more disks attached per server, which would be cost-effective. -### clickhouse-copier {#clickhouse-copier} - -[clickhouse-copier](../operations/utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters. - For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well. ### Manipulations with Parts {#manipulations-with-parts} diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 9f17f4af1e8..089704705d0 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -95,9 +95,11 @@ which is equal to ## Substituting Configuration {#substitution} -The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. 
The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)). +The config can define substitutions. There are two types of substitutions: -If you want to replace an entire element with a substitution use `include` as the element name. +- If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)). + +- If you want to replace an entire element with a substitution, use `include` as the element name. Substitutions can also be performed from ZooKeeper by specifying the attribute `from_zk = "/path/to/node"`. In this case, the element value is replaced with the contents of the ZooKeeper node at `/path/to/node`. This also works if you store an entire XML subtree as a ZooKeeper node; it will be fully inserted into the source element. XML substitution example: @@ -114,7 +116,7 @@ XML substitution example: ``` -Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`.
The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node, and it will be fully inserted into the source element. +If you want to merge the substituting content with the existing configuration instead of appending, you can use the attribute `merge="true"`, for example: `<include from_zk="/some_path" merge="true">`. In this case, the existing configuration will be merged with the content from the substitution and the existing configuration settings will be replaced with values from substitution. ## Encrypting and Hiding Configuration {#encryption} diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 07c9a2b88ab..f20dcb9025e 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -933,9 +933,9 @@ Hard limit is configured via system tools ## database_atomic_delay_before_drop_table_sec {#database_atomic_delay_before_drop_table_sec} -Sets the delay before remove table data in seconds. If the query has `SYNC` modifier, this setting is ignored. +The delay, in seconds, before table data is dropped. If the `DROP TABLE` query has a `SYNC` modifier, this setting is ignored. -Default value: `480` (8 minute). +Default value: `480` (8 minutes). ## database_catalog_unused_dir_hide_timeout_sec {#database_catalog_unused_dir_hide_timeout_sec} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 3f4dec9dc93..6c427442e49 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4337,6 +4337,18 @@ Possible values: Default value: `0`. + +## function_locate_has_mysql_compatible_argument_order {#function-locate-has-mysql-compatible-argument-order} + +Controls the order of arguments in function [locate](../../sql-reference/functions/string-search-functions.md#locate).
+ +Possible values: + +- 0 — Function `locate` accepts arguments `(haystack, needle[, start_pos])`. +- 1 — Function `locate` accepts arguments `(needle, haystack[, start_pos])` (MySQL-compatible behavior). + +Default value: `1`. + ## date_time_overflow_behavior {#date_time_overflow_behavior} Defines the behavior when [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md) or integers are converted into Date, Date32, DateTime or DateTime64 but the value cannot be represented in the result type. diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 84251812c01..fd81bc197d1 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -5,26 +5,416 @@ sidebar_label: "External Disks for Storing Data" title: "External Disks for Storing Data" --- -Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon S3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)). +Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be quite expensive. To avoid that, you can store the data remotely. Various storages are supported: +1. [Amazon S3](https://aws.amazon.com/s3/) object storage. +2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)). +3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs).
-To work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine. +:::note ClickHouse also has support for external table engines, which are different from external storage option described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables. +1. to work with data stored on `Amazon S3` disks, use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine. +2. to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine. +3. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine. +::: -To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver). +## Configuring external storage {#configuring-external-storage} -## Configuring HDFS {#configuring-hdfs} +[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. -[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`. +Disk configuration requires: +1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`. +2. Configuration of a specific external storage type. 
-Configuration markup: +Starting from ClickHouse version 24.1, it is possible to use a new configuration option. +It requires specifying: +1. `type` equal to `object_storage` +2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`. +Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web`. +Usage of `plain` metadata type is described in [plain storage section](/docs/en/operations/storing-data.md/#plain-storage), `web` metadata type can be used only with `web` object storage type, `local` metadata type stores metadata files locally (each metadata file contains a mapping to files in object storage and some additional meta information about them). + +E.g. configuration option +``` xml + + s3 + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +is equal to configuration (from `24.1`): +``` xml + + object_storage + s3 + local + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +Configuration +``` xml + + s3_plain + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +is equal to +``` xml + + object_storage + s3 + plain + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +Example of full storage configuration will look like: +``` xml + + + + + s3 + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + + + + + +
+ s3 +
+
+
+
+
+
+``` + +Starting with 24.1 clickhouse version, it can also look like: +``` xml + + + + + object_storage + s3 + local + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + + + + + +
+ s3 +
+
+
+
+
+
+``` + +In order to make a specific kind of storage a default option for all `MergeTree` tables add the following section to configuration file: +``` xml + + + s3 + + +``` + +If you want to configure a specific storage policy only to specific table, you can define it in settings while creating the table: + +``` sql +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY a +SETTINGS storage_policy = 's3'; +``` + +You can also use `disk` instead of `storage_policy`. In this case it is not required to have a `storage_policy` section in the configuration file; a `disk` section alone is enough. + +``` sql +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY a +SETTINGS disk = 's3'; +``` + +## Dynamic Configuration {#dynamic-configuration} + +It is also possible to specify a storage configuration without a predefined disk in a configuration file; instead, the disk can be configured in the `CREATE`/`ATTACH` query settings. + +The following example query builds on the above dynamic disk configuration and shows how to use a local disk to cache data from a table stored at a URL. + +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ); + # highlight-end +``` + +The example below adds cache to external storage.
+ +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=cache, + max_size='1Gi', + path='/var/lib/clickhouse/custom_disk_cache/', + disk=disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ) + ); + # highlight-end +``` + +In the settings highlighted above, notice that the disk of `type=web` is nested within +the disk of `type=cache`. + +:::note +The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
+::: + +A combination of config-based configuration and sql-defined configuration is also possible: + +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=cache, + max_size='1Gi', + path='/var/lib/clickhouse/custom_disk_cache/', + disk=disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ) + ); + # highlight-end +``` + +where `web` is a disk defined in a server configuration file: ``` xml + + + + web + 'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + + + +``` + +### Using S3 Storage {#s3-storage} + +Required parameters: + +- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data. +- `access_key_id` — S3 access key id. +- `secret_access_key` — S3 secret access key. + +Optional parameters: + +- `region` — S3 region name. +- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs. +- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist.
Default value is `false`. +- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`. +- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`. +- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL. +- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`. +- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`. +- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`. +- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`. +- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`. +- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`. +- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. +- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. +- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. +- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional. +- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional. 
+- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting). +- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. +- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. +- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +- `key_template` — Defines the format with which the object keys are generated. By default, ClickHouse takes `root path` from `endpoint` option and adds a randomly generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have full control over how the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option.
Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`. +- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the object keys which were stored in the metadata files with the metadata version lower than `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here. + +:::note +Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs). +::: + +### Using Plain Storage {#plain-storage} + +In `22.10` a new disk type `s3_plain` was introduced, which provides a write-once storage. Configuration parameters are the same as for `s3` disk type. +Unlike `s3` disk type, it stores data as is, i.e. instead of randomly-generated blob names, it uses normal file names (the same way as clickhouse stores files on local disk) and does not store any metadata locally, i.e. it is derived from data on `s3`. + +This disk type allows to keep a static version of the table, as it does not allow executing merges on the existing data and does not allow inserting of new data. +A use case for this disk type is to create backups on it, which can be done via `BACKUP TABLE data TO Disk('plain_disk_name', 'backup_name')`. Afterwards you can do `RESTORE TABLE data AS data_restored FROM Disk('plain_disk_name', 'backup_name')` or using `ATTACH TABLE data (...) ENGINE = MergeTree() SETTINGS disk = 'plain_disk_name'`.
+ +Configuration: +``` xml + + s3_plain + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +Starting from `24.1` it is possible to configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using `plain` metadata type. + +Configuration: +``` xml + + object_storage + azure + plain + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +### Using Azure Blob Storage {#azure-blob-storage} + +`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`. + +As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented. + +Configuration markup: +``` xml + + ... + + + azure_blob_storage + http://account.blob.core.windows.net + container + account + pass123 + /var/lib/clickhouse/disks/blob_storage_disk/ + /var/lib/clickhouse/disks/blob_storage_disk/cache/ + false + + + ... + +``` + +Connection parameters: +* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`. +* `container_name` - Target container name, defaults to `default-container`. +* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet. + +Authentication parameters (the disk will try all available methods **and** Managed Identity Credential): +* `connection_string` - For authentication using a connection string. +* `account_name` and `account_key` - For authentication using Shared Key.
+ +Limit parameters (mainly for internal usage): +* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage. +* `min_bytes_for_seek` - Limits the size of a seekable region. +* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage. +* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage. +* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated. +* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. + +Other parameters: +* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks//`. +* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`. +* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). + +Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). + +:::note Zero-copy replication is not ready for production +Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. 
This feature is not recommended for production use. +::: + +## Using HDFS storage {#hdfs-storage} + +In this sample configuration: +- the disk is of type `hdfs` +- the data is hosted at `hdfs://hdfs1:9000/clickhouse/` + +```xml hdfs hdfs://hdfs1:9000/clickhouse/ + true + + local + / + @@ -32,26 +422,17 @@ Configuration markup:
hdfs
+ + hdd +
- - - 0 -
``` -Required parameters: - -- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data. - -Optional parameters: - -- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`. - -## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system} +### Using Data Encryption {#encrypted-virtual-file-system} You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one. @@ -112,7 +493,7 @@ Example of disk configuration: ``` -## Using local cache {#using-local-cache} +### Using local cache {#using-local-cache} It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. @@ -275,23 +656,92 @@ Cache profile events: - `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds` -## Using in-memory cache (userspace page cache) {#userspace-page-cache} - -The File Cache described above stores cached data in local files. Alternatively, object-store-based disks can be configured to use "Userspace Page Cache", which is RAM-only. Userspace page cache is recommended only if file cache can't be used for some reason, e.g. if the machine doesn't have a local disk at all. 
Note that file cache effectively uses RAM for caching too, since the OS caches contents of local files. - -To enable userspace page cache for disks that don't use file cache, use setting `use_page_cache_for_disks_without_file_cache`. - -By default, on Linux, the userspace page cache will use all available memory, similar to the OS page cache. In tools like `top` and `ps`, the clickhouse server process will typically show resident set size near 100% of the machine's RAM - this is normal, and most of this memory is actually reclaimable by the OS on memory pressure (`MADV_FREE`). This behavior can be disabled with server setting `page_cache_use_madv_free = 0`, making the userspace page cache just use a fixed amount of memory `page_cache_size` with no special interaction with the OS. On Mac OS, `page_cache_use_madv_free` is always disabled as it doesn't have lazy `MADV_FREE`. - -Unfortunately, `page_cache_use_madv_free` makes it difficult to tell if the server is close to running out of memory, since the RSS metric becomes useless. Async metric `UnreclaimableRSS` shows the amount of physical memory used by the server, excluding the memory reclaimable by the OS: `select value from system.asynchronous_metrics where metric = 'UnreclaimableRSS'`. Use it for monitoring instead of RSS. This metric is only available if `page_cache_use_madv_free` is enabled. - -## Storing Data on Web Server {#storing-data-on-webserver} - -There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`. +### Using static Web storage (read-only) {#web-storage} This is a read-only disk. Its data is only read and never modified. 
A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md). +Web storage can be used for read-only purposes. An example use is for hosting sample data, or for migrating data. +There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`. -Web server storage is supported only for the [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) engine families. To access the data stored on a `web` disk, use the [storage_policy](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#terms) setting when executing the query. For example, `ATTACH TABLE table_web UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'`. +In this sample configuration: +- the disk is of type `web` +- the data is hosted at `http://nginx:80/test1/` +- a cache on local storage is used + +```xml + + + + + web + http://nginx:80/test1/ + + + cache + web + cached_web_cache/ + 100000000 + + + + + +
+ web +
+
+
+ + +
+ cached_web +
+
+
+
+
+
+``` + +:::tip +Storage can also be configured temporarily within a query, if a web dataset is not expected +to be used routinely, see [dynamic configuration](#dynamic-configuration) and skip editing the +configuration file. +::: + +:::tip +A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted on GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#web-storage). +::: + +In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content. + +```sql +# highlight-next-line +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ); + # highlight-end +``` A ready test case. You need to add this configuration to config: @@ -487,7 +937,7 @@ If URL is not reachable on disk load when the server is starting up tables, then Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read.
-## Zero-copy Replication (not ready for production) {#zero-copy} +### Zero-copy Replication (not ready for production) {#zero-copy} Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 898e6ae2e2c..83ce817b7db 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -513,10 +513,6 @@ Part was moved to another disk and should be deleted in own destructor. Not active data part with identity refcounter, it is deleting right now by a cleaner. -### PartsInMemory - -In-memory parts. - ### PartsOutdated Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes. diff --git a/docs/en/operations/system-tables/scheduler.md b/docs/en/operations/system-tables/scheduler.md index 953db4c28f2..c4de7f76fdc 100644 --- a/docs/en/operations/system-tables/scheduler.md +++ b/docs/en/operations/system-tables/scheduler.md @@ -26,7 +26,9 @@ priority: 0 is_active: 0 active_children: 0 dequeued_requests: 67 +canceled_requests: 0 dequeued_cost: 4692272 +canceled_cost: 0 busy_periods: 63 vruntime: 938454.1999999989 system_vruntime: ᴺᵁᴸᴸ @@ -54,7 +56,9 @@ Columns: - `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied. - `active_children` (`UInt64`) - The number of children in active state. - `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node. +- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node. - `dequeued_cost` (`UInt64`) - The sum of costs (e.g. 
size in bytes) of all requests dequeued from this node. +- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node. - `busy_periods` (`UInt64`) - The total number of deactivations of this node. - `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner. - `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`. diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md deleted file mode 100644 index 0d329487504..00000000000 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ /dev/null @@ -1,187 +0,0 @@ ---- -slug: /en/operations/utilities/clickhouse-copier -sidebar_position: 59 -sidebar_label: clickhouse-copier ---- - -# clickhouse-copier - -Copies data from the tables in one cluster to tables in another (or the same) cluster. - -:::note -To get a consistent copy, the data in the source tables and partitions should not change during the entire process. -::: - -You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ClickHouse Keeper, or ZooKeeper, is used for syncing the processes. - -After starting, `clickhouse-copier`: - -- Connects to ClickHouse Keeper and receives: - - - Copying jobs. - - The state of the copying jobs. - -- It performs the jobs. - - Each running process chooses the “closest” shard of the source cluster and copies the data into the destination cluster, resharding the data if necessary. - -`clickhouse-copier` tracks the changes in ClickHouse Keeper and applies them on the fly. - -To reduce network traffic, we recommend running `clickhouse-copier` on the same server where the source data is located. 
- -## Running Clickhouse-copier {#running-clickhouse-copier} - -The utility should be run manually: - -``` bash -$ clickhouse-copier --daemon --config keeper.xml --task-path /task/path --base-dir /path/to/dir -``` - -Parameters: - -- `daemon` — Starts `clickhouse-copier` in daemon mode. -- `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper. -- `task-path` — The path to the ClickHouse Keeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`. -- `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper. -- `task-upload-force` — Force upload `task-file` even if node already exists. Default is false. -- `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched. - -## Format of keeper.xml {#format-of-zookeeper-xml} - -``` xml - - - trace - 100M - 3 - - - - - 127.0.0.1 - 2181 - - - -``` - -## Configuration of Copying Tasks {#configuration-of-copying-tasks} - -``` xml - - - - - - - false - - 127.0.0.1 - 9000 - - - - ... - - - - ... - - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - test - hits - - - destination_cluster - test - hits2 - - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}') - PARTITION BY toMonday(date) - ORDER BY (CounterID, EventDate) - - - - jumpConsistentHash(intHash64(UserID), 2) - - - CounterID != 0 - - - - '2018-02-26' - '2018-03-05' - ... - - - - - - ... - - ... - - -``` - -`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change. 
diff --git a/docs/en/operations/utilities/index.md b/docs/en/operations/utilities/index.md index 8959073d00e..912a5b9ccb1 100644 --- a/docs/en/operations/utilities/index.md +++ b/docs/en/operations/utilities/index.md @@ -2,13 +2,11 @@ slug: /en/operations/utilities/ sidebar_position: 56 sidebar_label: List of tools and utilities -pagination_next: 'en/operations/utilities/clickhouse-copier' --- # List of tools and utilities - [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this. -- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. - [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — Loads server with the custom queries and settings. - [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries. - [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data. diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index 5d2229fbcce..96bf0c5d93b 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -16,7 +16,9 @@ ClickHouse also supports: ## NULL Processing -During aggregation, all `NULL`s are skipped. If the aggregation has several parameters it will ignore any row in which one or more of the parameters are NULL. +During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL. 
+ +The exceptions to this rule are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`. **Examples:** @@ -85,3 +87,50 @@ FROM t_null_big; │ [2,2,3] │ [2,NULL,2,3,NULL] │ └───────────────┴───────────────────────────────────────┘ ``` + +Note that `NULL` values are skipped only when the columns are used as arguments to an aggregate function. For example [`count`](../../sql-reference/aggregate-functions/reference/count.md) without parameters (`count()`) or with constant ones (`count(1)`) will count all rows in the block (independently of the value of the GROUP BY column as it's not an argument), while `count(column)` will only return the number of rows where column is not NULL. + +```sql +SELECT + v, + count(1), + count(v) +FROM +( + SELECT if(number < 10, NULL, number % 3) AS v + FROM numbers(15) +) +GROUP BY v + +┌────v─┬─count()─┬─count(v)─┐ +│ ᴺᵁᴸᴸ │ 10 │ 0 │ +│ 0 │ 1 │ 1 │ +│ 1 │ 2 │ 2 │ +│ 2 │ 2 │ 2 │ +└──────┴─────────┴──────────┘ +``` + +And here is an example of `first_value` with `RESPECT NULLS` where we can see that NULL inputs are respected and it will return the first value read, whether it's NULL or not: + +```sql +SELECT + col || '_' || ((col + 1) * 5 - 1) as range, + first_value(odd_or_null) as first, + first_value(odd_or_null) IGNORE NULLS as first_ignore_null, + first_value(odd_or_null) RESPECT NULLS as first_respect_nulls +FROM +( + SELECT + intDiv(number, 5) AS col, + if(number % 2 == 0, NULL, number) as odd_or_null + FROM numbers(15) +) +GROUP BY col +ORDER BY col + +┌─range─┬─first─┬─first_ignore_null─┬─first_respect_nulls─┐ +│ 0_4 │ 1 │ 1 │ ᴺᵁᴸᴸ │ +│ 1_9 │ 5 │ 5 │ 5 │ +│ 2_14 │ 11 │ 11 │ ᴺᵁᴸᴸ │ +└───────┴───────┴───────────────────┴─────────────────────┘ +``` diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index 751688b0830..2044b7e690b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -1,16 +1,99 @@ --- -slug: /en/sql-reference/aggregate-functions/reference/varpop +title: "varPop" +slug: "/en/sql-reference/aggregate-functions/reference/varpop" sidebar_position: 32 --- -# varPop(x) +This page covers the `varPop` and `varPopStable` functions available in ClickHouse. -Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`. +## varPop -In other words, dispersion for a set of values. Returns `Float64`. +Calculates the population covariance between two data columns. The population covariance measures the degree to which two variables vary together. Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅` is the average value of `x`. -Alias: `VAR_POP`. +**Syntax** -:::note -This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error. -::: \ No newline at end of file +```sql +covarPop(x, y) +``` + +**Parameters** + +- `x`: The first data column. [Numeric](../../../native-protocol/columns.md) +- `y`: The second data column. [Numeric](../../../native-protocol/columns.md) + +**Returned value** + +Returns a value of type `Float64`. + +**Implementation details** + +This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable). 
+ +**Example** + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x Int32, + y Int32 +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (1, 2), (2, 3), (3, 5), (4, 6), (5, 8); + +SELECT + covarPop(x, y) AS covar_pop +FROM test_data; +``` + +```response +3 +``` + +## varPopStable + +Calculates population covariance between two data columns using a stable, numerically accurate method to calculate the variance. This function is designed to provide reliable results even with large datasets or values that might cause numerical instability in other implementations. + +**Syntax** + +```sql +covarPopStable(x, y) +``` + +**Parameters** + +- `x`: The first data column. [String literal](../../syntax#syntax-string-literal) +- `y`: The second data column. [Expression](../../syntax#syntax-expressions) + +**Returned value** + +Returns a value of type `Float64`. + +**Implementation details** + +Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations. 
+ +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x Int32, + y Int32 +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (1, 2), (2, 9), (9, 5), (4, 6), (5, 8); + +SELECT + covarPopStable(x, y) AS covar_pop_stable +FROM test_data; +``` + +```response +0.5999999999999999 +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md index 9b2b94936ec..be669a16ae8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md @@ -1,18 +1,128 @@ --- +title: "varSamp" slug: /en/sql-reference/aggregate-functions/reference/varsamp sidebar_position: 33 --- -# varSamp +This page contains information on the `varSamp` and `varSampStable` ClickHouse functions. -Calculates the amount `Σ((x - x̅)^2) / (n - 1)`, where `n` is the sample size and `x̅`is the average value of `x`. +## varSamp -It represents an unbiased estimate of the variance of a random variable if passed values from its sample. +Calculate the sample variance of a data set. -Returns `Float64`. When `n <= 1`, returns `+∞`. +**Syntax** -Alias: `VAR_SAMP`. +```sql +varSamp(expr) +``` -:::note -This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error. -::: +**Parameters** + +- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions) + +**Returned value** + +Returns a Float64 value representing the sample variance of the input data set. 
+ +**Implementation details** + +The `varSamp()` function calculates the sample variance using the following formula: + +```plaintext +∑(x - mean(x))^2 / (n - 1) +``` + +Where: + +- `x` is each individual data point in the data set. +- `mean(x)` is the arithmetic mean of the data set. +- `n` is the number of data points in the data set. + +The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead. + +This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable). + +**Example** + +Query: + +```sql +CREATE TABLE example_table +( + id UInt64, + value Float64 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); + +SELECT varSamp(value) FROM example_table; +``` + +Response: + +```response +0.8650000000000091 +``` + +## varSampStable + +Calculate the sample variance of a data set using a numerically stable algorithm. + +**Syntax** + +```sql +varSampStable(expr) +``` + +**Parameters** + +- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions) + +**Returned value** + +The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set. + +**Implementation details** + +The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()` function](#varSamp): + +```plaintext +∑(x - mean(x))^2 / (n - 1) +``` + +Where: +- `x` is each individual data point in the data set. +- `mean(x)` is the arithmetic mean of the data set. +- `n` is the number of data points in the data set. 
+ +The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values. + +Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead. + +**Example** + +Query: + +```sql +CREATE TABLE example_table +( + id UInt64, + value Float64 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); + +SELECT varSampStable(value) FROM example_table; +``` + +Response: + +```response +0.865 +``` + +This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic. diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 618dd3f4b4f..4f6da764b3c 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -433,3 +433,292 @@ Result: │ [0,1,2,3,4,5,6,7] │ └───────────────────┘ ``` + +## mortonEncode + +Calculates the Morton encoding (ZCurve) for a list of unsigned integers. + +The function has two modes of operation: +- Simple +- Expanded + +### Simple mode + +Accepts up to 8 unsigned integers as arguments and produces a UInt64 code. 
+ +**Syntax** + +```sql +mortonEncode(args) +``` + +**Parameters** + +- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. + +**Returned value** + +- A UInt64 code + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) + +**Example** + +Query: + +```sql +SELECT mortonEncode(1, 2, 3); +``` +Result: + +```response +53 +``` + +### Expanded mode + +Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments. + +Each number in the mask configures the amount of range expansion:
+1 - no expansion
+2 - 2x expansion
+3 - 3x expansion
+...
+Up to 8x expansion.
+ +**Syntax** + +```sql +mortonEncode(range_mask, args) +``` + +**Parameters** +- `range_mask`: 1-8. +- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. + +Note: when using columns for `args` the provided `range_mask` tuple should still be a constant. + +**Returned value** + +- A UInt64 code + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) + + +**Example** + +Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) +For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF). + +Query: + +```sql +SELECT mortonEncode((1,2), 1024, 16); +``` + +Result: + +```response +1572864 +``` + +Note: tuple size must be equal to the number of the other arguments. + +**Example** + +Morton encoding for one argument is always the argument itself: + +Query: + +```sql +SELECT mortonEncode(1); +``` + +Result: + +```response +1 +``` + +**Example** + +It is also possible to expand one argument too: + +Query: + +```sql +SELECT mortonEncode(tuple(2), 128); +``` + +Result: + +```response +32768 +``` + +**Example** + +You can also use column names in the function. + +Query: + +First create the table and insert some data. + +```sql +create table morton_numbers( + n1 UInt32, + n2 UInt32, + n3 UInt16, + n4 UInt16, + n5 UInt8, + n6 UInt8, + n7 UInt8, + n8 UInt8 +) +Engine=MergeTree() +ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into morton_numbers (*) values(1,2,3,4,5,6,7,8); +``` +Use column names instead of constants as function arguments to `mortonEncode` + +Query: + +```sql +SELECT mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8) FROM morton_numbers; +``` + +Result: + +```response +2155374165 +``` + +**implementation details** + +Please note that you can fit only so many bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. 
Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. + +## mortonDecode + +Decodes a Morton encoding (ZCurve) into the corresponding unsigned integer tuple. + +As with the `mortonEncode` function, this function has two modes of operation: +- Simple +- Expanded + +### Simple mode + +Accepts a resulting tuple size as the first argument and the code as the second argument. + +**Syntax** + +```sql +mortonDecode(tuple_size, code) +``` + +**Parameters** +- `tuple_size`: integer value no more than 8. +- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code. + +**Returned value** + +- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) + +**Example** + +Query: + +```sql +SELECT mortonDecode(3, 53); +``` + +Result: + +```response +["1","2","3"] +``` + +### Expanded mode + +Accepts a range mask (tuple) as a first argument and the code as the second argument. +Each number in the mask configures the amount of range shrink:
+1 - no shrink
+2 - 2x shrink
+3 - 3x shrink
+...
+Up to 8x shrink.
+ +Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) +For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF). +As with the encode function, this is limited to 8 numbers at most. + +**Example** + +Query: + +```sql +SELECT mortonDecode(1, 1); +``` + +Result: + +```response +["1"] +``` + +**Example** + +It is also possible to shrink one argument: + +Query: + +```sql +SELECT mortonDecode(tuple(2), 32768); +``` + +Result: + +```response +["128"] +``` + +**Example** + +You can also use column names in the function. + +First create the table and insert some data. + +Query: +```sql +create table morton_numbers( + n1 UInt32, + n2 UInt32, + n3 UInt16, + n4 UInt16, + n5 UInt8, + n6 UInt8, + n7 UInt8, + n8 UInt8 +) +Engine=MergeTree() +ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into morton_numbers (*) values(1,2,3,4,5,6,7,8); +``` +Use column names instead of constants as function arguments to `mortonDecode` + +Query: + +```sql +select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) from morton_numbers; +``` + +Result: + +```response +1 2 3 4 5 6 7 8 +``` + + + + diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 91c04cfded3..4dfbf4262ed 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -10,6 +10,8 @@ sidebar_label: Nullable Returns whether the argument is [NULL](../../sql-reference/syntax.md#null). +See also operator [`IS NULL`](../operators/index.md#is_null). + ``` sql isNull(x) ``` @@ -54,6 +56,8 @@ Result: Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal). +See also operator [`IS NOT NULL`](../operators/index.md#is_not_null). 
+ ``` sql isNotNull(x) ``` diff --git a/docs/en/sql-reference/functions/geo/polygon.md b/docs/en/sql-reference/functions/geo/polygon.md index 4a8653965c2..c2572779ada 100644 --- a/docs/en/sql-reference/functions/geo/polygon.md +++ b/docs/en/sql-reference/functions/geo/polygon.md @@ -4,6 +4,67 @@ sidebar_label: Polygons title: "Functions for Working with Polygons" --- +## WKT + +Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](../../data-types/geo.md). Supported WKT objects are: + +- POINT +- POLYGON +- MULTIPOLYGON + +**Syntax** + +```sql +WKT(geo_data) +``` + +**Parameters** + +`geo_data` can be one of the following [Geo Data Types](../../data-types/geo.md) or their underlying primitive types: + +- [Point](../../data-types/geo.md#point) +- [Ring](../../data-types/geo.md#ring) +- [Polygon](../../data-types/geo.md#polygon) +- [MultiPolygon](../../data-types/geo.md#multipolygon) + +**Returned value** + +- WKT geometric object `POINT` is returned for a Point. +- WKT geometric object `POLYGON` is returned for a Polygon +- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon. + +**Examples** + +POINT from tuple: + +```sql +SELECT wkt((0., 0.)); +``` + +```response +POINT(0 0) +``` + +POLYGON from an array of tuples or an array of tuple arrays: + +```sql +SELECT wkt([(0., 0.), (10., 0.), (10., 10.), (0., 10.)]); +``` + +```response +POLYGON((0 0,10 0,10 10,0 10)) +``` + +MULTIPOLYGON from an array of multi-dimensional tuple arrays: + +```sql +SELECT wkt([[[(0., 0.), (10., 0.), (10., 10.), (0., 10.)], [(4., 4.), (5., 4.), (5., 5.), (4., 5.)]], [[(-10., -10.), (-10., -9.), (-9., 10.)]]]); +``` + +```response +MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10))) +``` + ## readWKTMultiPolygon Converts a WKT (Well Known Text) MultiPolygon into a MultiPolygon type. 
@@ -53,6 +114,62 @@ String starting with `POLYGON` Polygon +## readWKTPoint + +The `readWKTPoint` function in ClickHouse parses a Well-Known Text (WKT) representation of a Point geometry and returns a point in the internal ClickHouse format. + +### Syntax + +```sql +readWKTPoint(wkt_string) +``` + +### Arguments + +- `wkt_string`: The input WKT string representing a Point geometry. + +### Returned value + +The function returns a ClickHouse internal representation of the Point geometry. + +### Example + +```sql +SELECT readWKTPoint('POINT (1.2 3.4)'); +``` + +```response +(1.2,3.4) +``` + +## readWKTRing + +Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format. + +### Syntax + +```sql +readWKTRing(wkt_string) +``` + +### Arguments + +- `wkt_string`: The input WKT string representing a Polygon geometry. + +### Returned value + +The function returns a ClickHouse internal representation of the ring (closed linestring) geometry. + +### Example + +```sql +SELECT readWKTRing('LINESTRING (1 1, 2 2, 3 3, 1 1)'); +``` + +```response +[(1,1),(2,2),(3,3),(1,1)] +``` + ## polygonsWithinSpherical Returns true or false depending on whether or not one polygon lies completely inside another polygon. Reference https://www.boost.org/doc/libs/1_62_0/libs/geometry/doc/html/geometry/reference/algorithms/within/within_2.html diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 2c837ff4a42..e920ab82988 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -5,80 +5,372 @@ sidebar_label: JSON --- There are two sets of functions to parse JSON. - - `visitParam*` (`simpleJSON*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast. 
+ - `simpleJSON*` (`visitParam*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast. - `JSONExtract*` is made to parse normal JSON. -# visitParam functions +# simpleJSON/visitParam functions ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done. The following assumptions are made: 1. The field name (function argument) must be a constant. -2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` +2. The field name is somehow canonically encoded in JSON. For example: `simpleJSONHas('{"abc":"def"}', 'abc') = 1`, but `simpleJSONHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` 3. Fields are searched for on any nesting level, indiscriminately. If there are multiple matching fields, the first occurrence is used. 4. The JSON does not have space characters outside of string literals. -## visitParamHas(params, name) +## simpleJSONHas -Checks whether there is a field with the `name` name. +Checks whether there is a field named `field_name`. The result is `UInt8`. -Alias: `simpleJSONHas`. +**Syntax** -## visitParamExtractUInt(params, name) - -Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0. - -Alias: `simpleJSONExtractUInt`. - -## visitParamExtractInt(params, name) - -The same as for Int64. - -Alias: `simpleJSONExtractInt`. - -## visitParamExtractFloat(params, name) - -The same as for Float64. - -Alias: `simpleJSONExtractFloat`. - -## visitParamExtractBool(params, name) - -Parses a true/false value. The result is UInt8. - -Alias: `simpleJSONExtractBool`. 
- -## visitParamExtractRaw(params, name) - -Returns the value of a field, including separators. - -Alias: `simpleJSONExtractRaw`. - -Examples: - -``` sql -visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'; -visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'; +```sql +simpleJSONHas(json, field_name) ``` -## visitParamExtractString(params, name) +**Parameters** -Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string. +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) -Alias: `simpleJSONExtractString`. +**Returned value** -Examples: +It returns `1` if the field exists, `0` otherwise. -``` sql -visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'; -visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'; -visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''; -visitParamExtractString('{"abc":"hello}', 'abc') = ''; +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONHas(json, 'foo') FROM jsons; +SELECT simpleJSONHas(json, 'bar') FROM jsons; ``` +```response +1 +0 +``` +## simpleJSONExtractUInt + +Parses `UInt64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. + +**Syntax** + +```sql +simpleJSONExtractUInt(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. 
[String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"4e3"}'); +INSERT INTO jsons VALUES ('{"foo":3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +4 +0 +3 +5 +``` + +## simpleJSONExtractInt + +Parses `Int64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. + +**Syntax** + +```sql +simpleJSONExtractInt(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +-4 +0 +-3 +5 +``` + +## simpleJSONExtractFloat + +Parses `Float64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. 
+ +**Syntax** + +```sql +simpleJSONExtractFloat(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +-4000 +0 +-3.4 +5 +``` + +## simpleJSONExtractBool + +Parses a true/false value from the value of the field named `field_name`. The result is `UInt8`. + +**Syntax** + +```sql +simpleJSONExtractBool(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns `1` if the value of the field is `true`, `0` otherwise. This means this function will return `0` including (and not only) in the following cases: + - If the field doesn't exist. + - If the field contains `true` as a string, e.g.: `{"field":"true"}`. + - If the field contains `1` as a numerical value. 
+ +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":false,"bar":true}'); +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json; +SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +1 +0 +0 +``` + +## simpleJSONExtractRaw + +Returns the value of the field named `field_name` as a `String`, including separators. + +**Syntax** + +```sql +simpleJSONExtractRaw(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json; +``` + +```response + +"-4e3" +-3.4 +5 +{"def":[1,2,3]} +``` + +## simpleJSONExtractString + +Parses `String` in double quotes from the value of the field named `field_name`. + +**Syntax** + +```sql +simpleJSONExtractString(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. 
[String literal](../syntax#string) + +**Returned value** + +It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String` if the field doesn't contain a double-quoted string, if unescaping fails, or if the field doesn't exist. + +**Implementation details** + There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8). +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}'); +INSERT INTO jsons VALUES ('{"foo":"\\u263"}'); +INSERT INTO jsons VALUES ('{"foo":"\\u263a"}'); +INSERT INTO jsons VALUES ('{"foo":"hello}'); + +SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +\n\0 + +☺ + +``` + +## visitParamHas + +This function is [an alias of `simpleJSONHas`](./json-functions#simplejsonhas). + +## visitParamExtractUInt + +This function is [an alias of `simpleJSONExtractUInt`](./json-functions#simplejsonextractuint). + +## visitParamExtractInt + +This function is [an alias of `simpleJSONExtractInt`](./json-functions#simplejsonextractint). + +## visitParamExtractFloat + +This function is [an alias of `simpleJSONExtractFloat`](./json-functions#simplejsonextractfloat). + +## visitParamExtractBool + +This function is [an alias of `simpleJSONExtractBool`](./json-functions#simplejsonextractbool). + +## visitParamExtractRaw + +This function is [an alias of `simpleJSONExtractRaw`](./json-functions#simplejsonextractraw). + +## visitParamExtractString + +This function is [an alias of `simpleJSONExtractString`](./json-functions#simplejsonextractstring). + # JSONExtract functions The following functions are based on [simdjson](https://github.com/lemire/simdjson) designed for more complex JSON parsing requirements. 
diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index b27668caf0c..fc659891b5c 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -299,6 +299,18 @@ sin(x) Type: [Float*](../../sql-reference/data-types/float.md). +**Example** + +Query: + +```sql +SELECT sin(1.23); +``` + +```response +0.9424888019316975 +``` + ## cos Returns the cosine of the argument. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 739b688a0d2..e7fca31483a 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -17,7 +17,7 @@ Returns a named value from the [macros](../../operations/server-configuration-pa **Syntax** -``` sql +```sql getMacro(name); ``` @@ -35,7 +35,7 @@ Type: [String](../../sql-reference/data-types/string.md). Example `` section in the server configuration file: -``` xml +```xml Value @@ -43,13 +43,13 @@ Example `` section in the server configuration file: Query: -``` sql +```sql SELECT getMacro('test'); ``` Result: -``` text +```text ┌─getMacro('test')─┐ │ Value │ └──────────────────┘ @@ -57,12 +57,12 @@ Result: The same value can be retrieved as follows: -``` sql +```sql SELECT * FROM system.macros WHERE macro = 'test'; ``` -``` text +```text ┌─macro─┬─substitution─┐ │ test │ Value │ └───────┴──────────────┘ @@ -74,7 +74,7 @@ Returns the fully qualified domain name of the ClickHouse server. **Syntax** -``` sql +```sql fqdn(); ``` @@ -88,13 +88,13 @@ Type: `String`. **Example** -``` sql +```sql SELECT FQDN(); ``` Result: -``` text +```text ┌─FQDN()──────────────────────────┐ │ clickhouse.ru-central1.internal │ └─────────────────────────────────┘ @@ -104,7 +104,7 @@ Result: Extracts the tail of a string following its last slash or backslash. This function if often used to extract the filename from a path. 
-``` sql +```sql basename(expr) ``` @@ -123,13 +123,13 @@ A string that contains: Query: -``` sql +```sql SELECT 'some/long/path/to/file' AS a, basename(a) ``` Result: -``` text +```text ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐ │ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ @@ -137,13 +137,13 @@ Result: Query: -``` sql +```sql SELECT 'some\\long\\path\\to\\file' AS a, basename(a) ``` Result: -``` text +```text ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐ │ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ @@ -151,13 +151,13 @@ Result: Query: -``` sql +```sql SELECT 'some-file-name' AS a, basename(a) ``` Result: -``` text +```text ┌─a──────────────┬─basename('some-file-name')─┐ │ some-file-name │ some-file-name │ └────────────────┴────────────────────────────┘ @@ -170,11 +170,11 @@ This function is used by the system to implement Pretty formats. `NULL` is represented as a string corresponding to `NULL` in `Pretty` formats. -``` sql +```sql SELECT visibleWidth(NULL) ``` -``` text +```text ┌─visibleWidth(NULL)─┐ │ 4 │ └────────────────────┘ @@ -256,7 +256,7 @@ SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16 Result: -``` text +```text Row 1: ────── key: 1 @@ -298,13 +298,99 @@ Full columns and constants are represented differently in memory. Functions usua Accepts any arguments, including `NULL` and does nothing. Always returns 0. The argument is internally still evaluated. Useful e.g. for benchmarks. -## sleep(seconds) +## sleep -Sleeps ‘seconds’ seconds for each data block. The sleep time can be specified as integer or as floating-point number. +Used to introduce a delay or pause in the execution of a query. It is primarily used for testing and debugging purposes. -## sleepEachRow(seconds) +**Syntax** -Sleeps ‘seconds’ seconds for each row. 
The sleep time can be specified as integer or as floating-point number. +```sql +sleep(seconds) +``` + +**Arguments** + +- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. + +**Returned value** + +This function does not return any value. + +**Example** + +```sql +SELECT sleep(2); +``` + +This function does not return any value. However, if you run the function with `clickhouse client` you will see something similar to: + +```response +SELECT sleep(2) + +Query id: 8aa9943e-a686-45e1-8317-6e8e3a5596ac + +┌─sleep(2)─┐ +│ 0 │ +└──────────┘ + +1 row in set. Elapsed: 2.012 sec. +``` + +This query will pause for 2 seconds before completing. During this time, no results will be returned, and the query will appear to be hanging or unresponsive. + +**Implementation details** + +The `sleep()` function is generally not used in production environments, as it can negatively impact query performance and system responsiveness. However, it can be useful in the following scenarios: + +1. **Testing**: When testing or benchmarking ClickHouse, you may want to simulate delays or introduce pauses to observe how the system behaves under certain conditions. +2. **Debugging**: If you need to examine the state of the system or the execution of a query at a specific point in time, you can use `sleep()` to introduce a pause, allowing you to inspect or collect relevant information. +3. **Simulation**: In some cases, you may want to simulate real-world scenarios where delays or pauses occur, such as network latency or external system dependencies. + +It's important to use the `sleep()` function judiciously and only when necessary, as it can potentially impact the overall performance and responsiveness of your ClickHouse system. 
+ +## sleepEachRow + +Pauses the execution of a query for a specified number of seconds for each row in the result set. + +**Syntax** + +```sql +sleepEachRow(seconds) +``` + +**Arguments** + +- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. + +**Returned value** + +This function returns the same input values as it receives, without modifying them. + +**Example** + +```sql +SELECT number, sleepEachRow(0.5) FROM system.numbers LIMIT 5; +``` + +```response +┌─number─┬─sleepEachRow(0.5)─┐ +│ 0 │ 0 │ +│ 1 │ 0 │ +│ 2 │ 0 │ +│ 3 │ 0 │ +│ 4 │ 0 │ +└────────┴───────────────────┘ +``` + +But the output will be delayed, with a 0.5-second pause between each row. + +The `sleepEachRow()` function is primarily used for testing and debugging purposes, similar to the `sleep()` function. It allows you to simulate delays or introduce pauses in the processing of each row, which can be useful in scenarios such as: + +1. **Testing**: When testing or benchmarking ClickHouse's performance under specific conditions, you can use `sleepEachRow()` to simulate delays or introduce pauses for each row processed. +2. **Debugging**: If you need to examine the state of the system or the execution of a query for each row processed, you can use `sleepEachRow()` to introduce pauses, allowing you to inspect or collect relevant information. +3. **Simulation**: In some cases, you may want to simulate real-world scenarios where delays or pauses occur for each row processed, such as when dealing with external systems or network latencies. 
+ +Like the [`sleep()` function](#sleep), it's important to use `sleepEachRow()` judiciously and only when necessary, as it can significantly impact the overall performance and responsiveness of your ClickHouse system, especially when dealing with large result sets. ## currentDatabase() @@ -315,7 +401,7 @@ Useful in table engine parameters of `CREATE TABLE` queries where you need to sp Returns the name of the current user. In case of a distributed query, the name of the user who initiated the query is returned. -``` sql +```sql SELECT currentUser(); ``` @@ -330,13 +416,13 @@ Type: `String`. **Example** -``` sql +```sql SELECT currentUser(); ``` Result: -``` text +```text ┌─currentUser()─┐ │ default │ └───────────────┘ @@ -352,7 +438,7 @@ This function is mostly intended for development, debugging and demonstration. **Syntax** -``` sql +```sql isConstant(x) ``` @@ -371,13 +457,13 @@ Type: [UInt8](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT isConstant(x + 1) FROM (SELECT 43 AS x) ``` Result: -``` text +```text ┌─isConstant(plus(x, 1))─┐ │ 1 │ └────────────────────────┘ @@ -385,13 +471,13 @@ Result: Query: -``` sql +```sql WITH 3.14 AS pi SELECT isConstant(cos(pi)) ``` Result: -``` text +```text ┌─isConstant(cos(pi))─┐ │ 1 │ └─────────────────────┘ @@ -399,13 +485,13 @@ Result: Query: -``` sql +```sql SELECT isConstant(number) FROM numbers(1) ``` Result: -``` text +```text ┌─isConstant(number)─┐ │ 0 │ └────────────────────┘ @@ -425,7 +511,7 @@ Checks whether a floating point value is finite. **Syntax** -``` sql +```sql ifNotFinite(x,y) ``` @@ -479,7 +565,7 @@ The band is drawn with accuracy to one eighth of a symbol. 
Example: -``` sql +```sql SELECT toHour(EventTime) AS h, count() AS c, @@ -489,7 +575,7 @@ GROUP BY h ORDER BY h ASC ``` -``` text +```text ┌──h─┬──────c─┬─bar────────────────┐ │ 0 │ 292907 │ █████████▋ │ │ 1 │ 180563 │ ██████ │ @@ -547,7 +633,7 @@ For example, the first argument could have type `Int64`, while the second argume Example: -``` sql +```sql SELECT transform(SearchEngineID, [2, 3], ['Yandex', 'Google'], 'Other') AS title, count() AS c @@ -557,7 +643,7 @@ GROUP BY title ORDER BY c DESC ``` -``` text +```text ┌─title─────┬──────c─┐ │ Yandex │ 498635 │ │ Google │ 229872 │ @@ -571,7 +657,7 @@ Similar to the other variation but has no ‘default’ argument. In case no mat Example: -``` sql +```sql SELECT transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com']) AS s, count() AS c @@ -581,7 +667,7 @@ ORDER BY count() DESC LIMIT 10 ``` -``` text +```text ┌─s──────────────┬───────c─┐ │ │ 2906259 │ │ www.yandex │ 867767 │ @@ -601,13 +687,13 @@ Given a size (number of bytes), this function returns a readable, rounded size w Example: -``` sql +```sql SELECT arrayJoin([1, 1024, 1024*1024, 192851925]) AS filesize_bytes, formatReadableDecimalSize(filesize_bytes) AS filesize ``` -``` text +```text ┌─filesize_bytes─┬─filesize───┐ │ 1 │ 1.00 B │ │ 1024 │ 1.02 KB │ @@ -622,7 +708,7 @@ Given a size (number of bytes), this function returns a readable, rounded size w Example: -``` sql +```sql SELECT arrayJoin([1, 1024, 1024*1024, 192851925]) AS filesize_bytes, formatReadableSize(filesize_bytes) AS filesize @@ -630,7 +716,7 @@ SELECT Alias: `FORMAT_BYTES`. 
-``` text +```text ┌─filesize_bytes─┬─filesize───┐ │ 1 │ 1.00 B │ │ 1024 │ 1.00 KiB │ @@ -645,13 +731,13 @@ Given a number, this function returns a rounded number with suffix (thousand, mi Example: -``` sql +```sql SELECT arrayJoin([1024, 1234 * 1000, (4567 * 1000) * 1000, 98765432101234]) AS number, formatReadableQuantity(number) AS number_for_humans ``` -``` text +```text ┌─────────number─┬─number_for_humans─┐ │ 1024 │ 1.02 thousand │ │ 1234000 │ 1.23 million │ @@ -666,7 +752,7 @@ Given a time interval (delta) in seconds, this function returns a time delta wit **Syntax** -``` sql +```sql formatReadableTimeDelta(column[, maximum_unit, minimum_unit]) ``` @@ -674,21 +760,22 @@ formatReadableTimeDelta(column[, maximum_unit, minimum_unit]) - `column` — A column with a numeric time delta. - `maximum_unit` — Optional. Maximum unit to show. - * Acceptable values: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`. - * Default value: `years`. + - Acceptable values: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`. + - Default value: `years`. - `minimum_unit` — Optional. Minimum unit to show. All smaller units are truncated. - * Acceptable values: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`. - * If explicitly specified value is bigger than `maximum_unit`, an exception will be thrown. - * Default value: `seconds` if `maximum_unit` is `seconds` or bigger, `nanoseconds` otherwise. + - Acceptable values: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`. + - If explicitly specified value is bigger than `maximum_unit`, an exception will be thrown. + - Default value: `seconds` if `maximum_unit` is `seconds` or bigger, `nanoseconds` otherwise. 
**Example** -``` sql + +```sql SELECT arrayJoin([100, 12345, 432546534]) AS elapsed, formatReadableTimeDelta(elapsed) AS time_delta ``` -``` text +```text ┌────elapsed─┬─time_delta ─────────────────────────────────────────────────────┐ │ 100 │ 1 minute and 40 seconds │ │ 12345 │ 3 hours, 25 minutes and 45 seconds │ @@ -696,13 +783,13 @@ SELECT └────────────┴─────────────────────────────────────────────────────────────────┘ ``` -``` sql +```sql SELECT arrayJoin([100, 12345, 432546534]) AS elapsed, formatReadableTimeDelta(elapsed, 'minutes') AS time_delta ``` -``` text +```text ┌────elapsed─┬─time_delta ─────────────────────────────────────────────────────┐ │ 100 │ 1 minute and 40 seconds │ │ 12345 │ 205 minutes and 45 seconds │ @@ -738,7 +825,6 @@ parseTimeDelta(timestr) - `timestr` — A sequence of numbers followed by something resembling a time unit. - **Returned value** - A floating-point number with the number of seconds. @@ -850,7 +936,7 @@ The window function that provides access to a row at a specified offset before o **Syntax** -``` sql +```sql neighbor(column, offset[, default_value]) ``` @@ -880,13 +966,13 @@ Type: type of data blocks affected or default value type. 
Query: -``` sql +```sql SELECT number, neighbor(number, 2) FROM system.numbers LIMIT 10; ``` Result: -``` text +```text ┌─number─┬─neighbor(number, 2)─┐ │ 0 │ 2 │ │ 1 │ 3 │ @@ -903,13 +989,13 @@ Result: Query: -``` sql +```sql SELECT number, neighbor(number, 2, 999) FROM system.numbers LIMIT 10; ``` Result: -``` text +```text ┌─number─┬─neighbor(number, 2, 999)─┐ │ 0 │ 2 │ │ 1 │ 3 │ @@ -928,7 +1014,7 @@ This function can be used to compute year-over-year metric value: Query: -``` sql +```sql WITH toDate('2018-01-01') AS start_date SELECT toStartOfMonth(start_date + (number * 32)) AS month, @@ -940,7 +1026,7 @@ FROM numbers(16) Result: -``` text +```text ┌──────month─┬─money─┬─prev_year─┬─year_over_year─┐ │ 2018-01-01 │ 32 │ 0 │ 0 │ │ 2018-02-01 │ 63 │ 0 │ 0 │ @@ -977,7 +1063,7 @@ To prevent that you can create a subquery with [ORDER BY](../../sql-reference/st Example: -``` sql +```sql SELECT EventID, EventTime, @@ -994,7 +1080,7 @@ FROM ) ``` -``` text +```text ┌─EventID─┬───────────EventTime─┬─delta─┐ │ 1106 │ 2016-11-24 00:00:04 │ 0 │ │ 1107 │ 2016-11-24 00:00:05 │ 1 │ @@ -1006,7 +1092,7 @@ FROM Please note that the block size affects the result. The internal state of `runningDifference` state is reset for each new block. -``` sql +```sql SELECT number, runningDifference(number + 1) AS diff @@ -1014,7 +1100,7 @@ FROM numbers(100000) WHERE diff != 1 ``` -``` text +```text ┌─number─┬─diff─┐ │ 0 │ 0 │ └────────┴──────┘ @@ -1023,7 +1109,7 @@ WHERE diff != 1 └────────┴──────┘ ``` -``` sql +```sql set max_block_size=100000 -- default value is 65536! SELECT @@ -1033,7 +1119,7 @@ FROM numbers(100000) WHERE diff != 1 ``` -``` text +```text ┌─number─┬─diff─┐ │ 0 │ 0 │ └────────┴──────┘ @@ -1049,21 +1135,20 @@ Calculates the number of concurrent events. Each event has a start time and an end time. The start time is included in the event, while the end time is excluded. Columns with a start time and an end time must be of the same data type. 
The function calculates the total number of active (concurrent) events for each event start time. - :::tip Events must be ordered by the start time in ascending order. If this requirement is violated the function raises an exception. Every data block is processed separately. If events from different data blocks overlap then they can not be processed correctly. ::: **Syntax** -``` sql +```sql runningConcurrency(start, end) ``` **Arguments** - `start` — A column with the start time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). **Returned values** @@ -1075,7 +1160,7 @@ Type: [UInt32](../../sql-reference/data-types/int-uint.md) Consider the table: -``` text +```text ┌──────start─┬────────end─┐ │ 2021-03-03 │ 2021-03-11 │ │ 2021-03-06 │ 2021-03-12 │ @@ -1086,13 +1171,13 @@ Consider the table: Query: -``` sql +```sql SELECT start, runningConcurrency(start, end) FROM example_table; ``` Result: -``` text +```text ┌──────start─┬─runningConcurrency(start, end)─┐ │ 2021-03-03 │ 1 │ │ 2021-03-06 │ 2 │ @@ -1118,7 +1203,7 @@ Given a MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexa Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). An exception is thrown if the type is not `Enum`. 
-``` sql +```sql getSizeOfEnumType(value) ``` @@ -1132,11 +1217,11 @@ getSizeOfEnumType(value) **Example** -``` sql +```sql SELECT getSizeOfEnumType( CAST('a' AS Enum8('a' = 1, 'b' = 2) ) ) AS x ``` -``` text +```text ┌─x─┐ │ 2 │ └───┘ @@ -1146,7 +1231,7 @@ SELECT getSizeOfEnumType( CAST('a' AS Enum8('a' = 1, 'b' = 2) ) ) AS x Returns the size on disk without considering compression. -``` sql +```sql blockSerializedSize(value[, value[, ...]]) ``` @@ -1162,13 +1247,13 @@ blockSerializedSize(value[, value[, ...]]) Query: -``` sql +```sql SELECT blockSerializedSize(maxState(1)) as x ``` Result: -``` text +```text ┌─x─┐ │ 2 │ └───┘ @@ -1178,7 +1263,7 @@ Result: Returns the internal name of the data type that represents the value. -``` sql +```sql toColumnTypeName(value) ``` @@ -1194,13 +1279,13 @@ toColumnTypeName(value) Difference between `toTypeName ' and ' toColumnTypeName`: -``` sql +```sql SELECT toTypeName(CAST('2018-01-01 01:02:03' AS DateTime)) ``` Result: -``` text +```text ┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ │ DateTime │ └─────────────────────────────────────────────────────┘ @@ -1208,13 +1293,13 @@ Result: Query: -``` sql +```sql SELECT toColumnTypeName(CAST('2018-01-01 01:02:03' AS DateTime)) ``` Result: -``` text +```text ┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ │ Const(UInt32) │ └───────────────────────────────────────────────────────────┘ @@ -1226,7 +1311,7 @@ The example shows that the `DateTime` data type is internally stored as `Const(U Outputs a detailed description of data structures in RAM -``` sql +```sql dumpColumnStructure(value) ``` @@ -1240,11 +1325,11 @@ dumpColumnStructure(value) **Example** -``` sql +```sql SELECT dumpColumnStructure(CAST('2018-01-01 01:02:03', 'DateTime')) ``` -``` text +```text ┌─dumpColumnStructure(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ │ DateTime, Const(size = 1, UInt32(size = 1)) │ └──────────────────────────────────────────────────────────────┘ @@ -1256,7 +1341,7 @@ Returns 
the default value for the given data type. Does not include default values for custom columns set by the user. -``` sql +```sql defaultValueOfArgumentType(expression) ``` @@ -1274,13 +1359,13 @@ defaultValueOfArgumentType(expression) Query: -``` sql +```sql SELECT defaultValueOfArgumentType( CAST(1 AS Int8) ) ``` Result: -``` text +```text ┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐ │ 0 │ └─────────────────────────────────────────────┘ @@ -1288,13 +1373,13 @@ Result: Query: -``` sql +```sql SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) ``` Result: -``` text +```text ┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐ │ ᴺᵁᴸᴸ │ └───────────────────────────────────────────────────────┘ @@ -1306,7 +1391,7 @@ Returns the default value for the given type name. Does not include default values for custom columns set by the user. -``` sql +```sql defaultValueOfTypeName(type) ``` @@ -1324,13 +1409,13 @@ defaultValueOfTypeName(type) Query: -``` sql +```sql SELECT defaultValueOfTypeName('Int8') ``` Result: -``` text +```text ┌─defaultValueOfTypeName('Int8')─┐ │ 0 │ └────────────────────────────────┘ @@ -1338,13 +1423,13 @@ Result: Query: -``` sql +```sql SELECT defaultValueOfTypeName('Nullable(Int8)') ``` Result: -``` text +```text ┌─defaultValueOfTypeName('Nullable(Int8)')─┐ │ ᴺᵁᴸᴸ │ └──────────────────────────────────────────┘ @@ -1456,7 +1541,7 @@ Creates an array with a single value. Used for the internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin). -``` sql +```sql SELECT replicate(x, arr); ``` @@ -1475,13 +1560,13 @@ Type: `Array`. 
Query: -``` sql +```sql SELECT replicate(1, ['a', 'b', 'c']) ``` Result: -``` text +```text ┌─replicate(1, ['a', 'b', 'c'])─┐ │ [1,1,1] │ └───────────────────────────────┘ @@ -1493,7 +1578,7 @@ Returns the amount of free space in the filesystem hosting the database persiste **Syntax** -``` sql +```sql filesystemAvailable() ``` @@ -1507,13 +1592,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT formatReadableSize(filesystemAvailable()) AS "Available space"; ``` Result: -``` text +```text ┌─Available space─┐ │ 30.75 GiB │ └─────────────────┘ @@ -1525,7 +1610,7 @@ Returns the total amount of the free space on the filesystem hosting the databas **Syntax** -``` sql +```sql filesystemFree() ``` @@ -1539,13 +1624,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT formatReadableSize(filesystemFree()) AS "Free space"; ``` Result: -``` text +```text ┌─Free space─┐ │ 32.39 GiB │ └────────────┘ @@ -1557,7 +1642,7 @@ Returns the capacity of the filesystem in bytes. Needs the [path](../../operatio **Syntax** -``` sql +```sql filesystemCapacity() ``` @@ -1571,13 +1656,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT formatReadableSize(filesystemCapacity()) AS "Capacity"; ``` Result: -``` text +```text ┌─Capacity──┐ │ 39.32 GiB │ └───────────┘ @@ -1589,7 +1674,7 @@ Calculates the result of an aggregate function based on a single value. This fun **Syntax** -``` sql +```sql initializeAggregation (aggregate_function, arg1, arg2, ..., argN) ``` @@ -1611,6 +1696,7 @@ Query: ```sql SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM numbers(10000)); ``` + Result: ```text @@ -1663,7 +1749,7 @@ Given a state of aggregate function, this function returns the result of aggrega **Syntax** -``` sql +```sql finalizeAggregation(state) ``` @@ -1768,7 +1854,7 @@ The state is reset for each new block of data. 
**Syntax** -``` sql +```sql runningAccumulate(agg_state[, grouping]); ``` @@ -1789,13 +1875,13 @@ Consider how you can use `runningAccumulate` to find the cumulative sum of numbe Query: -``` sql +```sql SELECT k, runningAccumulate(sum_k) AS res FROM (SELECT number as k, sumState(k) AS sum_k FROM numbers(10) GROUP BY k ORDER BY k); ``` Result: -``` text +```text ┌─k─┬─res─┐ │ 0 │ 0 │ │ 1 │ 1 │ @@ -1823,7 +1909,7 @@ The following example shows the `groupping` parameter usage: Query: -``` sql +```sql SELECT grouping, item, @@ -1842,7 +1928,7 @@ FROM Result: -``` text +```text ┌─grouping─┬─item─┬─res─┐ │ 0 │ 0 │ 0 │ │ 0 │ 1 │ 1 │ @@ -1874,7 +1960,7 @@ Only supports tables created with the `ENGINE = Join(ANY, LEFT, )` st **Syntax** -``` sql +```sql joinGet(join_storage_table_name, `value_column`, join_keys) ``` @@ -1896,13 +1982,13 @@ More info about `join_use_nulls` in [Join operation](../../engines/table-engines Input table: -``` sql +```sql CREATE DATABASE db_test CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 1 INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13) ``` -``` text +```text ┌─id─┬─val─┐ │ 4 │ 13 │ │ 2 │ 12 │ @@ -1912,13 +1998,13 @@ INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13) Query: -``` sql +```sql SELECT joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4) SETTINGS join_use_nulls = 1 ``` Result: -``` text +```text ┌─joinGet(db_test.id_val, 'val', toUInt32(number))─┐ │ 0 │ │ 11 │ @@ -1936,7 +2022,7 @@ This function is not available in ClickHouse Cloud. Evaluate an external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learning. Accepts a path to a catboost model and model arguments (features). Returns Float64. 
-``` sql +```sql SELECT feat1, ..., feat_n, catboostEvaluate('/path/to/model.bin', feat_1, ..., feat_n) AS prediction FROM data_table ``` @@ -1949,7 +2035,7 @@ Before evaluating catboost models, the `libcatboostmodel.` library mus Next, specify the path to `libcatboostmodel.` in the clickhouse configuration: -``` xml +```xml ... /path/to/libcatboostmodel.so @@ -1962,7 +2048,7 @@ At the first execution of `catboostEvaluate()`, the server starts the library br communicate using a HTTP interface. By default, port `9012` is used. A different port can be specified as follows - this is useful if port `9012` is already assigned to a different service. -``` xml +```xml 9019 @@ -1986,13 +2072,13 @@ To use the `error_code` argument, configuration parameter `allow_custom_error_co **Example** -``` sql +```sql SELECT throwIf(number = 3, 'Too many') FROM numbers(10); ``` Result: -``` text +```text ↙ Progress: 0.00 rows, 0.00 B (0.00 rows/s., 0.00 B/s.) Received exception from server (version 19.14.1): Code: 395. DB::Exception: Received from localhost:9000. DB::Exception: Too many. ``` @@ -2003,7 +2089,7 @@ Returns its argument. Intended for debugging and testing. Allows to cancel using **Syntax** -``` sql +```sql identity(x) ``` @@ -2011,13 +2097,13 @@ identity(x) Query: -``` sql +```sql SELECT identity(42); ``` Result: -``` text +```text ┌─identity(42)─┐ │ 42 │ └──────────────┘ @@ -2064,7 +2150,7 @@ Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is **Syntax** -``` sql +```sql isDecimalOverflow(d, [p]) ``` @@ -2082,7 +2168,7 @@ isDecimalOverflow(d, [p]) Query: -``` sql +```sql SELECT isDecimalOverflow(toDecimal32(1000000000, 0), 9), isDecimalOverflow(toDecimal32(1000000000, 0)), isDecimalOverflow(toDecimal32(-1000000000, 0), 9), @@ -2091,7 +2177,7 @@ SELECT isDecimalOverflow(toDecimal32(1000000000, 0), 9), Result: -``` text +```text 1 1 1 1 ``` @@ -2101,7 +2187,7 @@ Returns number of decimal digits need to represent a value. 
**Syntax** -``` sql +```sql countDigits(x) ``` @@ -2123,7 +2209,7 @@ For `Decimal` values takes into account their scales: calculates result over und Query: -``` sql +```sql SELECT countDigits(toDecimal32(1, 9)), countDigits(toDecimal32(-1, 9)), countDigits(toDecimal64(1, 18)), countDigits(toDecimal64(-1, 18)), countDigits(toDecimal128(1, 38)), countDigits(toDecimal128(-1, 38)); @@ -2131,7 +2217,7 @@ SELECT countDigits(toDecimal32(1, 9)), countDigits(toDecimal32(-1, 9)), Result: -``` text +```text 10 10 19 19 39 39 ``` @@ -2143,13 +2229,13 @@ Type: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md) **Syntax** -``` sql +```sql errorCodeToName(1) ``` Result: -``` text +```text UNSUPPORTED_METHOD ``` @@ -2160,7 +2246,7 @@ If executed in the context of a distributed table, this function generates a nor **Syntax** -``` sql +```sql tcpPort() ``` @@ -2178,13 +2264,13 @@ Type: [UInt16](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT tcpPort(); ``` Result: -``` text +```text ┌─tcpPort()─┐ │ 9000 │ └───────────┘ @@ -2202,7 +2288,7 @@ The command [SET PROFILE](../../sql-reference/statements/set.md#query-set) could **Syntax** -``` sql +```sql currentProfiles() ``` @@ -2214,11 +2300,11 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere ## enabledProfiles - Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file). +Returns settings profiles, assigned to the current user both explicitly and implicitly. 
Explicitly assigned profiles are the same as returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file). **Syntax** -``` sql +```sql enabledProfiles() ``` @@ -2234,7 +2320,7 @@ Returns all the profiles specified at the current user's definition (see [CREATE **Syntax** -``` sql +```sql defaultProfiles() ``` @@ -2250,7 +2336,7 @@ Returns the roles assigned to the current user. The roles can be changed by the **Syntax** -``` sql +```sql currentRoles() ``` @@ -2266,7 +2352,7 @@ Returns the names of the current roles and the roles, granted to some of the cur **Syntax** -``` sql +```sql enabledRoles() ``` @@ -2282,7 +2368,7 @@ Returns the roles which are enabled by default for the current user when he logs **Syntax** -``` sql +```sql defaultRoles() ``` @@ -2298,7 +2384,7 @@ Returns the server port number. When the port is not used by the server, throws **Syntax** -``` sql +```sql getServerPort(port_name) ``` @@ -2306,16 +2392,16 @@ getServerPort(port_name) - `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string). Possible values: - - 'tcp_port' - - 'tcp_port_secure' - - 'http_port' - - 'https_port' - - 'interserver_http_port' - - 'interserver_https_port' - - 'mysql_port' - - 'postgresql_port' - - 'grpc_port' - - 'prometheus.port' + - 'tcp_port' + - 'tcp_port_secure' + - 'http_port' + - 'https_port' + - 'interserver_http_port' + - 'interserver_https_port' + - 'mysql_port' + - 'postgresql_port' + - 'grpc_port' + - 'prometheus.port' **Returned value** @@ -2327,13 +2413,13 @@ Type: [UInt16](../../sql-reference/data-types/int-uint.md). 
Query: -``` sql +```sql SELECT getServerPort('tcp_port'); ``` Result: -``` text +```text ┌─getServerPort('tcp_port')─┐ │ 9000 │ └───────────────────────────┘ @@ -2347,7 +2433,7 @@ In contrast to [initialQueryID](#initial-query-id) function, `queryID` can retur **Syntax** -``` sql +```sql queryID() ``` @@ -2361,7 +2447,7 @@ Type: [String](../../sql-reference/data-types/string.md) Query: -``` sql +```sql CREATE TABLE tmp (str String) ENGINE = Log; INSERT INTO tmp (*) VALUES ('a'); SELECT count(DISTINCT t) FROM (SELECT queryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID()); @@ -2369,7 +2455,7 @@ SELECT count(DISTINCT t) FROM (SELECT queryID() AS t FROM remote('127.0.0.{1..3} Result: -``` text +```text ┌─count()─┐ │ 3 │ └─────────┘ @@ -2383,7 +2469,7 @@ In contrast to [queryID](#query-id) function, `initialQueryID` returns the same **Syntax** -``` sql +```sql initialQueryID() ``` @@ -2397,7 +2483,7 @@ Type: [String](../../sql-reference/data-types/string.md) Query: -``` sql +```sql CREATE TABLE tmp (str String) ENGINE = Log; INSERT INTO tmp (*) VALUES ('a'); SELECT count(DISTINCT t) FROM (SELECT initialQueryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID()); @@ -2405,7 +2491,7 @@ SELECT count(DISTINCT t) FROM (SELECT initialQueryID() AS t FROM remote('127.0.0 Result: -``` text +```text ┌─count()─┐ │ 1 │ └─────────┘ @@ -2418,7 +2504,7 @@ If a query is not distributed then constant value `0` is returned. **Syntax** -``` sql +```sql shardNum() ``` @@ -2434,7 +2520,7 @@ In the following example a configuration with two shards is used. 
The query is e Query: -``` sql +```sql CREATE TABLE shard_num_example (dummy UInt8) ENGINE=Distributed(test_cluster_two_shards_localhost, system, one, dummy); SELECT dummy, shardNum(), shardCount() FROM shard_num_example; @@ -2442,7 +2528,7 @@ SELECT dummy, shardNum(), shardCount() FROM shard_num_example; Result: -``` text +```text ┌─dummy─┬─shardNum()─┬─shardCount()─┐ │ 0 │ 2 │ 2 │ │ 0 │ 1 │ 2 │ @@ -2460,7 +2546,7 @@ If a query is not distributed then constant value `0` is returned. **Syntax** -``` sql +```sql shardCount() ``` @@ -2480,7 +2566,7 @@ Returns a string with the current OS kernel version. **Syntax** -``` sql +```sql getOSKernelVersion() ``` @@ -2498,13 +2584,13 @@ Type: [String](../../sql-reference/data-types/string.md). Query: -``` sql +```sql SELECT getOSKernelVersion(); ``` Result: -``` text +```text ┌─getOSKernelVersion()────┐ │ Linux 4.15.0-55-generic │ └─────────────────────────┘ @@ -2516,7 +2602,7 @@ Returns the uptime of the current ZooKeeper session in seconds. **Syntax** -``` sql +```sql zookeeperSessionUptime() ``` @@ -2534,13 +2620,13 @@ Type: [UInt32](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT zookeeperSessionUptime(); ``` Result: -``` text +```text ┌─zookeeperSessionUptime()─┐ │ 286 │ └──────────────────────────┘ @@ -2552,7 +2638,7 @@ Generates random table structure in a format `column1_name column1_type, column2 **Syntax** -``` sql +```sql generateRandomStructure([number_of_columns, seed]) ``` @@ -2573,13 +2659,13 @@ Type: [String](../../sql-reference/data-types/string.md). 
Query: -``` sql +```sql SELECT generateRandomStructure() ``` Result: -``` text +```text ┌─generateRandomStructure()─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ c1 Decimal32(5), c2 Date, c3 Tuple(LowCardinality(String), Int128, UInt64, UInt16, UInt8, IPv6), c4 Array(UInt128), c5 UInt32, c6 IPv4, c7 Decimal256(64), c8 Decimal128(3), c9 UInt256, c10 UInt64, c11 DateTime │ └───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ @@ -2587,13 +2673,13 @@ Result: Query: -``` sql +```sql SELECT generateRandomStructure(1) ``` Result: -``` text +```text ┌─generateRandomStructure(1)─┐ │ c1 Map(UInt256, UInt16) │ └────────────────────────────┘ @@ -2601,13 +2687,13 @@ Result: Query: -``` sql +```sql SELECT generateRandomStructure(NULL, 33) ``` Result: -``` text +```text ┌─generateRandomStructure(NULL, 33)─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ c1 DateTime, c2 Enum8('c2V0' = 0, 'c2V1' = 1, 'c2V2' = 2, 'c2V3' = 3), c3 LowCardinality(Nullable(FixedString(30))), c4 Int16, c5 Enum8('c5V0' = 0, 'c5V1' = 1, 'c5V2' = 2, 'c5V3' = 3), c6 Nullable(UInt8), c7 String, c8 Nested(e1 IPv4, e2 UInt8, e3 UInt16, e4 UInt16, e5 Int32, e6 Map(Date, Decimal256(70))) │ 
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ @@ -2623,7 +2709,7 @@ Converts ClickHouse table structure to CapnProto schema. **Syntax** -``` sql +```sql structureToCapnProtoSchema(structure) ``` @@ -2634,7 +2720,7 @@ structureToCapnProtoSchema(structure) **Returned value** -- CapnProto schema +- CapnProto schema Type: [String](../../sql-reference/data-types/string.md). @@ -2642,13 +2728,13 @@ Type: [String](../../sql-reference/data-types/string.md). Query: -``` sql +```sql SELECT structureToCapnProtoSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB ``` Result: -``` text +```text @0xf96402dd754d0eb7; struct Message @@ -2661,13 +2747,13 @@ struct Message Query: -``` sql +```sql SELECT structureToCapnProtoSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB ``` Result: -``` text +```text @0xd1c8320fecad2b7f; struct Message @@ -2702,13 +2788,13 @@ struct Message Query: -``` sql +```sql SELECT structureToCapnProtoSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB ``` Result: -``` text +```text @0x96ab2d4ab133c6e1; struct Root @@ -2724,7 +2810,7 @@ Converts ClickHouse table structure to Protobuf schema. **Syntax** -``` sql +```sql structureToProtobufSchema(structure) ``` @@ -2743,13 +2829,13 @@ Type: [String](../../sql-reference/data-types/string.md). 
Query: -``` sql +```sql SELECT structureToProtobufSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB ``` Result: -``` text +```text syntax = "proto3"; message Message @@ -2762,13 +2848,13 @@ message Message Query: -``` sql +```sql SELECT structureToProtobufSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB ``` Result: -``` text +```text syntax = "proto3"; message Message @@ -2786,13 +2872,13 @@ message Message Query: -``` sql +```sql SELECT structureToProtobufSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB ``` Result: -``` text +```text syntax = "proto3"; message Root @@ -2964,7 +3050,7 @@ Calculates minimum required sample size for an A/B test comparing conversions (p **Syntax** -``` sql +```sql minSampleSizeConversion(baseline, mde, power, alpha) ``` @@ -2989,13 +3075,13 @@ A named [Tuple](../data-types/tuple.md) with 3 elements: The following query calculates the required sample size for an A/B test with baseline conversion of 25%, MDE of 3%, significance level of 5%, and the desired statistical power of 80%: -``` sql +```sql SELECT minSampleSizeConversion(0.25, 0.03, 0.80, 0.05) AS sample_size; ``` Result: -``` text +```text ┌─sample_size───────────────────┐ │ (3396.077603219163,0.22,0.28) │ └───────────────────────────────┘ @@ -3007,7 +3093,7 @@ Calculates minimum required sample size for an A/B test comparing means of a con **Syntax** -``` sql +```sql minSampleSizeContinous(baseline, sigma, mde, power, alpha) ``` @@ -3019,7 +3105,7 @@ Uses the formula described in [this article](https://towardsdatascience.com/requ - `baseline` — Baseline value of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). - `sigma` — Baseline standard deviation of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). 
-- `mde` — Minimum detectable effect (MDE) as percentage of the baseline value (e.g. for a baseline value 112.25 the MDE 0.03 means an expected change to 112.25 ± 112.25*0.03). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `mde` — Minimum detectable effect (MDE) as percentage of the baseline value (e.g. for a baseline value 112.25 the MDE 0.03 means an expected change to 112.25 ± 112.25\*0.03). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). - `power` — Required statistical power of a test (1 - probability of Type II error). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). - `alpha` — Required significance level of a test (probability of Type I error). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). @@ -3035,13 +3121,13 @@ A named [Tuple](../data-types/tuple.md) with 3 elements: The following query calculates the required sample size for an A/B test on a metric with baseline value of 112.25, standard deviation of 21.1, MDE of 3%, significance level of 5%, and the desired statistical power of 80%: -``` sql +```sql SELECT minSampleSizeContinous(112.25, 21.1, 0.03, 0.80, 0.05) AS sample_size; ``` Result: -``` text +```text ┌─sample_size───────────────────────────┐ │ (616.2931945826209,108.8825,115.6175) │ └───────────────────────────────────────┘ diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 6fd31e8d25c..2d7752ed022 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -11,79 +11,173 @@ elimination](../../sql-reference/functions/index.md#common-subexpression-elimina function return different random values. 
Related content + - Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) :::note The random numbers are generated by non-cryptographic algorithms. ::: -## rand, rand32 +## rand -Returns a random UInt32 number, evenly distributed across the range of all possible UInt32 numbers. +Returns a random UInt32 number with uniform distribution. -Uses a linear congruential generator. +Uses a linear congruential generator with an initial state obtained from the system, which means that while it appears random, it's not truly random and can be predictable if the initial state is known. For scenarios where true randomness is crucial, consider using alternative methods like system-level calls or integrating with external libraries. + +### Syntax + +```sql +rand() +``` + +Alias: `rand32` + +### Arguments + +None. + +### Returned value + +Returns a number of type UInt32. + +### Example + +```sql +SELECT rand(); +``` + +```response +1569354847 -- Note: The actual output will be a random number, not the specific number shown in the example +``` ## rand64 -Returns a random UInt64 number, evenly distributed across the range of all possible UInt64 numbers. +Returns a random UInt64 number. -Uses a linear congruential generator. +### Syntax + +```sql +rand64() +``` + +### Arguments + +None. + +### Returned value + +Returns a UInt64 number with uniform distribution. + +Uses a linear congruential generator with an initial state obtained from the system, which means that while it appears random, it's not truly random and can be predictable if the initial state is known. For scenarios where true randomness is crucial, consider using alternative methods like system-level calls or integrating with external libraries. + +### Example + +```sql +SELECT rand64(); +``` + +```response +15030268859237645412 -- Note: The actual output will be a random number, not the specific number shown in the example. 
+``` ## randCanonical -Returns a random Float64 value, evenly distributed in interval [0, 1). +Returns a random Float64 number. + +### Syntax + +```sql +randCanonical() +``` + +### Arguments + +None. + +### Returned value + +Returns a Float64 value between 0 (inclusive) and 1 (exclusive). + +### Example + +```sql +SELECT randCanonical(); +``` + +```response +0.3452178901234567 - Note: The actual output will be a random Float64 number between 0 and 1, not the specific number shown in the example. +``` ## randConstant -Like `rand` but produces a constant column with a random value. +Generates a single constant column filled with a random value. Unlike `rand`, this function ensures the same random value appears in every row of the generated column, making it useful for scenarios requiring a consistent random seed across rows in a single query. -**Example** +### Syntax -``` sql -SELECT rand(), rand(1), rand(number), randConstant(), randConstant(1), randConstant(number) -FROM numbers(3) +```sql +randConstant([x]); ``` -Result: +### Arguments -``` result -┌─────rand()─┬────rand(1)─┬─rand(number)─┬─randConstant()─┬─randConstant(1)─┬─randConstant(number)─┐ -│ 3047369878 │ 4132449925 │ 4044508545 │ 2740811946 │ 4229401477 │ 1924032898 │ -│ 2938880146 │ 1267722397 │ 4154983056 │ 2740811946 │ 4229401477 │ 1924032898 │ -│ 956619638 │ 4238287282 │ 1104342490 │ 2740811946 │ 4229401477 │ 1924032898 │ -└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘ +- **[x] (Optional):** An optional expression that influences the generated random value. Even if provided, the resulting value will still be constant within the same query execution. Different queries using the same expression will likely generate different constant values. + +### Returned value + +Returns a column of type UInt32 containing the same random value in each row. 
+ +### Implementation details + +The actual output will be different for each query execution, even with the same optional expression. The optional parameter may not significantly change the generated value compared to using `randConstant` alone. + +### Examples + +```sql +SELECT randConstant() AS random_value; +``` + +```response +| random_value | +|--------------| +| 1234567890 | +``` + +```sql +SELECT randConstant(10) AS random_value; +``` + +```response +| random_value | +|--------------| +| 9876543210 | ``` ## randUniform -Returns a random Float64 drawn uniformly from interval [`min`, `max`) ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)). +Returns a random Float64 drawn uniformly from interval [`min`, `max`]. -**Syntax** +### Syntax -``` sql +```sql randUniform(min, max) ``` -**Arguments** +### Arguments - `min` - `Float64` - left boundary of the range, - `max` - `Float64` - right boundary of the range. -**Returned value** +### Returned value -- Random number. +A random number of type [Float64](/docs/en/sql-reference/data-types/float.md). -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +### Example -**Example** - -``` sql +```sql SELECT randUniform(5.5, 10) FROM numbers(5) ``` -Result: - -``` result +```response ┌─randUniform(5.5, 10)─┐ │ 8.094978491443102 │ │ 7.3181248914450885 │ @@ -99,7 +193,7 @@ Returns a random Float64 drawn from a [normal distribution](https://en.wikipedia **Syntax** -``` sql +```sql randNormal(mean, variance) ``` @@ -116,13 +210,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). 
**Example** -``` sql +```sql SELECT randNormal(10, 2) FROM numbers(5) ``` Result: -``` result +```result ┌──randNormal(10, 2)─┐ │ 13.389228911709653 │ │ 8.622949707401295 │ @@ -138,7 +232,7 @@ Returns a random Float64 drawn from a [log-normal distribution](https://en.wikip **Syntax** -``` sql +```sql randLogNormal(mean, variance) ``` @@ -155,13 +249,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randLogNormal(100, 5) FROM numbers(5) ``` Result: -``` result +```result ┌─randLogNormal(100, 5)─┐ │ 1.295699673937363e48 │ │ 9.719869109186684e39 │ @@ -177,7 +271,7 @@ Returns a random UInt64 drawn from a [binomial distribution](https://en.wikipedi **Syntax** -``` sql +```sql randBinomial(experiments, probability) ``` @@ -194,13 +288,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -``` sql +```sql SELECT randBinomial(100, .75) FROM numbers(5) ``` Result: -``` result +```result ┌─randBinomial(100, 0.75)─┐ │ 74 │ │ 78 │ @@ -216,7 +310,7 @@ Returns a random UInt64 drawn from a [negative binomial distribution](https://en **Syntax** -``` sql +```sql randNegativeBinomial(experiments, probability) ``` @@ -233,13 +327,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -``` sql +```sql SELECT randNegativeBinomial(100, .75) FROM numbers(5) ``` Result: -``` result +```result ┌─randNegativeBinomial(100, 0.75)─┐ │ 33 │ │ 32 │ @@ -255,7 +349,7 @@ Returns a random UInt64 drawn from a [Poisson distribution](https://en.wikipedia **Syntax** -``` sql +```sql randPoisson(n) ``` @@ -271,13 +365,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). 
**Example** -``` sql +```sql SELECT randPoisson(10) FROM numbers(5) ``` Result: -``` result +```result ┌─randPoisson(10)─┐ │ 8 │ │ 8 │ @@ -293,7 +387,7 @@ Returns a random UInt64 drawn from a [Bernoulli distribution](https://en.wikiped **Syntax** -``` sql +```sql randBernoulli(probability) ``` @@ -309,13 +403,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -``` sql +```sql SELECT randBernoulli(.75) FROM numbers(5) ``` Result: -``` result +```result ┌─randBernoulli(0.75)─┐ │ 1 │ │ 1 │ @@ -331,7 +425,7 @@ Returns a random Float64 drawn from a [exponential distribution](https://en.wiki **Syntax** -``` sql +```sql randExponential(lambda) ``` @@ -347,13 +441,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randExponential(1/10) FROM numbers(5) ``` Result: -``` result +```result ┌─randExponential(divide(1, 10))─┐ │ 44.71628934340778 │ │ 4.211013337903262 │ @@ -369,7 +463,7 @@ Returns a random Float64 drawn from a [Chi-square distribution](https://en.wikip **Syntax** -``` sql +```sql randChiSquared(degree_of_freedom) ``` @@ -385,13 +479,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randChiSquared(10) FROM numbers(5) ``` Result: -``` result +```result ┌─randChiSquared(10)─┐ │ 10.015463656521543 │ │ 9.621799919882768 │ @@ -407,7 +501,7 @@ Returns a random Float64 drawn from a [Student's t-distribution](https://en.wiki **Syntax** -``` sql +```sql randStudentT(degree_of_freedom) ``` @@ -423,13 +517,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). 
**Example** -``` sql +```sql SELECT randStudentT(10) FROM numbers(5) ``` Result: -``` result +```result ┌─────randStudentT(10)─┐ │ 1.2217309938538725 │ │ 1.7941971681200541 │ @@ -445,7 +539,7 @@ Returns a random Float64 drawn from a [F-distribution](https://en.wikipedia.org/ **Syntax** -``` sql +```sql randFisherF(d1, d2) ``` @@ -462,13 +556,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randFisherF(10, 3) FROM numbers(5) ``` Result: -``` result +```result ┌──randFisherF(10, 3)─┐ │ 7.286287504216609 │ │ 0.26590779413050386 │ @@ -484,7 +578,7 @@ Generates a string of the specified length filled with random bytes (including z **Syntax** -``` sql +```sql randomString(length) ``` @@ -502,13 +596,13 @@ Type: [String](../../sql-reference/data-types/string.md). Query: -``` sql +```sql SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical; ``` Result: -``` text +```text Row 1: ────── str: 3 G : pT ?w тi k aV f6 @@ -526,7 +620,7 @@ Generates a binary string of the specified length filled with random bytes (incl **Syntax** -``` sql +```sql randomFixedString(length); ``` @@ -563,7 +657,7 @@ If you pass `length < 0`, the behavior of the function is undefined. **Syntax** -``` sql +```sql randomPrintableASCII(length) ``` @@ -579,11 +673,11 @@ Type: [String](../../sql-reference/data-types/string.md) **Example** -``` sql +```sql SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers LIMIT 3 ``` -``` text +```text ┌─number─┬─str────────────────────────────┬─length(randomPrintableASCII(30))─┐ │ 0 │ SuiCOSTvC0csfABSw=UcSzp2.`rv8x │ 30 │ │ 1 │ 1Ag NlJ &RCN:*>HVPG;PE-nO"SUFD │ 30 │ @@ -597,7 +691,7 @@ Generates a random string of a specified length. Result string contains valid UT **Syntax** -``` sql +```sql randomStringUTF8(length); ``` @@ -635,11 +729,12 @@ Flips the bits of String or FixedString `s`, each with probability `prob`. 
**Syntax** -``` sql +```sql fuzzBits(s, prob) ``` **Arguments** + - `s` - `String` or `FixedString`, - `prob` - constant `Float32/64` between 0.0 and 1.0. @@ -649,14 +744,14 @@ Fuzzed string with same type as `s`. **Example** -``` sql +```sql SELECT fuzzBits(materialize('abacaba'), 0.1) FROM numbers(3) ``` Result: -``` result +```result ┌─fuzzBits(materialize('abacaba'), 0.1)─┐ │ abaaaja │ │ a*cjab+ │ diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 3b49e4954ed..b4e2adbed3c 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -588,8 +588,41 @@ Result: ## substringUTF8 -Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. +Returns the substring of a string `s` which starts at the specified index `offset`, counted in Unicode code points rather than bytes. Counting starts from `1`. If `offset` is `0`, an empty string is returned. If `offset` is negative, the substring starts `offset` code points from the end of the string, rather than from the beginning. An optional argument `length` specifies the maximum number of code points the returned substring may have. +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Syntax** + +```sql +substringUTF8(s, offset[, length]) +``` + +**Arguments** + +- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) +- `offset`: The starting position of the substring in `s`. [(U)Int*](../../sql-reference/data-types/int-uint.md). +- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). 
Optional. + +**Returned value** + +A substring of `s` with at most `length` code points, starting at index `offset`. + +**Implementation details** + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Example** + +```sql +SELECT 'Täglich grüßt das Murmeltier.' AS str, + substringUTF8(str, 9), + substringUTF8(str, 9, 5) +``` + +```response +Täglich grüßt das Murmeltier. grüßt das Murmeltier. grüßt +``` ## substringIndex @@ -624,7 +657,39 @@ Result: ## substringIndexUTF8 -Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. +Returns the substring of `s` before `count` occurrences of the delimiter `delim`, specifically for Unicode code points. + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Syntax** + +```sql +substringIndexUTF8(s, delim, count) +``` + +**Arguments** + +- `s`: The string to extract substring from. [String](../../sql-reference/data-types/string.md). +- `delim`: The character to split. [String](../../sql-reference/data-types/string.md). +- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) + +**Returned value** + +A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`. + +**Implementation details** + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. 
+ +**Example** + +```sql +SELECT substringIndexUTF8('www.straßen-in-europa.de', '.', 2) +``` + +```response +www.straßen-in-europa +``` ## appendTrailingCharIfAbsent diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 22f879c62ae..1b03f220db2 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -30,7 +30,6 @@ position(haystack, needle[, start_pos]) Alias: - `position(needle IN haystack)` -- `locate(haystack, needle[, start_pos])`. **Arguments** @@ -49,7 +48,7 @@ If substring `needle` is empty, these rules apply: - if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos` - otherwise: return `0` -The same rules also apply to functions `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8` +The same rules also apply to functions `locate`, `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`. Type: `Integer`. @@ -114,6 +113,21 @@ SELECT └─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘ ``` +## locate + +Like [position](#position) but with arguments `haystack` and `needle` switched. + +The behavior of this function depends on the ClickHouse version: +- in versions < v24.3, `locate` was an alias of function `position` and accepted arguments `(haystack, needle[, start_pos])`. +- in versions >= v24.3, `locate` is an individual function (for better compatibility with MySQL) and accepts arguments `(needle, haystack[, start_pos])`. 
The previous behavior + can be restored using setting [function_locate_has_mysql_compatible_argument_order = false](../../operations/settings/settings.md#function-locate-has-mysql-compatible-argument-order); + +**Syntax** + +``` sql +locate(needle, haystack[, start_pos]) +``` + ## positionCaseInsensitive Like [position](#position) but searches case-insensitively. diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 37d4ac30648..ea08ffa50e7 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -968,7 +968,7 @@ Converts a numeric value to String with the number of fractional digits in the o toDecimalString(number, scale) ``` -**Parameters** +**Arguments** - `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), - `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). @@ -1261,7 +1261,7 @@ Converts input value `x` to the specified data type `T`. Always returns [Nullabl accurateCastOrNull(x, T) ``` -**Parameters** +**Arguments** - `x` — Input value. - `T` — The name of the returned data type. @@ -1314,7 +1314,7 @@ Converts input value `x` to the specified data type `T`. Returns default type va accurateCastOrDefault(x, T) ``` -**Parameters** +**Arguments** - `x` — Input value. - `T` — The name of the returned data type. @@ -1675,7 +1675,7 @@ Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also pa parseDateTime64BestEffort(time_string [, precision [, time_zone]]) ``` -**Parameters** +**Arguments** - `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md). - `precision` — Required precision. 
`3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). @@ -1990,7 +1990,7 @@ Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wi snowflakeToDateTime(value[, time_zone]) ``` -**Parameters** +**Arguments** - `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). - `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). @@ -2026,7 +2026,7 @@ Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wi snowflakeToDateTime64(value[, time_zone]) ``` -**Parameters** +**Arguments** - `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). - `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). @@ -2062,7 +2062,7 @@ Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to th dateTimeToSnowflake(value) ``` -**Parameters** +**Arguments** - `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). @@ -2096,7 +2096,7 @@ Convert a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the f dateTime64ToSnowflake(value) ``` -**Parameters** +**Arguments** - `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). 
diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index f6871c86c4f..ac81815b47f 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -155,7 +155,7 @@ Configuration example: cutToFirstSignificantSubdomain(URL, TLD) ``` -**Parameters** +**Arguments** - `URL` — URL. [String](../../sql-reference/data-types/string.md). - `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). @@ -209,7 +209,7 @@ Configuration example: cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) ``` -**Parameters** +**Arguments** - `URL` — URL. [String](../../sql-reference/data-types/string.md). - `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). @@ -263,7 +263,7 @@ Configuration example: firstSignificantSubdomainCustom(URL, TLD) ``` -**Parameters** +**Arguments** - `URL` — URL. [String](../../sql-reference/data-types/string.md). - `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 120e464e009..31bf43e8b35 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -353,7 +353,7 @@ For efficiency, the `and` and `or` functions accept any number of arguments. The ClickHouse supports the `IS NULL` and `IS NOT NULL` operators. -### IS NULL +### IS NULL {#is_null} - For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns: - `1`, if the value is `NULL`. @@ -374,7 +374,7 @@ SELECT x+100 FROM t_null WHERE y IS NULL └──────────────┘ ``` -### IS NOT NULL +### IS NOT NULL {#is_not_null} - For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns: - `0`, if the value is `NULL`. 
diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 0989c151d18..a23710b12bd 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -335,7 +335,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`). -If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. +If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running. 
diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index a21ef4f4af5..ce5cecf6fd6 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -350,6 +350,7 @@ ALTER TABLE mt DELETE IN PARTITION ID '2' WHERE p = 2; You can specify the partition expression in `ALTER ... PARTITION` queries in different ways: - As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`. +- Using the keyword `ALL`. It can be used only with DROP/DETACH/ATTACH. For example, `ALTER TABLE visits ATTACH PARTITION ALL`. - As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`. - Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in a single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`. - In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. 
diff --git a/docs/en/sql-reference/statements/undrop.md b/docs/en/sql-reference/statements/undrop.md index 40ac1ab4f99..4b138bfe679 100644 --- a/docs/en/sql-reference/statements/undrop.md +++ b/docs/en/sql-reference/statements/undrop.md @@ -13,13 +13,6 @@ a system table called `system.dropped_tables`. If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view. -:::note -UNDROP TABLE is experimental. To use it add this setting: -```sql -set allow_experimental_undrop_table_query = 1; -``` -::: - :::tip Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md) ::: @@ -32,60 +25,53 @@ UNDROP TABLE [db.]name [UUID ''] [ON CLUSTER cluster] **Example** -``` sql -set allow_experimental_undrop_table_query = 1; -``` - ```sql -CREATE TABLE undropMe +CREATE TABLE tab ( `id` UInt8 ) ENGINE = MergeTree -ORDER BY id -``` +ORDER BY id; + +DROP TABLE tab; -```sql -DROP TABLE undropMe -``` -```sql SELECT * FROM system.dropped_tables -FORMAT Vertical +FORMAT Vertical; ``` + ```response Row 1: ────── index: 0 database: default -table: undropMe +table: tab uuid: aa696a1a-1d70-4e60-a841-4c80827706cc engine: MergeTree -metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql +metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.tab.aa696a1a-1d70-4e60-a841-4c80827706cc.sql table_dropped_time: 2023-04-05 14:12:12 1 row in set. Elapsed: 0.001 sec. ``` + ```sql -UNDROP TABLE undropMe -``` -```response -Ok. -``` -```sql +UNDROP TABLE tab; + SELECT * FROM system.dropped_tables -FORMAT Vertical -``` +FORMAT Vertical; + ```response Ok. 0 rows in set. Elapsed: 0.001 sec. 
``` + ```sql -DESCRIBE TABLE undropMe -FORMAT Vertical +DESCRIBE TABLE tab +FORMAT Vertical; ``` + ```response Row 1: ────── diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index ad92ab39183..136ff72e4a9 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -5,7 +5,7 @@ sidebar_label: cluster title: "cluster, clusterAllReplicas" --- -Allows to access all shards in an existing cluster which configured in `remote_servers` section without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. One replica of each shard is queried. +Allows to access all shards (configured in the `remote_servers` section) of a cluster without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. Only one replica of each shard is queried. `clusterAllReplicas` function — same as `cluster`, but all replicas are queried. Each replica in a cluster is used as a separate shard/connection. diff --git a/docs/en/sql-reference/table-functions/jdbc.md b/docs/en/sql-reference/table-functions/jdbc.md index fbc917c1e1a..6b801344a83 100644 --- a/docs/en/sql-reference/table-functions/jdbc.md +++ b/docs/en/sql-reference/table-functions/jdbc.md @@ -6,6 +6,11 @@ sidebar_label: jdbc # jdbc +:::note +clickhouse-jdbc-bridge contains experimental code and is no longer supported. It may contain reliability issues and security vulnerabilities. Use it at your own risk. +ClickHouse recommends using built-in table functions, which provide a better alternative for ad-hoc querying scenarios (Postgres, MySQL, MongoDB, etc.). +::: + `jdbc(datasource, schema, table)` - returns table that is connected via JDBC driver. This table function requires separate [clickhouse-jdbc-bridge](https://github.com/ClickHouse/clickhouse-jdbc-bridge) program to be running.
diff --git a/docs/ru/getting-started/tutorial.md b/docs/ru/getting-started/tutorial.md index 34064b6cf2f..8c827137e6d 100644 --- a/docs/ru/getting-started/tutorial.md +++ b/docs/ru/getting-started/tutorial.md @@ -585,10 +585,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand()); INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; ``` -:::danger Внимание! -Этот подход не годится для сегментирования больших таблиц. Есть инструмент [clickhouse-copier](../operations/utilities/clickhouse-copier.md), специально предназначенный для перераспределения любых больших таблиц. -::: - Как и следовало ожидать, вычислительно сложные запросы работают втрое быстрее, если они выполняются на трёх серверах, а не на одном. В данном случае мы использовали кластер из трёх сегментов с одной репликой для каждого. diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md index 9ff13bbc8a6..44877ff8071 100644 --- a/docs/ru/operations/backup.md +++ b/docs/ru/operations/backup.md @@ -24,10 +24,6 @@ sidebar_label: "Резервное копирование данных" Некоторые локальные файловые системы позволяют делать снимки (например, [ZFS](https://en.wikipedia.org/wiki/ZFS)), но они могут быть не лучшим выбором для обслуживания живых запросов. Возможным решением является создание дополнительных реплик с такой файловой системой и исключение их из [Distributed](../engines/table-engines/special/distributed.md) таблиц, используемых для запросов `SELECT`. Снимки на таких репликах будут недоступны для запросов, изменяющих данные. В качестве бонуса, эти реплики могут иметь особые конфигурации оборудования с большим количеством дисков, подключенных к серверу, что будет экономически эффективным. -## clickhouse-copier {#clickhouse-copier} - -[clickhouse-copier](utilities/clickhouse-copier.md) — это универсальный инструмент, который изначально был создан для перешардирования таблиц с петабайтами данных. 
Его также можно использовать для резервного копирования и восстановления, поскольку он надёжно копирует данные между таблицами и кластерами ClickHouse. - Для небольших объёмов данных можно применять `INSERT INTO ... SELECT ...` в удалённые таблицы. ## Манипуляции с партициями {#manipuliatsii-s-partitsiiami} diff --git a/docs/ru/operations/utilities/clickhouse-copier.md b/docs/ru/operations/utilities/clickhouse-copier.md deleted file mode 100644 index da86ef2d35d..00000000000 --- a/docs/ru/operations/utilities/clickhouse-copier.md +++ /dev/null @@ -1,183 +0,0 @@ ---- -slug: /ru/operations/utilities/clickhouse-copier -sidebar_position: 59 -sidebar_label: clickhouse-copier ---- - -# clickhouse-copier {#clickhouse-copier} - -Копирует данные из таблиц одного кластера в таблицы другого (или этого же) кластера. - -Можно запустить несколько `clickhouse-copier` для разных серверах для выполнения одного и того же задания. Для синхронизации между процессами используется ZooKeeper. - -После запуска, `clickhouse-copier`: - -- Соединяется с ZooKeeper и получает: - - - Задания на копирование. - - Состояние заданий на копирование. - -- Выполняет задания. - - Каждый запущенный процесс выбирает "ближайший" шард исходного кластера и копирует данные в кластер назначения, при необходимости перешардируя их. - -`clickhouse-copier` отслеживает изменения в ZooKeeper и применяет их «на лету». - -Для снижения сетевого трафика рекомендуем запускать `clickhouse-copier` на том же сервере, где находятся исходные данные. - -## Запуск Clickhouse-copier {#zapusk-clickhouse-copier} - -Утилиту следует запускать вручную следующим образом: - -``` bash -$ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir -``` - -Параметры запуска: - -- `daemon` - запускает `clickhouse-copier` в режиме демона. -- `config` - путь к файлу `zookeeper.xml` с параметрами соединения с ZooKeeper. -- `task-path` - путь к ноде ZooKeeper. 
Нода используется для синхронизации между процессами `clickhouse-copier` и для хранения заданий. Задания хранятся в `$task-path/description`. -- `task-file` - необязательный путь к файлу с описанием конфигурация заданий для загрузки в ZooKeeper. -- `task-upload-force` - Загрузить `task-file` в ZooKeeper даже если уже было загружено. -- `base-dir` - путь к логам и вспомогательным файлам. При запуске `clickhouse-copier` создает в `$base-dir` подкаталоги `clickhouse-copier_YYYYMMHHSS_`. Если параметр не указан, то каталоги будут создаваться в каталоге, где `clickhouse-copier` был запущен. - -## Формат Zookeeper.xml {#format-zookeeper-xml} - -``` xml - - - trace - 100M - 3 - - - - - 127.0.0.1 - 2181 - - - -``` - -## Конфигурация заданий на копирование {#konfiguratsiia-zadanii-na-kopirovanie} - -``` xml - - - - - - - false - - 127.0.0.1 - 9000 - - - - ... - - - - ... - - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - test - hits - - - destination_cluster - test - hits2 - - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}') - PARTITION BY toMonday(date) - ORDER BY (CounterID, EventDate) - - - - jumpConsistentHash(intHash64(UserID), 2) - - - CounterID != 0 - - - - '2018-02-26' - '2018-03-05' - ... - - - - - - ... - - ... - - -``` - -`clickhouse-copier` отслеживает изменения `/task/path/description` и применяет их «на лету». Если вы поменяете, например, значение `max_workers`, то количество процессов, выполняющих задания, также изменится. diff --git a/docs/ru/operations/utilities/index.md b/docs/ru/operations/utilities/index.md index 9eb90a3037c..e4b01a0276d 100644 --- a/docs/ru/operations/utilities/index.md +++ b/docs/ru/operations/utilities/index.md @@ -7,7 +7,6 @@ sidebar_position: 56 # Утилиты ClickHouse {#utility-clickhouse} - [clickhouse-local](clickhouse-local.md) - позволяет выполнять SQL-запросы над данными без остановки сервера ClickHouse, подобно утилите `awk`. 
-- [clickhouse-copier](clickhouse-copier.md) - копирует (и перешардирует) данные с одного кластера на другой. - [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — устанавливает соединение с сервером ClickHouse и запускает циклическое выполнение указанных запросов. - [clickhouse-format](../../operations/utilities/clickhouse-format.md) — позволяет форматировать входящие запросы. - [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — обфусцирует данные. diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index 385a9835eca..2ea045f4ae3 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -94,7 +94,7 @@ RENAME COLUMN [IF EXISTS] name to new_name Переименовывает столбец `name` в `new_name`. Если указано выражение `IF EXISTS`, то запрос не будет возвращать ошибку при условии, что столбец `name` не существует. Поскольку переименование не затрагивает физические данные колонки, запрос выполняется практически мгновенно. -**ЗАМЕЧЕНИЕ**: Столбцы, являющиеся частью основного ключа или ключа сортировки (заданные с помощью `ORDER BY` или `PRIMARY KEY`), не могут быть переименованы. Попытка переименовать эти слобцы приведет к `SQL Error [524]`. +**ЗАМЕЧЕНИЕ**: Столбцы, являющиеся частью основного ключа или ключа сортировки (заданные с помощью `ORDER BY` или `PRIMARY KEY`), не могут быть переименованы. Попытка переименовать эти столбцы приведет к `SQL Error [524]`. Пример: @@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp; Отсутствует возможность удалять столбцы, входящие в первичный ключ или ключ для сэмплирования (в общем, входящие в выражение `ENGINE`). Изменение типа у столбцов, входящих в первичный ключ возможно только в том случае, если это изменение не приводит к изменению данных (например, разрешено добавление значения в Enum или изменение типа с `DateTime` на `UInt32`).
-Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md). +Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть если на момент запроса `ALTER` выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время все новые запросы к той же таблице будут ждать, пока завершится этот `ALTER`. diff --git a/docs/zh/getting-started/tutorial.md b/docs/zh/getting-started/tutorial.md index 989cf5f57d8..d0c9bda83ef 100644 --- a/docs/zh/getting-started/tutorial.md +++ b/docs/zh/getting-started/tutorial.md @@ -582,8 +582,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand()); INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; ``` -!!! 
warning "注意:" - 这种方法不适合大型表的分片。 有一个单独的工具 [clickhouse-copier](../operations/utilities/clickhouse-copier.md) 这可以重新分片任意大表。 正如您所期望的那样,如果计算量大的查询使用3台服务器而不是一个,则运行速度快N倍。 diff --git a/docs/zh/operations/backup.md b/docs/zh/operations/backup.md index 6d491f9c2f7..48e852b4228 100644 --- a/docs/zh/operations/backup.md +++ b/docs/zh/operations/backup.md @@ -24,12 +24,6 @@ sidebar_label: "\u6570\u636E\u5907\u4EFD" 某些本地文件系统提供快照功能(例如, [ZFS](https://en.wikipedia.org/wiki/ZFS)),但它们可能不是提供实时查询的最佳选择。 一个可能的解决方案是使用这种文件系统创建额外的副本,并将它们与用于`SELECT` 查询的 [分布式](../engines/table-engines/special/distributed.md) 表分离。 任何修改数据的查询都无法访问此类副本上的快照。 作为回报,这些副本可能具有特殊的硬件配置,每个服务器附加更多的磁盘,这将是经济高效的。 -## clickhouse-copier {#clickhouse-copier} - -[clickhouse-copier](utilities/clickhouse-copier.md) 是一个多功能工具,最初创建它是为了用于重新切分pb大小的表。 因为它能够在ClickHouse表和集群之间可靠地复制数据,所以它也可用于备份和还原数据。 - -对于较小的数据量,一个简单的 `INSERT INTO ... SELECT ...` 到远程表也可以工作。 - ## part操作 {#manipulations-with-parts} ClickHouse允许使用 `ALTER TABLE ... FREEZE PARTITION ...` 查询以创建表分区的本地副本。 这是利用硬链接(hardlink)到 `/var/lib/clickhouse/shadow/` 文件夹中实现的,所以它通常不会因为旧数据而占用额外的磁盘空间。 创建的文件副本不由ClickHouse服务器处理,所以你可以把它们留在那里:你将有一个简单的备份,不需要任何额外的外部系统,但它仍然容易出现硬件问题。 出于这个原因,最好将它们远程复制到另一个位置,然后删除本地副本。 分布式文件系统和对象存储仍然是一个不错的选择,但是具有足够大容量的正常附加文件服务器也可以工作(在这种情况下,传输将通过网络文件系统或者也许是 [rsync](https://en.wikipedia.org/wiki/Rsync) 来进行). 
diff --git a/docs/zh/operations/utilities/clickhouse-copier.md b/docs/zh/operations/utilities/clickhouse-copier.md deleted file mode 100644 index b01edd9257c..00000000000 --- a/docs/zh/operations/utilities/clickhouse-copier.md +++ /dev/null @@ -1,172 +0,0 @@ ---- -slug: /zh/operations/utilities/clickhouse-copier ---- -# clickhouse-copier {#clickhouse-copier} - -将数据从一个群集中的表复制到另一个(或相同)群集中的表。 - -您可以运行多个 `clickhouse-copier` 不同服务器上的实例执行相同的作业。 ZooKeeper用于同步进程。 - -开始后, `clickhouse-copier`: - -- 连接到ZooKeeper并且接收: - - - 复制作业。 - - 复制作业的状态。 - -- 它执行的工作。 - - 每个正在运行的进程都会选择源集群的“最接近”分片,然后将数据复制到目标集群,并在必要时重新分片数据。 - -`clickhouse-copier` 跟踪ZooKeeper中的更改,并实时应用它们。 - -为了减少网络流量,我们建议运行 `clickhouse-copier` 在源数据所在的同一服务器上。 - -## 运行Clickhouse-copier {#running-clickhouse-copier} - -该实用程序应手动运行: - -``` bash -clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir -``` - -参数: - -- `daemon` — 在守护进程模式下启动`clickhouse-copier`。 -- `config` — `zookeeper.xml`文件的路径,其中包含用于连接ZooKeeper的参数。 -- `task-path` — ZooKeeper节点的路径。 该节点用于同步`clickhouse-copier`进程和存储任务。 任务存储在`$task-path/description`中。 -- `task-file` — 可选的非必须参数, 指定一个包含任务配置的参数文件, 用于初始上传到ZooKeeper。 -- `task-upload-force` — 即使节点已经存在,也强制上载`task-file`。 -- `base-dir` — 日志和辅助文件的路径。 启动时,`clickhouse-copier`在`$base-dir`中创建`clickhouse-copier_YYYYMMHHSS_`子目录。 如果省略此参数,则会在启动`clickhouse-copier`的目录中创建目录。 - - - -## Zookeeper.xml格式 {#format-of-zookeeper-xml} - -``` xml - - - trace - 100M - 3 - - - - - 127.0.0.1 - 2181 - - - -``` - -## 复制任务的配置 {#configuration-of-copying-tasks} - -``` xml - - - - - - false - - 127.0.0.1 - 9000 - - - ... - - - - ... 
- - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - test - hits - - - destination_cluster - test - hits2 - - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}') - PARTITION BY toMonday(date) - ORDER BY (CounterID, EventDate) - - - - jumpConsistentHash(intHash64(UserID), 2) - - - CounterID != 0 - - - - '2018-02-26' - '2018-03-05' - ... - - - - - - ... - - ... - - -``` - -`clickhouse-copier` 跟踪更改 `/task/path/description` 并在飞行中应用它们。 例如,如果你改变的值 `max_workers`,运行任务的进程数也会发生变化。 diff --git a/docs/zh/operations/utilities/index.md b/docs/zh/operations/utilities/index.md index af158baf275..cebe312450c 100644 --- a/docs/zh/operations/utilities/index.md +++ b/docs/zh/operations/utilities/index.md @@ -4,5 +4,4 @@ slug: /zh/operations/utilities/ # 实用工具 {#clickhouse-utility} - [本地查询](clickhouse-local.md) — 在不停止ClickHouse服务的情况下,对数据执行查询操作(类似于 `awk` 命令)。 -- [跨集群复制](clickhouse-copier.md) — 在不同集群间复制数据。 - [性能测试](clickhouse-benchmark.md) — 连接到Clickhouse服务器,执行性能测试。 diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md index 002d5102fa3..48665ae04ab 100644 --- a/docs/zh/sql-reference/statements/alter.md +++ b/docs/zh/sql-reference/statements/alter.md @@ -150,7 +150,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) 不支持对primary key或者sampling key中的列(在 `ENGINE` 表达式中用到的列)进行删除操作。改变包含在primary key中的列的类型时,如果操作不会导致数据的变化(例如,往Enum中添加一个值,或者将`DateTime` 类型改成 `UInt32`),那么这种操作是可行的。 -如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。 +如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去,然后通过 
[RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。 `ALTER` 操作会阻塞对表的所有读写操作。换句话说,当一个大的 `SELECT` 语句和 `ALTER`同时执行时,`ALTER`会等待,直到 `SELECT` 执行结束。与此同时,当 `ALTER` 运行时,新的 sql 语句将会等待。 diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index 7894129b8e3..dc183ead102 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -50,8 +50,6 @@ contents: dst: /etc/init.d/clickhouse-server - src: clickhouse-server.service dst: /lib/systemd/system/clickhouse-server.service -- src: root/usr/bin/clickhouse-copier - dst: /usr/bin/clickhouse-copier - src: root/usr/bin/clickhouse-server dst: /usr/bin/clickhouse-server # clickhouse-keeper part diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 62bcf068879..d945fdf4a6f 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -122,7 +122,6 @@ add_subdirectory (local) add_subdirectory (benchmark) add_subdirectory (extract-from-config) add_subdirectory (compressor) -add_subdirectory (copier) add_subdirectory (format) add_subdirectory (obfuscator) add_subdirectory (install) @@ -200,7 +199,6 @@ clickhouse_program_install(clickhouse-server server) clickhouse_program_install(clickhouse-client client chc) clickhouse_program_install(clickhouse-local local chl ch) clickhouse_program_install(clickhouse-benchmark benchmark) -clickhouse_program_install(clickhouse-copier copier) clickhouse_program_install(clickhouse-extract-from-config extract-from-config) clickhouse_program_install(clickhouse-compressor compressor) clickhouse_program_install(clickhouse-format format) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index a2bd6b6016a..4203e4738dd 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -50,6 +50,7 @@ #include #include #include +#include namespace fs = std::filesystem; using namespace std::literals; @@ -1137,6 +1138,13 @@ void Client::processOptions(const 
OptionsDescription & options_description, } +static bool checkIfStdoutIsRegularFile() +{ + struct stat file_stat; + return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode); +} + + void Client::processConfig() { if (!queries.empty() && config().has("queries-file")) @@ -1173,7 +1181,14 @@ void Client::processConfig() pager = config().getString("pager", ""); is_default_format = !config().has("vertical") && !config().has("format"); - if (config().has("vertical")) + if (is_default_format && checkIfStdoutIsRegularFile()) + { + is_default_format = false; + std::optional format_from_file_name; + format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO); + format = format_from_file_name ? *format_from_file_name : "TabSeparated"; + } + else if (config().has("vertical")) format = config().getString("format", "Vertical"); else format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated"); @@ -1377,8 +1392,8 @@ void Client::readArguments( } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseClient(int argc, char ** argv) { diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index 7125fdc744f..050bb495024 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -143,7 +143,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) ParserCodec codec_parser; std::string codecs_line = boost::algorithm::join(codecs, ","); - auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); codec = CompressionCodecFactory::instance().get(ast, nullptr); } else diff --git 
a/programs/copier/Aliases.h b/programs/copier/Aliases.h deleted file mode 100644 index 02be3441acd..00000000000 --- a/programs/copier/Aliases.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include - -#include - -#include - -namespace DB -{ - using ConfigurationPtr = Poco::AutoPtr; - - using DatabaseAndTableName = std::pair; - using ListOfDatabasesAndTableNames = std::vector; -} diff --git a/programs/copier/CMakeLists.txt b/programs/copier/CMakeLists.txt deleted file mode 100644 index 2c17e70bc5e..00000000000 --- a/programs/copier/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -set(CLICKHOUSE_COPIER_SOURCES - "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopierApp.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartition.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartitionPiece.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/StatusAccumulator.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/TaskCluster.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/TaskShard.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/TaskTable.cpp") - -set (CLICKHOUSE_COPIER_LINK - PRIVATE - clickhouse_common_zookeeper - clickhouse_common_config - clickhouse_parsers - clickhouse_functions - clickhouse_table_functions - clickhouse_aggregate_functions - string_utils - - PUBLIC - daemon -) - -set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) - -clickhouse_program_add(copier) diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp deleted file mode 100644 index 59505d08f5c..00000000000 --- a/programs/copier/ClusterCopier.cpp +++ /dev/null @@ -1,2076 +0,0 @@ -#include "ClusterCopier.h" - -#include "Internals.h" -#include "StatusAccumulator.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace CurrentMetrics -{ - extern const Metric LocalThread; - extern const Metric LocalThreadActive; - 
extern const Metric LocalThreadScheduled; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; - extern const int UNFINISHED; - extern const int BAD_ARGUMENTS; -} - - -void ClusterCopier::init() -{ - auto zookeeper = getContext()->getZooKeeper(); - - task_description_watch_callback = [this] (const Coordination::WatchResponse & response) - { - if (response.error != Coordination::Error::ZOK) - return; - UInt64 version = ++task_description_version; - LOG_INFO(log, "Task description should be updated, local version {}", version); - }; - - task_description_path = task_zookeeper_path + "/description"; - task_cluster = std::make_unique(task_zookeeper_path, working_database_name); - - reloadTaskDescription(); - - task_cluster->loadTasks(*task_cluster_current_config); - getContext()->setClustersConfig(task_cluster_current_config, false, task_cluster->clusters_prefix); - - /// Set up shards and their priority - task_cluster->random_engine.seed(randomSeed()); - for (auto & task_table : task_cluster->table_tasks) - { - task_table.cluster_pull = getContext()->getCluster(task_table.cluster_pull_name); - task_table.cluster_push = getContext()->getCluster(task_table.cluster_push_name); - task_table.initShards(task_cluster->random_engine); - } - - LOG_INFO(log, "Will process {} table tasks", task_cluster->table_tasks.size()); - - /// Do not initialize tables, will make deferred initialization in process() - - zookeeper->createAncestors(getWorkersPathVersion() + "/"); - zookeeper->createAncestors(getWorkersPath() + "/"); - /// Init status node - zookeeper->createIfNotExists(task_zookeeper_path + "/status", "{}"); -} - -template -decltype(auto) ClusterCopier::retry(T && func, UInt64 max_tries) -{ - std::exception_ptr exception; - - if (max_tries == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot perform zero retries"); - - for (UInt64 try_number = 1; try_number <= max_tries; ++try_number) - { - try - { - return 
func(); - } - catch (...) - { - exception = std::current_exception(); - if (try_number < max_tries) - { - tryLogCurrentException(log, "Will retry"); - std::this_thread::sleep_for(retry_delay_ms); - } - } - } - - std::rethrow_exception(exception); -} - - -void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts, const TaskShardPtr & task_shard) -{ - TaskTable & task_table = task_shard->task_table; - - LOG_INFO(log, "Discover partitions of shard {}", task_shard->getDescription()); - - auto get_partitions = [&] () { return getShardPartitions(timeouts, *task_shard); }; - auto existing_partitions_names = retry(get_partitions, 60); - Strings filtered_partitions_names; - Strings missing_partitions; - - /// Check that user specified correct partition names - auto check_partition_format = [] (const DataTypePtr & type, const String & partition_text_quoted) - { - MutableColumnPtr column_dummy = type->createColumn(); - ReadBufferFromString rb(partition_text_quoted); - - try - { - type->getDefaultSerialization()->deserializeTextQuoted(*column_dummy, rb, FormatSettings()); - } - catch (Exception & e) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Partition {} has incorrect format. 
{}", partition_text_quoted, e.displayText()); - } - }; - - if (task_table.has_enabled_partitions) - { - /// Process partition in order specified by - for (const String & partition_name : task_table.enabled_partitions) - { - /// Check that user specified correct partition names - check_partition_format(task_shard->partition_key_column.type, partition_name); - - auto it = existing_partitions_names.find(partition_name); - - /// Do not process partition if it is not in enabled_partitions list - if (it == existing_partitions_names.end()) - { - missing_partitions.emplace_back(partition_name); - continue; - } - - filtered_partitions_names.emplace_back(*it); - } - - for (const String & partition_name : existing_partitions_names) - { - if (!task_table.enabled_partitions_set.contains(partition_name)) - { - LOG_INFO(log, "Partition {} will not be processed, since it is not in enabled_partitions of {}", partition_name, task_table.table_id); - } - } - } - else - { - for (const String & partition_name : existing_partitions_names) - filtered_partitions_names.emplace_back(partition_name); - } - - for (const String & partition_name : filtered_partitions_names) - { - const size_t number_of_splits = task_table.number_of_splits; - task_shard->partition_tasks.emplace(partition_name, ShardPartition(*task_shard, partition_name, number_of_splits)); - task_shard->checked_partitions.emplace(partition_name, true); - - auto shard_partition_it = task_shard->partition_tasks.find(partition_name); - PartitionPieces & shard_partition_pieces = shard_partition_it->second.pieces; - - for (size_t piece_number = 0; piece_number < number_of_splits; ++piece_number) - { - bool res = checkPresentPartitionPiecesOnCurrentShard(timeouts, *task_shard, partition_name, piece_number); - shard_partition_pieces.emplace_back(shard_partition_it->second, piece_number, res); - } - } - - if (!missing_partitions.empty()) - { - WriteBufferFromOwnString ss; - for (const String & missing_partition : missing_partitions) - ss 
<< " " << missing_partition; - - LOG_WARNING(log, "There are no {} partitions from enabled_partitions in shard {} :{}", missing_partitions.size(), task_shard->getDescription(), ss.str()); - } - - LOG_INFO(log, "Will copy {} partitions from shard {}", task_shard->partition_tasks.size(), task_shard->getDescription()); -} - -void ClusterCopier::discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads) -{ - /// Fetch partitions list from a shard - { - ThreadPool thread_pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads ? num_threads : 2 * getNumberOfPhysicalCPUCores()); - - for (const TaskShardPtr & task_shard : task_table.all_shards) - thread_pool.scheduleOrThrowOnError([this, timeouts, task_shard]() - { - setThreadName("DiscoverPartns"); - discoverShardPartitions(timeouts, task_shard); - }); - - LOG_INFO(log, "Waiting for {} setup jobs", thread_pool.active()); - thread_pool.wait(); - } -} - -void ClusterCopier::uploadTaskDescription(const std::string & task_path, const std::string & task_file, const bool force) -{ - auto local_task_description_path = task_path + "/description"; - - String task_config_str; - { - ReadBufferFromFile in(task_file); - readStringUntilEOF(task_config_str, in); - } - if (task_config_str.empty()) - return; - - auto zookeeper = getContext()->getZooKeeper(); - - zookeeper->createAncestors(local_task_description_path); - auto code = zookeeper->tryCreate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); - if (code != Coordination::Error::ZOK && force) - zookeeper->createOrUpdate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); - - LOG_INFO(log, "Task description {} uploaded to {} with result {} ({})", - ((code != Coordination::Error::ZOK && !force) ? 
"not " : ""), local_task_description_path, code, Coordination::errorMessage(code)); -} - -void ClusterCopier::reloadTaskDescription() -{ - auto zookeeper = getContext()->getZooKeeper(); - task_description_watch_zookeeper = zookeeper; - - Coordination::Stat stat{}; - - /// It will throw exception if such a node doesn't exist. - auto task_config_str = zookeeper->get(task_description_path, &stat); - - LOG_INFO(log, "Loading task description"); - task_cluster_current_config = getConfigurationFromXMLString(task_config_str); - - /// Setup settings - task_cluster->reloadSettings(*task_cluster_current_config); - getContext()->setSettings(task_cluster->settings_common); -} - -void ClusterCopier::updateConfigIfNeeded() -{ - UInt64 version_to_update = task_description_version; - bool is_outdated_version = task_description_current_version != version_to_update; - bool is_expired_session = !task_description_watch_zookeeper || task_description_watch_zookeeper->expired(); - - if (!is_outdated_version && !is_expired_session) - return; - - LOG_INFO(log, "Updating task description"); - reloadTaskDescription(); - - task_description_current_version = version_to_update; -} - -void ClusterCopier::process(const ConnectionTimeouts & timeouts) -{ - for (TaskTable & task_table : task_cluster->table_tasks) - { - LOG_INFO(log, "Process table task {} with {} shards, {} of them are local ones", task_table.table_id, task_table.all_shards.size(), task_table.local_shards.size()); - - if (task_table.all_shards.empty()) - continue; - - /// Discover partitions of each shard and total set of partitions - if (!task_table.has_enabled_partitions) - { - /// If there are no specified enabled_partitions, we must discover them manually - discoverTablePartitions(timeouts, task_table); - - /// After partitions of each shard are initialized, initialize cluster partitions - for (const TaskShardPtr & task_shard : task_table.all_shards) - { - for (const auto & partition_elem : task_shard->partition_tasks) - { - 
const String & partition_name = partition_elem.first; - task_table.cluster_partitions.emplace(partition_name, ClusterPartition{}); - } - } - - for (auto & partition_elem : task_table.cluster_partitions) - { - const String & partition_name = partition_elem.first; - - for (const TaskShardPtr & task_shard : task_table.all_shards) - task_shard->checked_partitions.emplace(partition_name); - - task_table.ordered_partition_names.emplace_back(partition_name); - } - } - else - { - /// If enabled_partitions are specified, assume that each shard has all partitions - /// We will refine partition set of each shard in future - - for (const String & partition_name : task_table.enabled_partitions) - { - task_table.cluster_partitions.emplace(partition_name, ClusterPartition{}); - task_table.ordered_partition_names.emplace_back(partition_name); - } - } - - task_table.watch.restart(); - - /// Retry table processing - bool table_is_done = false; - for (UInt64 num_table_tries = 1; num_table_tries <= max_table_tries; ++num_table_tries) - { - if (tryProcessTable(timeouts, task_table)) - { - table_is_done = true; - break; - } - } - - if (!table_is_done) - { - throw Exception(ErrorCodes::UNFINISHED, "Too many tries to process table {}. Abort remaining execution", - task_table.table_id); - } - } -} - -/// Protected section - - -/* - * Creates task worker node and checks maximum number of workers not to exceed the limit. - * To achieve this we have to check version of workers_version_path node and create current_worker_path - * node atomically. 
- * */ - -zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNeed( - const zkutil::ZooKeeperPtr & zookeeper, - const String & description, - bool unprioritized) -{ - std::chrono::milliseconds current_sleep_time = retry_delay_ms; - static constexpr std::chrono::milliseconds max_sleep_time(30000); // 30 sec - - if (unprioritized) - std::this_thread::sleep_for(current_sleep_time); - - String workers_version_path = getWorkersPathVersion(); - String workers_path = getWorkersPath(); - String current_worker_path = getCurrentWorkerNodePath(); - - UInt64 num_bad_version_errors = 0; - - while (true) - { - updateConfigIfNeeded(); - - Coordination::Stat stat; - zookeeper->get(workers_version_path, &stat); - auto version = stat.version; - zookeeper->get(workers_path, &stat); - - if (static_cast(stat.numChildren) >= task_cluster->max_workers) - { - LOG_INFO(log, "Too many workers ({}, maximum {}). Postpone processing {}", stat.numChildren, task_cluster->max_workers, description); - - if (unprioritized) - current_sleep_time = std::min(max_sleep_time, current_sleep_time + retry_delay_ms); - - std::this_thread::sleep_for(current_sleep_time); - num_bad_version_errors = 0; - } - else - { - Coordination::Requests ops; - ops.emplace_back(zkutil::makeSetRequest(workers_version_path, description, version)); - ops.emplace_back(zkutil::makeCreateRequest(current_worker_path, description, zkutil::CreateMode::Ephemeral)); - Coordination::Responses responses; - auto code = zookeeper->tryMulti(ops, responses); - - if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS) - return zkutil::EphemeralNodeHolder::existing(current_worker_path, *zookeeper); - - if (code == Coordination::Error::ZBADVERSION) - { - ++num_bad_version_errors; - - /// Try to make fast retries - if (num_bad_version_errors > 3) - { - LOG_INFO(log, "A concurrent worker has just been added, will check free worker slots again"); - std::chrono::milliseconds 
random_sleep_time(std::uniform_int_distribution(1, 1000)(task_cluster->random_engine)); - std::this_thread::sleep_for(random_sleep_time); - num_bad_version_errors = 0; - } - } - else - throw Coordination::Exception(code); - } - } -} - - -bool ClusterCopier::checkPartitionPieceIsClean( - const zkutil::ZooKeeperPtr & zookeeper, - const CleanStateClock & clean_state_clock, - const String & task_status_path) -{ - LogicalClock task_start_clock; - - Coordination::Stat stat{}; - if (zookeeper->exists(task_status_path, &stat)) - task_start_clock = LogicalClock(stat.mzxid); - - return clean_state_clock.is_clean() && (!task_start_clock.hasHappened() || clean_state_clock.discovery_zxid <= task_start_clock); -} - - -bool ClusterCopier::checkAllPiecesInPartitionAreDone(const TaskTable & task_table, const String & partition_name, const TasksShard & shards_with_partition) -{ - bool answer = true; - for (size_t piece_number = 0; piece_number < task_table.number_of_splits; ++piece_number) - { - bool piece_is_done = checkPartitionPieceIsDone(task_table, partition_name, piece_number, shards_with_partition); - if (!piece_is_done) - LOG_INFO(log, "Partition {} piece {} is not already done.", partition_name, piece_number); - answer &= piece_is_done; - } - - return answer; -} - - -/* The same as function above - * Assume that we don't know on which shards do we have partition certain piece. - * We'll check them all (I mean shards that contain the whole partition) - * And shards that don't have certain piece MUST mark that piece is_done true. - * */ -bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name, - size_t piece_number, const TasksShard & shards_with_partition) -{ - LOG_INFO(log, "Check that all shards processed partition {} piece {} successfully", partition_name, piece_number); - - auto zookeeper = getContext()->getZooKeeper(); - - /// Collect all shards that contain partition piece number piece_number. 
- Strings piece_status_paths; - for (const auto & shard : shards_with_partition) - { - ShardPartition & task_shard_partition = shard->partition_tasks.find(partition_name)->second; - ShardPartitionPiece & shard_partition_piece = task_shard_partition.pieces[piece_number]; - piece_status_paths.emplace_back(shard_partition_piece.getShardStatusPath()); - } - - std::vector zxid1, zxid2; - - try - { - std::vector get_futures; - for (const String & path : piece_status_paths) - get_futures.emplace_back(zookeeper->asyncGet(path)); - - // Check that state is Finished and remember zxid - for (auto & future : get_futures) - { - auto res = future.get(); - - TaskStateWithOwner status = TaskStateWithOwner::fromString(res.data); - if (status.state != TaskState::Finished) - { - LOG_INFO(log, "The task {} is being rewritten by {}. Partition piece will be rechecked", res.data, status.owner); - return false; - } - - zxid1.push_back(res.stat.pzxid); - } - - const String piece_is_dirty_flag_path = task_table.getCertainPartitionPieceIsDirtyPath(partition_name, piece_number); - const String piece_is_dirty_cleaned_path = task_table.getCertainPartitionPieceIsCleanedPath(partition_name, piece_number); - const String piece_task_status_path = task_table.getCertainPartitionPieceTaskStatusPath(partition_name, piece_number); - - CleanStateClock clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - - const bool is_clean = checkPartitionPieceIsClean(zookeeper, clean_state_clock, piece_task_status_path); - - - if (!is_clean) - { - LOG_INFO(log, "Partition {} become dirty", partition_name); - return false; - } - - get_futures.clear(); - for (const String & path : piece_status_paths) - get_futures.emplace_back(zookeeper->asyncGet(path)); - - // Remember zxid of states again - for (auto & future : get_futures) - { - auto res = future.get(); - zxid2.push_back(res.stat.pzxid); - } - } - catch (const Coordination::Exception & e) - { - LOG_INFO(log, "A ZooKeeper error 
occurred while checking partition {} piece number {}. Will recheck the partition. Error: {}", partition_name, toString(piece_number), e.displayText()); - return false; - } - - // If all task is finished and zxid is not changed then partition could not become dirty again - for (UInt64 shard_num = 0; shard_num < piece_status_paths.size(); ++shard_num) - { - if (zxid1[shard_num] != zxid2[shard_num]) - { - LOG_INFO(log, "The task {} is being modified now. Partition piece will be rechecked", piece_status_paths[shard_num]); - return false; - } - } - - LOG_INFO(log, "Partition {} piece number {} is copied successfully", partition_name, toString(piece_number)); - return true; -} - - -TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & task_table, const String & partition_name) -{ - bool inject_fault = false; - if (move_fault_probability > 0) - { - double value = std::uniform_real_distribution<>(0, 1)(task_table.task_cluster.random_engine); - inject_fault = value < move_fault_probability; - } - - LOG_INFO(log, "Try to move {} to destination table", partition_name); - - auto zookeeper = getContext()->getZooKeeper(); - - const auto current_partition_attach_is_active = task_table.getPartitionAttachIsActivePath(partition_name); - const auto current_partition_attach_is_done = task_table.getPartitionAttachIsDonePath(partition_name); - - /// Create ephemeral node to mark that we are active and process the partition - zookeeper->createAncestors(current_partition_attach_is_active); - zkutil::EphemeralNodeHolderPtr partition_attach_node_holder; - try - { - partition_attach_node_holder = zkutil::EphemeralNodeHolder::create(current_partition_attach_is_active, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Someone is already moving pieces {}", current_partition_attach_is_active); - return TaskStatus::Active; - } - - throw; - } - - - /// Exit if task has been 
already processed; - /// create blocking node to signal cleaning up if it is abandoned - { - String status_data; - if (zookeeper->tryGet(current_partition_attach_is_done, status_data)) - { - TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data); - if (status.state == TaskState::Finished) - { - LOG_INFO(log, "All pieces for partition from this task {} has been successfully moved to destination table by {}", current_partition_attach_is_active, status.owner); - return TaskStatus::Finished; - } - - /// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process. - /// Initialize DROP PARTITION - LOG_INFO(log, "Moving piece for partition {} has not been successfully finished by {}. Will try to move by myself.", current_partition_attach_is_active, status.owner); - - /// Remove is_done marker. - zookeeper->remove(current_partition_attach_is_done); - } - } - - - /// Try start processing, create node about it - { - String start_state = TaskStateWithOwner::getData(TaskState::Started, host_id); - zookeeper->create(current_partition_attach_is_done, start_state, zkutil::CreateMode::Persistent); - } - - - /// Try to drop destination partition in original table - if (task_table.allow_to_drop_target_partitions) - { - DatabaseAndTableName original_table = task_table.table_push; - - WriteBufferFromOwnString ss; - ss << "ALTER TABLE " << getQuotedTable(original_table) << ((partition_name == "'all'") ? 
" DROP PARTITION ID " : " DROP PARTITION ") << partition_name; - - UInt64 num_shards_drop_partition = executeQueryOnCluster(task_table.cluster_push, ss.str(), task_cluster->settings_push, ClusterExecutionMode::ON_EACH_SHARD); - if (num_shards_drop_partition != task_table.cluster_push->getShardCount()) - return TaskStatus::Error; - - LOG_INFO(log, "Drop partition {} in original table {} have been executed successfully on {} shards of {}", - partition_name, getQuotedTable(original_table), num_shards_drop_partition, task_table.cluster_push->getShardCount()); - } - - /// Move partition to original destination table. - for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number) - { - LOG_INFO(log, "Trying to move partition {} piece {} to original table", partition_name, toString(current_piece_number)); - - ASTPtr query_alter_ast; - String query_alter_ast_string; - - DatabaseAndTableName original_table = task_table.table_push; - DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, - original_table.second + "_piece_" + - toString(current_piece_number)); - - Settings settings_push = task_cluster->settings_push; - ClusterExecutionMode execution_mode = ClusterExecutionMode::ON_EACH_NODE; - - if (settings_push.alter_sync == 1) - execution_mode = ClusterExecutionMode::ON_EACH_SHARD; - - query_alter_ast_string += " ALTER TABLE " + getQuotedTable(original_table) + - ((partition_name == "'all'") ? 
" ATTACH PARTITION ID " : " ATTACH PARTITION ") + partition_name + - " FROM " + getQuotedTable(helping_table); - - LOG_INFO(log, "Executing ALTER query: {}", query_alter_ast_string); - - try - { - /// Try attach partition on each shard - UInt64 num_nodes = executeQueryOnCluster( - task_table.cluster_push, - query_alter_ast_string, - task_cluster->settings_push, - execution_mode); - - if (settings_push.alter_sync == 1) - { - LOG_INFO( - log, - "Destination tables {} have been executed alter query successfully on {} shards of {}", - getQuotedTable(task_table.table_push), - num_nodes, - task_table.cluster_push->getShardCount()); - - if (num_nodes != task_table.cluster_push->getShardCount()) - return TaskStatus::Error; - } - else - { - LOG_INFO(log, "Number of nodes that executed ALTER query successfully : {}", toString(num_nodes)); - } - } - catch (...) - { - LOG_INFO(log, "Error while moving partition {} piece {} to original table", partition_name, toString(current_piece_number)); - LOG_WARNING(log, "In case of non-replicated tables it can cause duplicates."); - throw; - } - - if (inject_fault) - throw Exception(ErrorCodes::UNFINISHED, "Copy fault injection is activated"); - } - - /// Create node to signal that we finished moving - /// Also increment a counter of processed partitions - { - const auto state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id); - const auto task_status = task_zookeeper_path + "/status"; - - /// Try until success - while (true) - { - Coordination::Stat stat; - auto status_json = zookeeper->get(task_status, &stat); - auto statuses = StatusAccumulator::fromJSON(status_json); - - /// Increment status for table. 
- (*statuses)[task_table.name_in_config].processed_partitions_count += 1; - auto statuses_to_commit = StatusAccumulator::serializeToJSON(statuses); - - Coordination::Requests ops; - ops.emplace_back(zkutil::makeSetRequest(current_partition_attach_is_done, state_finished, 0)); - ops.emplace_back(zkutil::makeSetRequest(task_status, statuses_to_commit, stat.version)); - - Coordination::Responses responses; - Coordination::Error code = zookeeper->tryMulti(ops, responses); - - if (code == Coordination::Error::ZOK) - break; - } - } - - return TaskStatus::Finished; -} - -/// This is needed to create internal Distributed table -/// Removes column's TTL expression from `CREATE` query -/// Removes MATEREALIZED or ALIAS columns not to copy additional and useless data over the network. -/// Removes data skipping indices. -ASTPtr ClusterCopier::removeAliasMaterializedAndTTLColumnsFromCreateQuery(const ASTPtr & query_ast, bool allow_to_copy_alias_and_materialized_columns) -{ - const ASTs & column_asts = query_ast->as().columns_list->columns->children; - auto new_columns = std::make_shared(); - - for (const ASTPtr & column_ast : column_asts) - { - const auto & column = column_ast->as(); - - /// Skip this columns - if (!column.default_specifier.empty() && !allow_to_copy_alias_and_materialized_columns) - { - ColumnDefaultKind kind = columnDefaultKindFromString(column.default_specifier); - if (kind == ColumnDefaultKind::Materialized || kind == ColumnDefaultKind::Alias) - continue; - } - - /// Remove TTL on columns definition. 
- auto new_column_ast = column_ast->clone(); - auto & new_column = new_column_ast->as(); - if (new_column.ttl) - new_column.ttl.reset(); - - new_columns->children.emplace_back(new_column_ast); - } - - ASTPtr new_query_ast = query_ast->clone(); - auto & new_query = new_query_ast->as(); - - auto new_columns_list = std::make_shared(); - new_columns_list->set(new_columns_list->columns, new_columns); - - /// Skip indices and projections are not needed, because distributed table doesn't support it. - - new_query.replace(new_query.columns_list, new_columns_list); - - return new_query_ast; -} - -/// Replaces ENGINE and table name in a create query -std::shared_ptr rewriteCreateQueryStorage(const ASTPtr & create_query_ast, - const DatabaseAndTableName & new_table, - const ASTPtr & new_storage_ast) -{ - const auto & create = create_query_ast->as(); - auto res = std::make_shared(create); - - if (create.storage == nullptr || new_storage_ast == nullptr) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage is not specified"); - - res->setDatabase(new_table.first); - res->setTable(new_table.second); - - res->children.clear(); - res->set(res->columns_list, create.columns_list->clone()); - res->set(res->storage, new_storage_ast->clone()); - /// Just to make it better and don't store additional flag like `is_table_created` somewhere else - res->if_not_exists = true; - - return res; -} - - -bool ClusterCopier::tryDropPartitionPiece( - ShardPartition & task_partition, - const size_t current_piece_number, - const zkutil::ZooKeeperPtr & zookeeper, - const CleanStateClock & clean_state_clock) -{ - if (is_safe_mode) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP PARTITION is prohibited in safe mode"); - - TaskTable & task_table = task_partition.task_shard.task_table; - ShardPartitionPiece & partition_piece = task_partition.pieces[current_piece_number]; - - const String current_shards_path = partition_piece.getPartitionPieceShardsPath(); - const String 
current_partition_active_workers_dir = partition_piece.getPartitionPieceActiveWorkersPath(); - const String is_dirty_flag_path = partition_piece.getPartitionPieceIsDirtyPath(); - const String dirty_cleaner_path = partition_piece.getPartitionPieceCleanerPath(); - const String is_dirty_cleaned_path = partition_piece.getPartitionPieceIsCleanedPath(); - - zkutil::EphemeralNodeHolder::Ptr cleaner_holder; - try - { - cleaner_holder = zkutil::EphemeralNodeHolder::create(dirty_cleaner_path, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Partition {} piece {} is cleaning now by somebody, sleep", task_partition.name, toString(current_piece_number)); - std::this_thread::sleep_for(retry_delay_ms); - return false; - } - - throw; - } - - Coordination::Stat stat{}; - if (zookeeper->exists(current_partition_active_workers_dir, &stat)) - { - if (stat.numChildren != 0) - { - LOG_INFO(log, "Partition {} contains {} active workers while trying to drop it. 
Going to sleep.", task_partition.name, stat.numChildren); - std::this_thread::sleep_for(retry_delay_ms); - return false; - } - else - { - zookeeper->remove(current_partition_active_workers_dir); - } - } - - { - zkutil::EphemeralNodeHolder::Ptr active_workers_lock; - try - { - active_workers_lock = zkutil::EphemeralNodeHolder::create(current_partition_active_workers_dir, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Partition {} is being filled now by somebody, sleep", task_partition.name); - return false; - } - - throw; - } - - // Lock the dirty flag - zookeeper->set(is_dirty_flag_path, host_id, clean_state_clock.discovery_version.value()); - zookeeper->tryRemove(partition_piece.getPartitionPieceCleanStartPath()); - CleanStateClock my_clock(zookeeper, is_dirty_flag_path, is_dirty_cleaned_path); - - /// Remove all status nodes - { - Strings children; - if (zookeeper->tryGetChildren(current_shards_path, children) == Coordination::Error::ZOK) - for (const auto & child : children) - { - zookeeper->removeRecursive(current_shards_path + "/" + child); - } - } - - - DatabaseAndTableName original_table = task_table.table_push; - DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); - - String query = "ALTER TABLE " + getQuotedTable(helping_table); - query += ((task_partition.name == "'all'") ? 
" DROP PARTITION ID " : " DROP PARTITION ") + task_partition.name + ""; - - /// TODO: use this statement after servers will be updated up to 1.1.54310 - // query += " DROP PARTITION ID '" + task_partition.name + "'"; - - ClusterPtr & cluster_push = task_table.cluster_push; - Settings settings_push = task_cluster->settings_push; - - /// It is important, DROP PARTITION must be done synchronously - settings_push.alter_sync = 2; - - LOG_INFO(log, "Execute distributed DROP PARTITION: {}", query); - /// We have to drop partition_piece on each replica - size_t num_shards = executeQueryOnCluster( - cluster_push, query, - settings_push, - ClusterExecutionMode::ON_EACH_NODE); - - LOG_INFO(log, "DROP PARTITION was successfully executed on {} nodes of a cluster.", num_shards); - - /// Update the locking node - if (!my_clock.is_stale()) - { - zookeeper->set(is_dirty_flag_path, host_id, my_clock.discovery_version.value()); - if (my_clock.clean_state_version) - zookeeper->set(is_dirty_cleaned_path, host_id, my_clock.clean_state_version.value()); - else - zookeeper->create(is_dirty_cleaned_path, host_id, zkutil::CreateMode::Persistent); - } - else - { - LOG_INFO(log, "Clean state is altered when dropping the partition, cowardly bailing"); - /// clean state is stale - return false; - } - - LOG_INFO(log, "Partition {} piece {} was dropped on cluster {}", task_partition.name, toString(current_piece_number), task_table.cluster_push_name); - if (zookeeper->tryCreate(current_shards_path, host_id, zkutil::CreateMode::Persistent) == Coordination::Error::ZNODEEXISTS) - zookeeper->set(current_shards_path, host_id); - } - - LOG_INFO(log, "Partition {} piece {} is safe for work now.", task_partition.name, toString(current_piece_number)); - return true; -} - -bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTable & task_table) -{ - /// Create destination table - TaskStatus task_status = TaskStatus::Error; - - task_status = tryCreateDestinationTable(timeouts, 
task_table); - /// Exit if success - if (task_status != TaskStatus::Finished) - { - LOG_WARNING(log, "Create destination table failed "); - return false; - } - - /// Set all_partitions_count for table in Zookeeper - auto zookeeper = getContext()->getZooKeeper(); - while (true) - { - Coordination::Stat stat; - auto status_json = zookeeper->get(task_zookeeper_path + "/status", &stat); - auto statuses = StatusAccumulator::fromJSON(status_json); - - /// Exit if someone already set the initial value for this table. - if (statuses->find(task_table.name_in_config) != statuses->end()) - break; - (*statuses)[task_table.name_in_config] = StatusAccumulator::TableStatus - { - /*all_partitions_count=*/task_table.ordered_partition_names.size(), - /*processed_partition_count=*/0 - }; - - auto statuses_to_commit = StatusAccumulator::serializeToJSON(statuses); - auto error = zookeeper->trySet(task_zookeeper_path + "/status", statuses_to_commit, stat.version); - if (error == Coordination::Error::ZOK) - break; - } - - - /// An heuristic: if previous shard is already done, then check next one without sleeps due to max_workers constraint - bool previous_shard_is_instantly_finished = false; - - /// Process each partition that is present in cluster - for (const String & partition_name : task_table.ordered_partition_names) - { - if (!task_table.cluster_partitions.contains(partition_name)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no expected partition {}. It is a bug", partition_name); - - ClusterPartition & cluster_partition = task_table.cluster_partitions[partition_name]; - - Stopwatch watch; - /// We will check all the shards of the table and check if they contain current partition. 
- TasksShard expected_shards; - UInt64 num_failed_shards = 0; - - ++cluster_partition.total_tries; - - LOG_INFO(log, "Processing partition {} for the whole cluster", partition_name); - - /// Process each source shard having current partition and copy current partition - /// NOTE: shards are sorted by "distance" to current host - bool has_shard_to_process = false; - for (const TaskShardPtr & shard : task_table.all_shards) - { - /// Does shard have a node with current partition? - if (!shard->partition_tasks.contains(partition_name)) - { - /// If not, did we check existence of that partition previously? - if (!shard->checked_partitions.contains(partition_name)) - { - auto check_shard_has_partition = [&] () { return checkShardHasPartition(timeouts, *shard, partition_name); }; - bool has_partition = retry(check_shard_has_partition); - - shard->checked_partitions.emplace(partition_name); - - if (has_partition) - { - const size_t number_of_splits = task_table.number_of_splits; - shard->partition_tasks.emplace(partition_name, ShardPartition(*shard, partition_name, number_of_splits)); - LOG_INFO(log, "Discovered partition {} in shard {}", partition_name, shard->getDescription()); - /// To save references in the future. 
- auto shard_partition_it = shard->partition_tasks.find(partition_name); - PartitionPieces & shard_partition_pieces = shard_partition_it->second.pieces; - - for (size_t piece_number = 0; piece_number < number_of_splits; ++piece_number) - { - auto res = checkPresentPartitionPiecesOnCurrentShard(timeouts, *shard, partition_name, piece_number); - shard_partition_pieces.emplace_back(shard_partition_it->second, piece_number, res); - } - } - else - { - LOG_INFO(log, "Found that shard {} does not contain current partition {}", shard->getDescription(), partition_name); - continue; - } - } - else - { - /// We have already checked that partition, but did not discover it - previous_shard_is_instantly_finished = true; - continue; - } - } - - auto it_shard_partition = shard->partition_tasks.find(partition_name); - /// Previously when we discovered that shard does not contain current partition, we skipped it. - /// At this moment partition have to be present. - if (it_shard_partition == shard->partition_tasks.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no such partition in a shard. 
This is a bug."); - auto & partition = it_shard_partition->second; - - expected_shards.emplace_back(shard); - - /// Do not sleep if there is a sequence of already processed shards to increase startup - bool is_unprioritized_task = !previous_shard_is_instantly_finished && shard->priority.is_remote; - task_status = TaskStatus::Error; - bool was_error = false; - has_shard_to_process = true; - for (UInt64 try_num = 1; try_num <= max_shard_partition_tries; ++try_num) - { - task_status = tryProcessPartitionTask(timeouts, partition, is_unprioritized_task); - - /// Exit if success - if (task_status == TaskStatus::Finished) - break; - - was_error = true; - - /// Skip if the task is being processed by someone - if (task_status == TaskStatus::Active) - break; - - /// Repeat on errors - std::this_thread::sleep_for(retry_delay_ms); - } - - if (task_status == TaskStatus::Error) - ++num_failed_shards; - - previous_shard_is_instantly_finished = !was_error; - } - - cluster_partition.elapsed_time_seconds += watch.elapsedSeconds(); - - /// Check that whole cluster partition is done - /// Firstly check the number of failed partition tasks, then look into ZooKeeper and ensure that each partition is done - bool partition_copying_is_done = num_failed_shards == 0; - try - { - partition_copying_is_done = - !has_shard_to_process - || (partition_copying_is_done && checkAllPiecesInPartitionAreDone(task_table, partition_name, expected_shards)); - } - catch (...) - { - tryLogCurrentException(log); - partition_copying_is_done = false; - } - - - bool partition_moving_is_done = false; - /// Try to move only if all pieces were copied. - if (partition_copying_is_done) - { - for (UInt64 try_num = 0; try_num < max_shard_partition_piece_tries_for_alter; ++try_num) - { - try - { - auto res = tryMoveAllPiecesToDestinationTable(task_table, partition_name); - /// Exit and mark current task is done. 
- if (res == TaskStatus::Finished) - { - partition_moving_is_done = true; - break; - } - - /// Exit if this task is active. - if (res == TaskStatus::Active) - break; - - /// Repeat on errors. - std::this_thread::sleep_for(retry_delay_ms); - } - catch (...) - { - tryLogCurrentException(log, "Some error occurred while moving pieces to destination table for partition " + partition_name); - } - } - } - - if (partition_copying_is_done && partition_moving_is_done) - { - task_table.finished_cluster_partitions.emplace(partition_name); - - task_table.bytes_copied += cluster_partition.bytes_copied; - task_table.rows_copied += cluster_partition.rows_copied; - double elapsed = cluster_partition.elapsed_time_seconds; - - LOG_INFO(log, "It took {} seconds to copy partition {}: {} uncompressed bytes, {} rows and {} source blocks are copied", - elapsed, partition_name, - formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied), - formatReadableQuantity(cluster_partition.rows_copied), - cluster_partition.blocks_copied); - - if (cluster_partition.rows_copied) - { - LOG_INFO(log, "Average partition speed: {} per second.", formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied / elapsed)); - } - - if (task_table.rows_copied) - { - LOG_INFO(log, "Average table {} speed: {} per second.", task_table.table_id, formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed)); - } - } - } - - UInt64 required_partitions = task_table.cluster_partitions.size(); - UInt64 finished_partitions = task_table.finished_cluster_partitions.size(); - bool table_is_done = finished_partitions >= required_partitions; - - if (!table_is_done) - { - LOG_INFO(log, "Table {} is not processed yet. 
Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions); - } - else - { - /// Delete helping tables in case that whole table is done - dropHelpingTables(task_table); - } - - return table_is_done; -} - -TaskStatus ClusterCopier::tryCreateDestinationTable(const ConnectionTimeouts & timeouts, TaskTable & task_table) -{ - /// Try create original table (if not exists) on each shard - - //TaskTable & task_table = task_shard.task_table; - const TaskShardPtr task_shard = task_table.all_shards.at(0); - /// We need to update table definitions for each part, it could be changed after ALTER - task_shard->current_pull_table_create_query = getCreateTableForPullShard(timeouts, *task_shard); - try - { - auto create_query_push_ast - = rewriteCreateQueryStorage(task_shard->current_pull_table_create_query, task_table.table_push, task_table.engine_push_ast); - auto & create = create_query_push_ast->as(); - create.if_not_exists = true; - InterpreterCreateQuery::prepareOnClusterQuery(create, getContext(), task_table.cluster_push_name); - String query = queryToString(create_query_push_ast); - - LOG_INFO(log, "Create destination tables. Query: {}", query); - UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push, ClusterExecutionMode::ON_EACH_NODE); - LOG_INFO( - log, - "Destination tables {} have been created on {} shards of {}", - getQuotedTable(task_table.table_push), - shards, - task_table.cluster_push->getShardCount()); - } - catch (...) - { - tryLogCurrentException(log, "Error while creating original table. Maybe we are not first."); - } - - return TaskStatus::Finished; -} - -/// Job for copying partition from particular shard. 
-TaskStatus ClusterCopier::tryProcessPartitionTask(const ConnectionTimeouts & timeouts, ShardPartition & task_partition, bool is_unprioritized_task) -{ - TaskStatus res; - - try - { - res = iterateThroughAllPiecesInPartition(timeouts, task_partition, is_unprioritized_task); - } - catch (...) - { - tryLogCurrentException(log, "An error occurred while processing partition " + task_partition.name); - res = TaskStatus::Error; - } - - /// At the end of each task check if the config is updated - try - { - updateConfigIfNeeded(); - } - catch (...) - { - tryLogCurrentException(log, "An error occurred while updating the config"); - } - - return res; -} - -TaskStatus ClusterCopier::iterateThroughAllPiecesInPartition(const ConnectionTimeouts & timeouts, ShardPartition & task_partition, - bool is_unprioritized_task) -{ - const size_t total_number_of_pieces = task_partition.task_shard.task_table.number_of_splits; - - TaskStatus res{TaskStatus::Finished}; - - bool was_failed_pieces = false; - bool was_active_pieces = false; - - for (size_t piece_number = 0; piece_number < total_number_of_pieces; piece_number++) - { - for (UInt64 try_num = 0; try_num < max_shard_partition_tries; ++try_num) - { - LOG_INFO(log, "Attempt number {} to process partition {} piece number {} on shard number {} with index {}.", - try_num, task_partition.name, piece_number, - task_partition.task_shard.numberInCluster(), - task_partition.task_shard.indexInCluster()); - - res = processPartitionPieceTaskImpl(timeouts, task_partition, piece_number, is_unprioritized_task); - - /// Exit if success - if (res == TaskStatus::Finished) - break; - - /// Skip if the task is being processed by someone - if (res == TaskStatus::Active) - break; - - /// Repeat on errors - std::this_thread::sleep_for(retry_delay_ms); - } - - was_active_pieces |= (res == TaskStatus::Active); - was_failed_pieces |= (res == TaskStatus::Error); - } - - if (was_failed_pieces) - return TaskStatus::Error; - - if (was_active_pieces) - return 
TaskStatus::Active; - - return TaskStatus::Finished; -} - - -TaskStatus ClusterCopier::processPartitionPieceTaskImpl( - const ConnectionTimeouts & timeouts, ShardPartition & task_partition, - const size_t current_piece_number, bool is_unprioritized_task) -{ - TaskShard & task_shard = task_partition.task_shard; - TaskTable & task_table = task_shard.task_table; - ClusterPartition & cluster_partition = task_table.getClusterPartition(task_partition.name); - ShardPartitionPiece & partition_piece = task_partition.pieces[current_piece_number]; - - const size_t number_of_splits = task_table.number_of_splits; - const String primary_key_comma_separated = task_table.primary_key_comma_separated; - - /// We need to update table definitions for each partition, it could be changed after ALTER - createShardInternalTables(timeouts, task_shard, true); - - auto split_table_for_current_piece = task_shard.list_of_split_tables_on_shard[current_piece_number]; - - auto zookeeper = getContext()->getZooKeeper(); - - const String piece_is_dirty_flag_path = partition_piece.getPartitionPieceIsDirtyPath(); - const String piece_is_dirty_cleaned_path = partition_piece.getPartitionPieceIsCleanedPath(); - const String current_task_piece_is_active_path = partition_piece.getActiveWorkerPath(); - const String current_task_piece_status_path = partition_piece.getShardStatusPath(); - - /// Auxiliary functions: - - /// Creates is_dirty node to initialize DROP PARTITION - auto create_is_dirty_node = [&] (const CleanStateClock & clock) - { - if (clock.is_stale()) - LOG_INFO(log, "Clean state clock is stale while setting dirty flag, cowardly bailing"); - else if (!clock.is_clean()) - LOG_INFO(log, "Thank you, Captain Obvious"); - else if (clock.discovery_version) - { - LOG_INFO(log, "Updating clean state clock"); - zookeeper->set(piece_is_dirty_flag_path, host_id, clock.discovery_version.value()); - } - else - { - LOG_INFO(log, "Creating clean state clock"); - zookeeper->create(piece_is_dirty_flag_path, 
host_id, zkutil::CreateMode::Persistent); - } - }; - - /// Returns SELECT query filtering current partition and applying user filter - auto get_select_query = [&] (const DatabaseAndTableName & from_table, const String & fields, bool enable_splitting, String limit = "") - { - String query; - query += "WITH " + task_partition.name + " AS partition_key "; - query += "SELECT " + fields + " FROM " + getQuotedTable(from_table); - - if (enable_splitting && experimental_use_sample_offset) - query += " SAMPLE 1/" + toString(number_of_splits) + " OFFSET " + toString(current_piece_number) + "/" + toString(number_of_splits); - - /// TODO: Bad, it is better to rewrite with ASTLiteral(partition_key_field) - query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = partition_key)"; - - if (enable_splitting && !experimental_use_sample_offset) - query += " AND ( cityHash64(" + primary_key_comma_separated + ") %" + toString(number_of_splits) + " = " + toString(current_piece_number) + " )"; - - if (!task_table.where_condition_str.empty()) - query += " AND (" + task_table.where_condition_str + ")"; - - if (!limit.empty()) - query += " LIMIT " + limit; - - query += " FORMAT Native"; - - ParserQuery p_query(query.data() + query.size()); - - const auto & settings = getContext()->getSettingsRef(); - return parseQuery(p_query, query, settings.max_query_size, settings.max_parser_depth); - }; - - /// Load balancing - auto worker_node_holder = createTaskWorkerNodeAndWaitIfNeed(zookeeper, current_task_piece_status_path, is_unprioritized_task); - - LOG_INFO(log, "Processing {}", current_task_piece_status_path); - - const String piece_status_path = partition_piece.getPartitionPieceShardsPath(); - - CleanStateClock clean_state_clock(zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - - const bool is_clean = checkPartitionPieceIsClean(zookeeper, clean_state_clock, piece_status_path); - - /// Do not start if partition piece is dirty, try to clean it - if 
(is_clean) - { - LOG_INFO(log, "Partition {} piece {} appears to be clean", task_partition.name, current_piece_number); - zookeeper->createAncestors(current_task_piece_status_path); - } - else - { - LOG_INFO(log, "Partition {} piece {} is dirty, try to drop it", task_partition.name, current_piece_number); - - try - { - tryDropPartitionPiece(task_partition, current_piece_number, zookeeper, clean_state_clock); - } - catch (...) - { - tryLogCurrentException(log, "An error occurred when clean partition"); - } - - return TaskStatus::Error; - } - - /// Create ephemeral node to mark that we are active and process the partition - zookeeper->createAncestors(current_task_piece_is_active_path); - zkutil::EphemeralNodeHolderPtr partition_task_node_holder; - try - { - partition_task_node_holder = zkutil::EphemeralNodeHolder::create(current_task_piece_is_active_path, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Someone is already processing {}", current_task_piece_is_active_path); - return TaskStatus::Active; - } - - throw; - } - - /// Exit if task has been already processed; - /// create blocking node to signal cleaning up if it is abandoned - { - String status_data; - if (zookeeper->tryGet(current_task_piece_status_path, status_data)) - { - TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data); - if (status.state == TaskState::Finished) - { - LOG_INFO(log, "Task {} has been successfully executed by {}", current_task_piece_status_path, status.owner); - return TaskStatus::Finished; - } - - /// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process. - /// Initialize DROP PARTITION - LOG_INFO(log, "Task {} has not been successfully finished by {}. 
Partition will be dropped and refilled.", current_task_piece_status_path, status.owner); - - create_is_dirty_node(clean_state_clock); - return TaskStatus::Error; - } - } - - - /// Try create table (if not exists) on each shard - /// We have to create this table even in case that partition piece is empty - /// This is significant, because we will have simpler code - { - /// 1) Get columns description from any replica of destination cluster - /// 2) Change ENGINE, database and table name - /// 3) Create helping table on the whole destination cluster - auto & settings_push = task_cluster->settings_push; - - auto connection = task_table.cluster_push->getAnyShardInfo().pool->get(timeouts, settings_push, true); - String create_query = getRemoteCreateTable(task_shard.task_table.table_push, *connection, settings_push); - - ParserCreateQuery parser_create_query; - auto create_query_ast = parseQuery(parser_create_query, create_query, settings_push.max_query_size, settings_push.max_parser_depth); - /// Define helping table database and name for current partition piece - DatabaseAndTableName database_and_table_for_current_piece - { - task_table.table_push.first, - task_table.table_push.second + "_piece_" + toString(current_piece_number) - }; - - - auto new_engine_push_ast = task_table.engine_push_ast; - if (task_table.isReplicatedTable()) - new_engine_push_ast = task_table.rewriteReplicatedCreateQueryToPlain(); - - /// Take columns definition from destination table, new database and table name, and new engine (non replicated variant of MergeTree) - auto create_query_push_ast = rewriteCreateQueryStorage(create_query_ast, database_and_table_for_current_piece, new_engine_push_ast); - String query = queryToString(create_query_push_ast); - - LOG_INFO(log, "Create destination tables. 
Query: {}", query); - UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push, ClusterExecutionMode::ON_EACH_NODE); - LOG_INFO( - log, - "Destination tables {} have been created on {} shards of {}", - getQuotedTable(task_table.table_push), - shards, - task_table.cluster_push->getShardCount()); - } - - - /// Exit if current piece is absent on this shard. Also mark it as finished, because we will check - /// whether each shard have processed each partitition (and its pieces). - if (partition_piece.is_absent_piece) - { - String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id); - auto res = zookeeper->tryCreate(current_task_piece_status_path, state_finished, zkutil::CreateMode::Persistent); - if (res == Coordination::Error::ZNODEEXISTS) - LOG_INFO(log, "Partition {} piece {} is absent on current replica of a shard. But other replicas have already marked it as done.", task_partition.name, current_piece_number); - if (res == Coordination::Error::ZOK) - LOG_INFO(log, "Partition {} piece {} is absent on current replica of a shard. Will mark it as done. Other replicas will do the same.", task_partition.name, current_piece_number); - return TaskStatus::Finished; - } - - /// Check that destination partition is empty if we are first worker - /// NOTE: this check is incorrect if pull and push tables have different partition key! 
- String clean_start_status; - if (!zookeeper->tryGet(partition_piece.getPartitionPieceCleanStartPath(), clean_start_status) || clean_start_status != "ok") - { - zookeeper->createIfNotExists(partition_piece.getPartitionPieceCleanStartPath(), ""); - auto checker = zkutil::EphemeralNodeHolder::create(partition_piece.getPartitionPieceCleanStartPath() + "/checker", - *zookeeper, host_id); - // Maybe we are the first worker - - ASTPtr query_select_ast = get_select_query(split_table_for_current_piece, "count()", /* enable_splitting= */ true); - UInt64 count; - { - auto local_context = Context::createCopy(context); - // Use pull (i.e. readonly) settings, but fetch data from destination servers - local_context->setSettings(task_cluster->settings_pull); - local_context->setSetting("skip_unavailable_shards", true); - - InterpreterSelectWithUnionQuery select(query_select_ast, local_context, SelectQueryOptions{}); - QueryPlan plan; - select.buildQueryPlan(plan); - auto builder = std::move(*plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(local_context), - BuildQueryPipelineSettings::fromContext(local_context))); - - Block block = getBlockWithAllStreamData(std::move(builder)); - count = (block) ? block.safeGetByPosition(0).column->getUInt(0) : 0; - } - - if (count != 0) - { - LOG_INFO(log, "Partition {} piece {} is not empty. In contains {} rows.", task_partition.name, current_piece_number, count); - Coordination::Stat stat_shards{}; - zookeeper->get(partition_piece.getPartitionPieceShardsPath(), &stat_shards); - - /// NOTE: partition is still fresh if dirt discovery happens before cleaning - if (stat_shards.numChildren == 0) - { - LOG_WARNING(log, "There are no workers for partition {} piece {}, but destination table contains {} rows. 
Partition will be dropped and refilled.", task_partition.name, toString(current_piece_number), count); - - create_is_dirty_node(clean_state_clock); - return TaskStatus::Error; - } - } - zookeeper->set(partition_piece.getPartitionPieceCleanStartPath(), "ok"); - } - /// At this point, we need to sync that the destination table is clean - /// before any actual work - - /// Try start processing, create node about it - { - String start_state = TaskStateWithOwner::getData(TaskState::Started, host_id); - CleanStateClock new_clean_state_clock(zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - if (clean_state_clock != new_clean_state_clock) - { - LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, toString(current_piece_number)); - return TaskStatus::Error; - } - else if (!new_clean_state_clock.is_clean()) - { - LOG_INFO(log, "Partition {} piece {} is dirty and will be dropped and refilled", task_partition.name, toString(current_piece_number)); - create_is_dirty_node(new_clean_state_clock); - return TaskStatus::Error; - } - zookeeper->create(current_task_piece_status_path, start_state, zkutil::CreateMode::Persistent); - } - - - /// Do the copying - { - bool inject_fault = false; - if (copy_fault_probability > 0) - { - double value = std::uniform_real_distribution<>(0, 1)(task_table.task_cluster.random_engine); - inject_fault = value < copy_fault_probability; - } - - // Select all fields - ASTPtr query_select_ast = get_select_query(task_shard.table_read_shard, "*", /* enable_splitting= */ true, inject_fault ? 
"1" : ""); - - LOG_INFO(log, "Executing SELECT query and pull from {}: {}", task_shard.getDescription(), queryToString(query_select_ast)); - - ASTPtr query_insert_ast; - { - String query; - query += "INSERT INTO " + getQuotedTable(split_table_for_current_piece) + " FORMAT Native "; - - ParserQuery p_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - query_insert_ast = parseQuery(p_query, query, settings.max_query_size, settings.max_parser_depth); - - LOG_INFO(log, "Executing INSERT query: {}", query); - } - - try - { - auto context_select = Context::createCopy(context); - context_select->setSettings(task_cluster->settings_pull); - - auto context_insert = Context::createCopy(context); - context_insert->setSettings(task_cluster->settings_push); - - /// Custom INSERT SELECT implementation - QueryPipeline input; - QueryPipeline output; - { - BlockIO io_insert = InterpreterFactory::instance().get(query_insert_ast, context_insert)->execute(); - - InterpreterSelectWithUnionQuery select(query_select_ast, context_select, SelectQueryOptions{}); - QueryPlan plan; - select.buildQueryPlan(plan); - auto builder = std::move(*plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context_select), - BuildQueryPipelineSettings::fromContext(context_select))); - - output = std::move(io_insert.pipeline); - - /// Add converting actions to make it possible to copy blocks with slightly different schema - const auto & select_block = builder.getHeader(); - const auto & insert_block = output.getHeader(); - auto actions_dag = ActionsDAG::makeConvertingActions( - select_block.getColumnsWithTypeAndName(), - insert_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext())); - - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, actions); - }); - input = 
QueryPipelineBuilder::getPipeline(std::move(builder)); - } - - /// Fail-fast optimization to abort copying when the current clean state expires - std::future future_is_dirty_checker; - - Stopwatch watch(CLOCK_MONOTONIC_COARSE); - constexpr UInt64 check_period_milliseconds = 500; - - /// Will asynchronously check that ZooKeeper connection and is_dirty flag appearing while copying data - auto cancel_check = [&] () - { - if (zookeeper->expired()) - throw Exception(ErrorCodes::UNFINISHED, "ZooKeeper session is expired, cancel INSERT SELECT"); - - if (!future_is_dirty_checker.valid()) - future_is_dirty_checker = zookeeper->asyncExists(piece_is_dirty_flag_path); - - /// check_period_milliseconds should less than average insert time of single block - /// Otherwise, the insertion will slow a little bit - if (watch.elapsedMilliseconds() >= check_period_milliseconds) - { - Coordination::ExistsResponse status = future_is_dirty_checker.get(); - - if (status.error != Coordination::Error::ZNONODE) - { - LogicalClock dirt_discovery_epoch (status.stat.mzxid); - if (dirt_discovery_epoch == clean_state_clock.discovery_zxid) - return false; - throw Exception(ErrorCodes::UNFINISHED, "Partition is dirty, cancel INSERT SELECT"); - } - } - - return false; - }; - - /// Update statistics - /// It is quite rough: bytes_copied don't take into account DROP PARTITION. 
- auto update_stats = [&cluster_partition] (const Block & block) - { - cluster_partition.bytes_copied += block.bytes(); - cluster_partition.rows_copied += block.rows(); - cluster_partition.blocks_copied += 1; - }; - - /// Main work is here - PullingPipelineExecutor pulling_executor(input); - PushingPipelineExecutor pushing_executor(output); - - Block data; - bool is_cancelled = false; - while (pulling_executor.pull(data)) - { - if (cancel_check()) - { - is_cancelled = true; - pushing_executor.cancel(); - pushing_executor.cancel(); - break; - } - pushing_executor.push(data); - update_stats(data); - } - - if (!is_cancelled) - pushing_executor.finish(); - - // Just in case - if (future_is_dirty_checker.valid()) - future_is_dirty_checker.get(); - - if (inject_fault) - throw Exception(ErrorCodes::UNFINISHED, "Copy fault injection is activated"); - } - catch (...) - { - tryLogCurrentException(log, "An error occurred during copying, partition will be marked as dirty"); - create_is_dirty_node(clean_state_clock); - return TaskStatus::Error; - } - } - - LOG_INFO(log, "Partition {} piece {} copied. 
But not moved to original destination table.", task_partition.name, toString(current_piece_number)); - - /// Finalize the processing, change state of current partition task (and also check is_dirty flag) - { - String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id); - CleanStateClock new_clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - if (clean_state_clock != new_clean_state_clock) - { - LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, toString(current_piece_number)); - return TaskStatus::Error; - } - else if (!new_clean_state_clock.is_clean()) - { - LOG_INFO(log, "Partition {} piece {} became dirty and will be dropped and refilled", task_partition.name, toString(current_piece_number)); - create_is_dirty_node(new_clean_state_clock); - return TaskStatus::Error; - } - zookeeper->set(current_task_piece_status_path, state_finished, 0); - } - - return TaskStatus::Finished; -} - -void ClusterCopier::dropAndCreateLocalTable(const ASTPtr & create_ast) -{ - const auto & create = create_ast->as(); - dropLocalTableIfExists({create.getDatabase(), create.getTable()}); - - auto create_context = Context::createCopy(getContext()); - - InterpreterCreateQuery interpreter(create_ast, create_context); - interpreter.execute(); -} - -void ClusterCopier::dropLocalTableIfExists(const DatabaseAndTableName & table_name) const -{ - auto drop_ast = std::make_shared(); - drop_ast->if_exists = true; - drop_ast->setDatabase(table_name.first); - drop_ast->setTable(table_name.second); - - auto drop_context = Context::createCopy(getContext()); - - InterpreterDropQuery interpreter(drop_ast, drop_context); - interpreter.execute(); -} - -void ClusterCopier::dropHelpingTablesByPieceNumber(const TaskTable & task_table, size_t current_piece_number) -{ - LOG_INFO(log, "Removing helping tables piece {}", current_piece_number); - - DatabaseAndTableName original_table = task_table.table_push; - 
DatabaseAndTableName helping_table - = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); - - String query = "DROP TABLE IF EXISTS " + getQuotedTable(helping_table); - - const ClusterPtr & cluster_push = task_table.cluster_push; - Settings settings_push = task_cluster->settings_push; - - LOG_INFO(log, "Execute distributed DROP TABLE: {}", query); - - /// We have to drop partition_piece on each replica - UInt64 num_nodes = executeQueryOnCluster(cluster_push, query, settings_push, ClusterExecutionMode::ON_EACH_NODE); - - LOG_INFO(log, "DROP TABLE query was successfully executed on {} nodes.", toString(num_nodes)); -} - -void ClusterCopier::dropHelpingTables(const TaskTable & task_table) -{ - LOG_INFO(log, "Removing helping tables"); - for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number) - { - dropHelpingTablesByPieceNumber(task_table, current_piece_number); - } -} - -void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskTable & task_table, const String & partition_name) -{ - LOG_INFO(log, "Try drop partition partition from all helping tables."); - for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number) - { - DatabaseAndTableName original_table = task_table.table_push; - DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); - - String query = "ALTER TABLE " + getQuotedTable(helping_table) + ((partition_name == "'all'") ? 
" DROP PARTITION ID " : " DROP PARTITION ") + partition_name; - - const ClusterPtr & cluster_push = task_table.cluster_push; - Settings settings_push = task_cluster->settings_push; - - LOG_INFO(log, "Execute distributed DROP PARTITION: {}", query); - /// We have to drop partition_piece on each replica - UInt64 num_nodes = executeQueryOnCluster( - cluster_push, query, - settings_push, - ClusterExecutionMode::ON_EACH_NODE); - - LOG_INFO(log, "DROP PARTITION query was successfully executed on {} nodes.", toString(num_nodes)); - } - LOG_INFO(log, "All helping tables dropped partition {}", partition_name); -} - -String ClusterCopier::getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings) -{ - auto remote_context = Context::createCopy(context); - remote_context->setSettings(settings); - - String query = "SHOW CREATE TABLE " + getQuotedTable(table); - - QueryPipelineBuilder builder; - builder.init(Pipe(std::make_shared( - std::make_shared(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context), false, false, /* async_query_sending= */ false))); - Block block = getBlockWithAllStreamData(std::move(builder)); - return typeid_cast(*block.safeGetByPosition(0).column).getDataAt(0).toString(); -} - - -ASTPtr ClusterCopier::getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard) -{ - /// Fetch and parse (possibly) new definition - auto connection_entry = task_shard.info.pool->get(timeouts, task_cluster->settings_pull, true); - String create_query_pull_str = getRemoteCreateTable( - task_shard.task_table.table_pull, - *connection_entry, - task_cluster->settings_pull); - - ParserCreateQuery parser_create_query; - const auto & settings = getContext()->getSettingsRef(); - return parseQuery(parser_create_query, create_query_pull_str, settings.max_query_size, settings.max_parser_depth); -} - - -/// If it is implicitly asked to create split Distributed table for certain piece on 
current shard, we will do it. -void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, bool create_split) -{ - TaskTable & task_table = task_shard.task_table; - - /// We need to update table definitions for each part, it could be changed after ALTER - task_shard.current_pull_table_create_query = getCreateTableForPullShard(timeouts, task_shard); - - /// Create local Distributed tables: - /// a table fetching data from current shard and a table inserting data to the whole destination cluster - String read_shard_prefix = ".read_shard_" + toString(task_shard.indexInCluster()) + "."; - String split_shard_prefix = ".split."; - task_shard.table_read_shard = DatabaseAndTableName(working_database_name, read_shard_prefix + task_table.table_id); - task_shard.main_table_split_shard = DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id); - - for (const auto & piece_number : collections::range(0, task_table.number_of_splits)) - { - task_shard.list_of_split_tables_on_shard[piece_number] = - DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id + "_piece_" + toString(piece_number)); - } - - /// Create special cluster with single shard - String shard_read_cluster_name = read_shard_prefix + task_table.cluster_pull_name; - ClusterPtr cluster_pull_current_shard = task_table.cluster_pull->getClusterWithSingleShard(task_shard.indexInCluster()); - getContext()->setCluster(shard_read_cluster_name, cluster_pull_current_shard); - - auto storage_shard_ast = createASTStorageDistributed(shard_read_cluster_name, task_table.table_pull.first, task_table.table_pull.second); - - auto create_query_ast = removeAliasMaterializedAndTTLColumnsFromCreateQuery( - task_shard.current_pull_table_create_query, - task_table.allow_to_copy_alias_and_materialized_columns); - - auto create_table_pull_ast = rewriteCreateQueryStorage(create_query_ast, task_shard.table_read_shard, storage_shard_ast); - 
dropAndCreateLocalTable(create_table_pull_ast); - - if (create_split) - { - auto create_table_split_piece_ast = rewriteCreateQueryStorage( - create_query_ast, - task_shard.main_table_split_shard, - task_table.main_engine_split_ast); - - dropAndCreateLocalTable(create_table_split_piece_ast); - - /// Create auxiliary split tables for each piece - for (const auto & piece_number : collections::range(0, task_table.number_of_splits)) - { - const auto & storage_piece_split_ast = task_table.auxiliary_engine_split_asts[piece_number]; - - create_table_split_piece_ast = rewriteCreateQueryStorage( - create_query_ast, - task_shard.list_of_split_tables_on_shard[piece_number], - storage_piece_split_ast); - - dropAndCreateLocalTable(create_table_split_piece_ast); - } - } - -} - - -std::set ClusterCopier::getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard) -{ - std::set res; - - createShardInternalTables(timeouts, task_shard, false); - - TaskTable & task_table = task_shard.task_table; - - const String & partition_name = queryToString(task_table.engine_push_partition_key_ast); - - if (partition_name == "'all'") - { - res.emplace("'all'"); - return res; - } - - String query; - { - WriteBufferFromOwnString wb; - wb << "SELECT " << partition_name << " AS partition FROM " - << getQuotedTable(task_shard.table_read_shard) << " GROUP BY partition ORDER BY partition DESC"; - query = wb.str(); - } - - ParserQuery parser_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - ASTPtr query_ast = parseQuery(parser_query, query, settings.max_query_size, settings.max_parser_depth); - - LOG_INFO(log, "Computing destination partition set, executing query: {}", query); - - auto local_context = Context::createCopy(context); - local_context->setSettings(task_cluster->settings_pull); - InterpreterSelectWithUnionQuery select(query_ast, local_context, SelectQueryOptions{}); - QueryPlan plan; - select.buildQueryPlan(plan); - auto builder 
= std::move(*plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(local_context), - BuildQueryPipelineSettings::fromContext(local_context))); - - Block block = getBlockWithAllStreamData(std::move(builder)); - - if (block) - { - ColumnWithTypeAndName & column = block.getByPosition(0); - task_shard.partition_key_column = column; - - for (size_t i = 0; i < column.column->size(); ++i) - { - WriteBufferFromOwnString wb; - column.type->getDefaultSerialization()->serializeTextQuoted(*column.column, i, wb, FormatSettings()); - res.emplace(wb.str()); - } - } - - LOG_INFO(log, "There are {} destination partitions in shard {}", res.size(), task_shard.getDescription()); - - return res; -} - -bool ClusterCopier::checkShardHasPartition(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, const String & partition_quoted_name) -{ - createShardInternalTables(timeouts, task_shard, false); - - TaskTable & task_table = task_shard.task_table; - - WriteBufferFromOwnString ss; - ss << "WITH " + partition_quoted_name + " AS partition_key "; - ss << "SELECT 1 FROM " << getQuotedTable(task_shard.table_read_shard); - ss << " WHERE (" << queryToString(task_table.engine_push_partition_key_ast) << " = partition_key)"; - if (!task_table.where_condition_str.empty()) - ss << " AND (" << task_table.where_condition_str << ")"; - ss << " LIMIT 1"; - auto query = ss.str(); - - ParserQuery parser_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - ASTPtr query_ast = parseQuery(parser_query, query, settings.max_query_size, settings.max_parser_depth); - - LOG_INFO(log, "Checking shard {} for partition {} existence, executing query: {}", - task_shard.getDescription(), partition_quoted_name, query_ast->formatForErrorMessage()); - - auto local_context = Context::createCopy(context); - local_context->setSettings(task_cluster->settings_pull); - auto pipeline = InterpreterFactory::instance().get(query_ast, 
local_context)->execute().pipeline; - PullingPipelineExecutor executor(pipeline); - Block block; - executor.pull(block); - return block.rows() != 0; -} - -bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, const String & partition_quoted_name, size_t current_piece_number) -{ - createShardInternalTables(timeouts, task_shard, false); - - TaskTable & task_table = task_shard.task_table; - const size_t number_of_splits = task_table.number_of_splits; - const String & primary_key_comma_separated = task_table.primary_key_comma_separated; - - UNUSED(primary_key_comma_separated); - - std::string query; - - query += "WITH " + partition_quoted_name + " AS partition_key "; - query += "SELECT 1 FROM " + getQuotedTable(task_shard.table_read_shard); - - if (experimental_use_sample_offset) - query += " SAMPLE 1/" + toString(number_of_splits) + " OFFSET " + toString(current_piece_number) + "/" + toString(number_of_splits); - - query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = partition_key)"; - - if (!experimental_use_sample_offset) - query += " AND (cityHash64(" + primary_key_comma_separated + ") % " - + std::to_string(number_of_splits) + " = " + std::to_string(current_piece_number) + " )"; - - if (!task_table.where_condition_str.empty()) - query += " AND (" + task_table.where_condition_str + ")"; - - query += " LIMIT 1"; - - LOG_INFO(log, "Checking shard {} for partition {} piece {} existence, executing query: {}", task_shard.getDescription(), partition_quoted_name, std::to_string(current_piece_number), query); - - ParserQuery parser_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - ASTPtr query_ast = parseQuery(parser_query, query, settings.max_query_size, settings.max_parser_depth); - - auto local_context = Context::createCopy(context); - local_context->setSettings(task_cluster->settings_pull); - auto pipeline = 
InterpreterFactory::instance().get(query_ast, local_context)->execute().pipeline; - PullingPipelineExecutor executor(pipeline); - Block result; - executor.pull(result); - if (result.rows() != 0) - LOG_INFO(log, "Partition {} piece number {} is PRESENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription()); - else - LOG_INFO(log, "Partition {} piece number {} is ABSENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription()); - return result.rows() != 0; -} - - -/** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster - * Returns number of shards for which at least one replica executed query successfully - */ -UInt64 ClusterCopier::executeQueryOnCluster( - const ClusterPtr & cluster, - const String & query, - const Settings & current_settings, - ClusterExecutionMode execution_mode) const -{ - ClusterPtr cluster_for_query = cluster; - if (execution_mode == ClusterExecutionMode::ON_EACH_NODE) - cluster_for_query = cluster->getClusterWithReplicasAsShards(current_settings); - - std::vector> connections; - connections.reserve(cluster->getShardCount()); - - std::atomic successfully_executed = 0; - - for (const auto & replicas : cluster_for_query->getShardsAddresses()) - { - for (const auto & node : replicas) - { - try - { - connections.emplace_back(std::make_shared( - node.host_name, node.port, node.default_database, - node.user, node.password, ssh::SSHKey(), node.quota_key, node.cluster, node.cluster_secret, - "ClusterCopier", node.compression, node.secure - )); - - /// We execute only Alter, Create and Drop queries. - const auto header = Block{}; - - /// For unknown reason global context is passed to IStorage::read() method - /// So, task_identifier is passed as constructor argument. It is more obvious. 
- auto remote_query_executor = std::make_shared( - *connections.back(), query, header, getContext(), - /*throttler=*/nullptr, Scalars(), Tables(), QueryProcessingStage::Complete); - - try - { - remote_query_executor->sendQuery(); - } - catch (...) - { - LOG_WARNING(log, "Node with address {} seems to be unreachable.", node.host_name); - continue; - } - - while (true) - { - auto block = remote_query_executor->readBlock(); - if (!block) - break; - } - - remote_query_executor->finish(); - ++successfully_executed; - break; - } - catch (...) - { - LOG_WARNING(log, "An error occurred while processing query: {}", query); - tryLogCurrentException(log); - continue; - } - } - } - - return successfully_executed.load(); -} - -} diff --git a/programs/copier/ClusterCopier.h b/programs/copier/ClusterCopier.h deleted file mode 100644 index 01f8b30f546..00000000000 --- a/programs/copier/ClusterCopier.h +++ /dev/null @@ -1,240 +0,0 @@ -#pragma once - -#include "Aliases.h" -#include "Internals.h" -#include "TaskCluster.h" -#include "TaskShard.h" -#include "TaskTable.h" -#include "ShardPartition.h" -#include "ShardPartitionPiece.h" -#include "ZooKeeperStaff.h" - - -namespace DB -{ - -class ClusterCopier : WithMutableContext -{ -public: - ClusterCopier(const String & task_path_, - const String & host_id_, - const String & proxy_database_name_, - ContextMutablePtr context_, - LoggerRawPtr log_) - : WithMutableContext(context_), - task_zookeeper_path(task_path_), - host_id(host_id_), - working_database_name(proxy_database_name_), - log(log_) {} - - void init(); - - template - decltype(auto) retry(T && func, UInt64 max_tries = 100); - - void discoverShardPartitions(const ConnectionTimeouts & timeouts, const TaskShardPtr & task_shard); - - /// Compute set of partitions, assume set of partitions aren't changed during the processing - void discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads = 0); - - void uploadTaskDescription(const 
std::string & task_path, const std::string & task_file, bool force); - - void reloadTaskDescription(); - - void updateConfigIfNeeded(); - - void process(const ConnectionTimeouts & timeouts); - - /// Disables DROP PARTITION commands that used to clear data after errors - void setSafeMode(bool is_safe_mode_ = true) - { - is_safe_mode = is_safe_mode_; - } - - void setCopyFaultProbability(double copy_fault_probability_) - { - copy_fault_probability = copy_fault_probability_; - } - - void setMoveFaultProbability(double move_fault_probability_) - { - move_fault_probability = move_fault_probability_; - } - - void setExperimentalUseSampleOffset(bool value) - { - experimental_use_sample_offset = value; - } - - void setMaxTableTries(UInt64 tries) - { - max_table_tries = tries; - } - void setMaxShardPartitionTries(UInt64 tries) - { - max_shard_partition_tries = tries; - } - void setMaxShardPartitionPieceTriesForAlter(UInt64 tries) - { - max_shard_partition_piece_tries_for_alter = tries; - } - void setRetryDelayMs(std::chrono::milliseconds ms) - { - retry_delay_ms = ms; - } - -protected: - - String getWorkersPath() const - { - return task_cluster->task_zookeeper_path + "/task_active_workers"; - } - - String getWorkersPathVersion() const - { - return getWorkersPath() + "_version"; - } - - String getCurrentWorkerNodePath() const - { - return getWorkersPath() + "/" + host_id; - } - - zkutil::EphemeralNodeHolder::Ptr createTaskWorkerNodeAndWaitIfNeed( - const zkutil::ZooKeeperPtr & zookeeper, - const String & description, - bool unprioritized); - - /* - * Checks that partition piece or some other entity is clean. - * The only requirement is that you have to pass is_dirty_flag_path and is_dirty_cleaned_path to the function. - * And is_dirty_flag_path is a parent of is_dirty_cleaned_path. 
- * */ - static bool checkPartitionPieceIsClean( - const zkutil::ZooKeeperPtr & zookeeper, - const CleanStateClock & clean_state_clock, - const String & task_status_path); - - bool checkAllPiecesInPartitionAreDone(const TaskTable & task_table, const String & partition_name, const TasksShard & shards_with_partition); - - /** Checks that the whole partition of a table was copied. We should do it carefully due to dirty lock. - * State of some task could change during the processing. - * We have to ensure that all shards have the finished state and there is no dirty flag. - * Moreover, we have to check status twice and check zxid, because state can change during the checking. - */ - - /* The same as function above - * Assume that we don't know on which shards do we have partition certain piece. - * We'll check them all (I mean shards that contain the whole partition) - * And shards that don't have certain piece MUST mark that piece is_done true. - * */ - bool checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name, - size_t piece_number, const TasksShard & shards_with_partition); - - - /*Alter successful insertion to helping tables it will move all pieces to destination table*/ - TaskStatus tryMoveAllPiecesToDestinationTable(const TaskTable & task_table, const String & partition_name); - - /// Removes MATERIALIZED and ALIAS columns from create table query - static ASTPtr removeAliasMaterializedAndTTLColumnsFromCreateQuery(const ASTPtr & query_ast, bool allow_to_copy_alias_and_materialized_columns); - - bool tryDropPartitionPiece(ShardPartition & task_partition, size_t current_piece_number, - const zkutil::ZooKeeperPtr & zookeeper, const CleanStateClock & clean_state_clock); - - bool tryProcessTable(const ConnectionTimeouts & timeouts, TaskTable & task_table); - - TaskStatus tryCreateDestinationTable(const ConnectionTimeouts & timeouts, TaskTable & task_table); - /// Job for copying partition from particular shard. 
- TaskStatus tryProcessPartitionTask(const ConnectionTimeouts & timeouts, - ShardPartition & task_partition, - bool is_unprioritized_task); - - TaskStatus iterateThroughAllPiecesInPartition(const ConnectionTimeouts & timeouts, - ShardPartition & task_partition, - bool is_unprioritized_task); - - TaskStatus processPartitionPieceTaskImpl(const ConnectionTimeouts & timeouts, - ShardPartition & task_partition, - size_t current_piece_number, - bool is_unprioritized_task); - - void dropAndCreateLocalTable(const ASTPtr & create_ast); - - void dropLocalTableIfExists(const DatabaseAndTableName & table_name) const; - - void dropHelpingTables(const TaskTable & task_table); - - void dropHelpingTablesByPieceNumber(const TaskTable & task_table, size_t current_piece_number); - - /// Is used for usage less disk space. - /// After all pieces were successfully moved to original destination - /// table we can get rid of partition pieces (partitions in helping tables). - void dropParticularPartitionPieceFromAllHelpingTables(const TaskTable & task_table, const String & partition_name); - - String getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings); - - ASTPtr getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard); - - /// If it is implicitly asked to create split Distributed table for certain piece on current shard, we will do it. 
- void createShardInternalTables(const ConnectionTimeouts & timeouts, TaskShard & task_shard, bool create_split = true); - - std::set getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard); - - bool checkShardHasPartition(const ConnectionTimeouts & timeouts, TaskShard & task_shard, const String & partition_quoted_name); - - bool checkPresentPartitionPiecesOnCurrentShard(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, const String & partition_quoted_name, size_t current_piece_number); - - /* - * This class is used in executeQueryOnCluster function - * You can execute query on each shard (no sense it is executed on each replica of a shard or not) - * or you can execute query on each replica on each shard. - * First mode is useful for INSERTS queries. - * */ - enum ClusterExecutionMode - { - ON_EACH_SHARD, - ON_EACH_NODE - }; - - /** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster - * Returns number of shards for which at least one replica executed query successfully - */ - UInt64 executeQueryOnCluster( - const ClusterPtr & cluster, - const String & query, - const Settings & current_settings, - ClusterExecutionMode execution_mode = ClusterExecutionMode::ON_EACH_SHARD) const; - -private: - String task_zookeeper_path; - String task_description_path; - String host_id; - String working_database_name; - - /// Auto update config stuff - UInt64 task_description_current_version = 1; - std::atomic task_description_version{1}; - Coordination::WatchCallback task_description_watch_callback; - /// ZooKeeper session used to set the callback - zkutil::ZooKeeperPtr task_description_watch_zookeeper; - - ConfigurationPtr task_cluster_initial_config; - ConfigurationPtr task_cluster_current_config; - - std::unique_ptr task_cluster; - - bool is_safe_mode = false; - double copy_fault_probability = 0.0; - double move_fault_probability = 0.0; - - bool experimental_use_sample_offset{false}; - - 
LoggerRawPtr log; - - UInt64 max_table_tries = 3; - UInt64 max_shard_partition_tries = 3; - UInt64 max_shard_partition_piece_tries_for_alter = 10; - std::chrono::milliseconds retry_delay_ms{1000}; -}; -} diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp deleted file mode 100644 index fdf07dec61a..00000000000 --- a/programs/copier/ClusterCopierApp.cpp +++ /dev/null @@ -1,252 +0,0 @@ -#include "ClusterCopierApp.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fs = std::filesystem; - -namespace DB -{ - -/// ClusterCopierApp - -void ClusterCopierApp::initialize(Poco::Util::Application & self) -{ - is_help = config().has("help"); - if (is_help) - return; - - config_xml_path = config().getString("config-file"); - task_path = config().getString("task-path"); - log_level = config().getString("log-level", "info"); - is_safe_mode = config().has("safe-mode"); - is_status_mode = config().has("status"); - if (config().has("copy-fault-probability")) - copy_fault_probability = std::max(std::min(config().getDouble("copy-fault-probability"), 1.0), 0.0); - if (config().has("move-fault-probability")) - move_fault_probability = std::max(std::min(config().getDouble("move-fault-probability"), 1.0), 0.0); - base_dir = (config().has("base-dir")) ? 
config().getString("base-dir") : fs::current_path().string(); - - max_table_tries = std::max(config().getUInt("max-table-tries", 3), 1); - max_shard_partition_tries = std::max(config().getUInt("max-shard-partition-tries", 3), 1); - max_shard_partition_piece_tries_for_alter = std::max(config().getUInt("max-shard-partition-piece-tries-for-alter", 10), 1); - retry_delay_ms = std::chrono::milliseconds(std::max(config().getUInt("retry-delay-ms", 1000), 100)); - - if (config().has("experimental-use-sample-offset")) - experimental_use_sample_offset = config().getBool("experimental-use-sample-offset"); - - // process_id is '#_' - time_t timestamp = Poco::Timestamp().epochTime(); - auto curr_pid = Poco::Process::id(); - - process_id = std::to_string(DateLUT::serverTimezoneInstance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid); - host_id = escapeForFileName(getFQDNOrHostName()) + '#' + process_id; - process_path = fs::weakly_canonical(fs::path(base_dir) / ("clickhouse-copier_" + process_id)); - fs::create_directories(process_path); - - /// Override variables for BaseDaemon - if (config().has("log-level")) - config().setString("logger.level", config().getString("log-level")); - - if (config().has("base-dir") || !config().has("logger.log")) - config().setString("logger.log", fs::path(process_path) / "log.log"); - - if (config().has("base-dir") || !config().has("logger.errorlog")) - config().setString("logger.errorlog", fs::path(process_path) / "log.err.log"); - - Base::initialize(self); -} - - -void ClusterCopierApp::handleHelp(const std::string &, const std::string &) -{ - uint16_t terminal_width = 0; - if (isatty(STDIN_FILENO)) - terminal_width = getTerminalWidth(); - - Poco::Util::HelpFormatter help_formatter(options()); - if (terminal_width) - help_formatter.setWidth(terminal_width); - help_formatter.setCommand(commandName()); - help_formatter.setHeader("Copies tables from one cluster to another"); - help_formatter.setUsage("--config-file --task-path 
"); - help_formatter.format(std::cerr); - help_formatter.setFooter("See also: https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/"); - - stopOptionsProcessing(); -} - - -void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options) -{ - Base::defineOptions(options); - - options.addOption(Poco::Util::Option("task-path", "", "path to task in ZooKeeper") - .argument("task-path").binding("task-path")); - options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path") - .argument("task-file").binding("task-file")); - options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists. Default is false.") - .argument("task-upload-force").binding("task-upload-force")); - options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors") - .binding("safe-mode")); - options.addOption(Poco::Util::Option("copy-fault-probability", "", "the copying fails with specified probability (used to test partition state recovering)") - .argument("copy-fault-probability").binding("copy-fault-probability")); - options.addOption(Poco::Util::Option("move-fault-probability", "", "the moving fails with specified probability (used to test partition state recovering)") - .argument("move-fault-probability").binding("move-fault-probability")); - options.addOption(Poco::Util::Option("log-level", "", "sets log level") - .argument("log-level").binding("log-level")); - options.addOption(Poco::Util::Option("base-dir", "", "base directory for copiers, consecutive copier launches will populate /base-dir/launch_id/* directories") - .argument("base-dir").binding("base-dir")); - options.addOption(Poco::Util::Option("experimental-use-sample-offset", "", "Use SAMPLE OFFSET query instead of cityHash64(PRIMARY KEY) % n == k") - .argument("experimental-use-sample-offset").binding("experimental-use-sample-offset")); - 
options.addOption(Poco::Util::Option("status", "", "Get for status for current execution").binding("status")); - - options.addOption(Poco::Util::Option("max-table-tries", "", "Number of tries for the copy table task") - .argument("max-table-tries").binding("max-table-tries")); - options.addOption(Poco::Util::Option("max-shard-partition-tries", "", "Number of tries for the copy one partition task") - .argument("max-shard-partition-tries").binding("max-shard-partition-tries")); - options.addOption(Poco::Util::Option("max-shard-partition-piece-tries-for-alter", "", "Number of tries for final ALTER ATTACH to destination table") - .argument("max-shard-partition-piece-tries-for-alter").binding("max-shard-partition-piece-tries-for-alter")); - options.addOption(Poco::Util::Option("retry-delay-ms", "", "Delay between task retries") - .argument("retry-delay-ms").binding("retry-delay-ms")); - - using Me = std::decay_t; - options.addOption(Poco::Util::Option("help", "", "produce this help message").binding("help") - .callback(Poco::Util::OptionCallback(this, &Me::handleHelp))); -} - - -void ClusterCopierApp::mainImpl() -{ - /// Status command - { - if (is_status_mode) - { - SharedContextHolder shared_context = Context::createShared(); - auto context = Context::createGlobal(shared_context.get()); - context->makeGlobalContext(); - SCOPE_EXIT_SAFE(context->shutdown()); - - auto zookeeper = context->getZooKeeper(); - auto status_json = zookeeper->get(task_path + "/status"); - - LOG_INFO(&logger(), "{}", status_json); - std::cout << status_json << std::endl; - - context->resetZooKeeper(); - return; - } - } - StatusFile status_file(process_path + "/status", StatusFile::write_full_info); - ThreadStatus thread_status; - - auto * log = &logger(); - LOG_INFO(log, "Starting clickhouse-copier (id {}, host_id {}, path {}, revision {})", process_id, host_id, process_path, ClickHouseRevision::getVersionRevision()); - - SharedContextHolder shared_context = Context::createShared(); - auto 
context = Context::createGlobal(shared_context.get()); - context->makeGlobalContext(); - SCOPE_EXIT_SAFE(context->shutdown()); - - context->setConfig(loaded_config.configuration); - context->setApplicationType(Context::ApplicationType::LOCAL); - context->setPath(process_path + "/"); - - registerInterpreters(); - registerFunctions(); - registerAggregateFunctions(); - registerTableFunctions(); - registerDatabases(); - registerStorages(); - registerDictionaries(); - registerDisks(/* global_skip_access_check= */ true); - registerFormats(); - - static const std::string default_database = "_local"; - DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, context)); - context->setCurrentDatabase(default_database); - - /// Disable queries logging, since: - /// - There are bits that is not allowed for global context, like adding factories info (for the query_log) - /// - And anyway it is useless for copier. - context->setSetting("log_queries", false); - - auto local_context = Context::createCopy(context); - - /// Initialize query scope just in case. 
- CurrentThread::QueryScope query_scope(local_context); - - auto copier = std::make_unique( - task_path, host_id, default_database, local_context, log); - copier->setSafeMode(is_safe_mode); - copier->setCopyFaultProbability(copy_fault_probability); - copier->setMoveFaultProbability(move_fault_probability); - copier->setMaxTableTries(max_table_tries); - copier->setMaxShardPartitionTries(max_shard_partition_tries); - copier->setMaxShardPartitionPieceTriesForAlter(max_shard_partition_piece_tries_for_alter); - copier->setRetryDelayMs(retry_delay_ms); - copier->setExperimentalUseSampleOffset(experimental_use_sample_offset); - - auto task_file = config().getString("task-file", ""); - if (!task_file.empty()) - copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false)); - - zkutil::validateZooKeeperConfig(config()); - - copier->init(); - copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef())); - - /// Reset ZooKeeper before removing ClusterCopier. - /// Otherwise zookeeper watch can call callback which use already removed ClusterCopier object. - context->resetZooKeeper(); -} - - -int ClusterCopierApp::main(const std::vector &) -{ - if (is_help) - return 0; - - try - { - mainImpl(); - } - catch (...) - { - tryLogCurrentException(&Poco::Logger::root(), __PRETTY_FUNCTION__); - auto code = getCurrentExceptionCode(); - - return (code) ? code : -1; - } - - return 0; -} - - -} - -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" - -int mainEntryClickHouseClusterCopier(int argc, char ** argv) -{ - try - { - DB::ClusterCopierApp app; - return app.run(argc, argv); - } - catch (...) - { - std::cerr << DB::getCurrentExceptionMessage(true) << "\n"; - auto code = DB::getCurrentExceptionCode(); - - return (code) ? 
code : -1; - } -} diff --git a/programs/copier/ClusterCopierApp.h b/programs/copier/ClusterCopierApp.h deleted file mode 100644 index 0ddc232381e..00000000000 --- a/programs/copier/ClusterCopierApp.h +++ /dev/null @@ -1,99 +0,0 @@ -#pragma once - -#include -#include - -#include "ClusterCopier.h" - -/* clickhouse cluster copier util - * Copies tables data from one cluster to new tables of other (possibly the same) cluster in distributed fault-tolerant manner. - * - * See overview in the docs: docs/en/utils/clickhouse-copier.md - * - * Implementation details: - * - * cluster-copier workers pull each partition of each shard of the source cluster and push it to the destination cluster through - * Distributed table (to perform data resharding). So, worker job is a partition of a source shard. - * A job has three states: Active, Finished and Abandoned. Abandoned means that worker died and did not finish the job. - * - * If an error occurred during the copying (a worker failed or a worker did not finish the INSERT), then the whole partition (on - * all destination servers) should be dropped and refilled. So, copying entity is a partition of all destination shards. - * If a failure is detected a special /is_dirty node is created in ZooKeeper signalling that other workers copying the same partition - * should stop, after a refilling procedure should start. - * - * ZooKeeper task node has the following structure: - * /task/path_root - path passed in --task-path parameter - * /description - contains user-defined XML config of the task - * /task_active_workers - contains ephemeral nodes of all currently active workers, used to implement max_workers limitation - * /server_fqdn#PID_timestamp - cluster-copier worker ID - * ... 
- * /tables - directory with table tasks - * /cluster.db.table1 - directory of table_hits task - * /partition1 - directory for partition1 - * /shards - directory for source cluster shards - * /1 - worker job for the first shard of partition1 of table test.hits - * Contains info about current status (Active or Finished) and worker ID. - * /2 - * ... - * /partition_active_workers - * /1 - for each job in /shards a corresponding ephemeral node created in /partition_active_workers - * It is used to detect Abandoned jobs (if there is Active node in /shards and there is no node in - * /partition_active_workers). - * Also, it is used to track active workers in the partition (when we need to refill the partition we do - * not DROP PARTITION while there are active workers) - * /2 - * ... - * /is_dirty - the node is set if some worker detected that an error occurred (the INSERT is failed or an Abandoned node is - * detected). If the node appeared workers in this partition should stop and start cleaning and refilling - * partition procedure. - * During this procedure a single 'cleaner' worker is selected. The worker waits for stopping all partition - * workers, removes /shards node, executes DROP PARTITION on each destination node and removes /is_dirty node. - * /cleaner- An ephemeral node used to select 'cleaner' worker. Contains ID of the worker. - * /cluster.db.table2 - * ... 
- */ - -namespace DB -{ - -class ClusterCopierApp : public BaseDaemon -{ -public: - - void initialize(Poco::Util::Application & self) override; - - void handleHelp(const std::string &, const std::string &); - - void defineOptions(Poco::Util::OptionSet & options) override; - - int main(const std::vector &) override; - -private: - - using Base = BaseDaemon; - - void mainImpl(); - - std::string config_xml_path; - std::string task_path; - std::string log_level = "info"; - bool is_safe_mode = false; - bool is_status_mode = false; - double copy_fault_probability = 0.0; - double move_fault_probability = 0.0; - bool is_help = false; - - UInt64 max_table_tries = 3; - UInt64 max_shard_partition_tries = 3; - UInt64 max_shard_partition_piece_tries_for_alter = 10; - std::chrono::milliseconds retry_delay_ms{1000}; - - bool experimental_use_sample_offset{false}; - - std::string base_dir; - std::string process_path; - std::string process_id; - std::string host_id; -}; - -} diff --git a/programs/copier/ClusterPartition.h b/programs/copier/ClusterPartition.h deleted file mode 100644 index 22063989e22..00000000000 --- a/programs/copier/ClusterPartition.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -/// Contains info about all shards that contain a partition -struct ClusterPartition -{ - double elapsed_time_seconds = 0; - UInt64 bytes_copied = 0; - UInt64 rows_copied = 0; - UInt64 blocks_copied = 0; - - UInt64 total_tries = 0; -}; - -using ClusterPartitions = std::map>; - -} diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp deleted file mode 100644 index dcd199c6b38..00000000000 --- a/programs/copier/Internals.cpp +++ /dev/null @@ -1,280 +0,0 @@ -#include "Internals.h" -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -using ConfigurationPtr = Poco::AutoPtr; - -ConfigurationPtr getConfigurationFromXMLString(const 
std::string & xml_data) -{ - std::stringstream ss(xml_data); // STYLE_CHECK_ALLOW_STD_STRING_STREAM - Poco::XML::InputSource input_source{ss}; - return {new Poco::Util::XMLConfiguration{&input_source}}; -} - -String getQuotedTable(const String & database, const String & table) -{ - if (database.empty()) - return backQuoteIfNeed(table); - - return backQuoteIfNeed(database) + "." + backQuoteIfNeed(table); -} - -String getQuotedTable(const DatabaseAndTableName & db_and_table) -{ - return getQuotedTable(db_and_table.first, db_and_table.second); -} - - -// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key]) -std::shared_ptr createASTStorageDistributed( - const String & cluster_name, const String & database, const String & table, - const ASTPtr & sharding_key_ast) -{ - auto args = std::make_shared(); - args->children.emplace_back(std::make_shared(cluster_name)); - args->children.emplace_back(std::make_shared(database)); - args->children.emplace_back(std::make_shared(table)); - if (sharding_key_ast) - args->children.emplace_back(sharding_key_ast); - - auto engine = std::make_shared(); - engine->name = "Distributed"; - engine->arguments = args; - - auto storage = std::make_shared(); - storage->set(storage->engine, engine); - - return storage; -} - - -Block getBlockWithAllStreamData(QueryPipelineBuilder builder) -{ - builder.addTransform(std::make_shared( - builder.getHeader(), - std::numeric_limits::max(), - std::numeric_limits::max())); - - auto cur_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); - Block block; - PullingPipelineExecutor executor(cur_pipeline); - executor.pull(block); - - return block; -} - -bool isExtendedDefinitionStorage(const ASTPtr & storage_ast) -{ - const auto & storage = storage_ast->as(); - return storage.partition_by || storage.order_by || storage.sample_by; -} - -ASTPtr extractPartitionKey(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = 
storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (isExtendedDefinitionStorage(storage_ast)) - { - if (storage.partition_by) - return storage.partition_by->clone(); - - static const char * all = "all"; - return std::make_shared(Field(all, strlen(all))); - } - else - { - bool is_replicated = startsWith(engine.name, "Replicated"); - size_t min_args = is_replicated ? 3 : 1; - - if (!engine.arguments) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected arguments in {}", storage_str); - - ASTPtr arguments_ast = engine.arguments->clone(); - ASTs & arguments = arguments_ast->children; - - if (arguments.size() < min_args) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected at least {} arguments in {}", min_args, storage_str); - - ASTPtr & month_arg = is_replicated ? arguments[2] : arguments[1]; - return makeASTFunction("toYYYYMM", month_arg->clone()); - } -} - -ASTPtr extractPrimaryKey(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (!isExtendedDefinitionStorage(storage_ast)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str); - } - - if (storage.primary_key) - return storage.primary_key->clone(); - - return nullptr; -} - - -ASTPtr extractOrderBy(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { 
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (!isExtendedDefinitionStorage(storage_ast)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str); - } - - if (storage.order_by) - return storage.order_by->clone(); - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); -} - -/// Wraps only identifiers with backticks. -std::string wrapIdentifiersWithBackticks(const ASTPtr & root) -{ - if (auto identifier = std::dynamic_pointer_cast(root)) - return backQuote(identifier->name()); - - if (auto function = std::dynamic_pointer_cast(root)) - return function->name + '(' + wrapIdentifiersWithBackticks(function->arguments) + ')'; - - if (auto expression_list = std::dynamic_pointer_cast(root)) - { - Names function_arguments(expression_list->children.size()); - for (size_t i = 0; i < expression_list->children.size(); ++i) - function_arguments[i] = wrapIdentifiersWithBackticks(expression_list->children[0]); - return boost::algorithm::join(function_arguments, ", "); - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key could be represented only as columns or functions from columns."); -} - - -Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) -{ - const auto sorting_key_ast = extractOrderBy(storage_ast); - const auto primary_key_ast = extractPrimaryKey(storage_ast); - - const auto sorting_key_expr_list = extractKeyExpressionList(sorting_key_ast); - const auto primary_key_expr_list = primary_key_ast - ? extractKeyExpressionList(primary_key_ast) : sorting_key_expr_list->clone(); - - /// Maybe we have to handle VersionedCollapsing engine separately. But in our case in looks pointless. 
- - size_t primary_key_size = primary_key_expr_list->children.size(); - size_t sorting_key_size = sorting_key_expr_list->children.size(); - - if (primary_key_size > sorting_key_size) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key must be a prefix of the sorting key, but its length: " - "{} is greater than the sorting key length: {}", - primary_key_size, sorting_key_size); - - Names primary_key_columns; - NameSet primary_key_columns_set; - - for (size_t i = 0; i < sorting_key_size; ++i) - { - /// Column name could be represented as a f_1(f_2(...f_n(column_name))). - /// Each f_i could take one or more parameters. - /// We will wrap identifiers with backticks to allow non-standard identifier names. - String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName(); - - if (i < primary_key_size) - { - String pk_column = primary_key_expr_list->children[i]->getColumnName(); - if (pk_column != sorting_key_column) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Primary key must be a prefix of the sorting key, " - "but the column in the position {} is {}, not {}", i, sorting_key_column, pk_column); - - if (!primary_key_columns_set.emplace(pk_column).second) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns"); - - primary_key_columns.push_back(wrapIdentifiersWithBackticks(primary_key_expr_list->children[i])); - } - } - - return primary_key_columns; -} - -bool isReplicatedTableEngine(const ASTPtr & storage_ast) -{ - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - String storage_str = queryToString(storage_ast); - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - return startsWith(engine.name, "Replicated"); -} - -ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random) 
-{ - ShardPriority res; - - if (replicas.empty()) - return res; - - res.is_remote = 1; - for (const auto & replica : replicas) - { - if (isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(replica.host_name).front())) - { - res.is_remote = 0; - break; - } - } - - res.hostname_difference = std::numeric_limits::max(); - for (const auto & replica : replicas) - { - size_t difference = getHostNamePrefixDistance(local_hostname, replica.host_name); - res.hostname_difference = std::min(difference, res.hostname_difference); - } - - res.random = random; - return res; -} - -} diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h deleted file mode 100644 index 27fedd5d9e8..00000000000 --- a/programs/copier/Internals.h +++ /dev/null @@ -1,198 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Aliases.h" - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - - -ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data); - -String getQuotedTable(const String & database, const String & table); - -String getQuotedTable(const DatabaseAndTableName & db_and_table); - - -enum class TaskState -{ - Started = 0, - Finished, - Unknown -}; - -/// Used to mark status of shard partition tasks -struct TaskStateWithOwner -{ - TaskStateWithOwner() = default; - - TaskStateWithOwner(TaskState state_, const String & owner_) : 
state(state_), owner(owner_) {} - - TaskState state{TaskState::Unknown}; - String owner; - - static String getData(TaskState state, const String &owner) - { - return TaskStateWithOwner(state, owner).toString(); - } - - String toString() const - { - WriteBufferFromOwnString wb; - wb << static_cast(state) << "\n" << escape << owner; - return wb.str(); - } - - static TaskStateWithOwner fromString(const String & data) - { - ReadBufferFromString rb(data); - TaskStateWithOwner res; - UInt32 state; - - rb >> state >> "\n" >> escape >> res.owner; - - if (state >= static_cast(TaskState::Unknown)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown state {}", data); - - res.state = static_cast(state); - return res; - } -}; - - -struct ShardPriority -{ - UInt8 is_remote = 1; - size_t hostname_difference = 0; - UInt8 random = 0; - - static bool greaterPriority(const ShardPriority & current, const ShardPriority & other) - { - return std::forward_as_tuple(current.is_remote, current.hostname_difference, current.random) - < std::forward_as_tuple(other.is_remote, other.hostname_difference, other.random); - } -}; - -/// Execution status of a task. -/// Is used for: partition copying task status, partition piece copying task status, partition moving task status. 
-enum class TaskStatus -{ - Active, - Finished, - Error, -}; - -struct MultiTransactionInfo -{ - int32_t code; - Coordination::Requests requests; - Coordination::Responses responses; -}; - -// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key]) -std::shared_ptr createASTStorageDistributed( - const String & cluster_name, const String & database, const String & table, - const ASTPtr & sharding_key_ast = nullptr); - -Block getBlockWithAllStreamData(QueryPipelineBuilder builder); - -bool isExtendedDefinitionStorage(const ASTPtr & storage_ast); - -ASTPtr extractPartitionKey(const ASTPtr & storage_ast); - -/* -* Choosing a Primary Key that Differs from the Sorting Key -* It is possible to specify a primary key (an expression with values that are written in the index file for each mark) -* that is different from the sorting key (an expression for sorting the rows in data parts). -* In this case the primary key expression tuple must be a prefix of the sorting key expression tuple. -* This feature is helpful when using the SummingMergeTree and AggregatingMergeTree table engines. -* In a common case when using these engines, the table has two types of columns: dimensions and measures. -* Typical queries aggregate values of measure columns with arbitrary GROUP BY and filtering by dimensions. -* Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, -* it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns -* and this list must be frequently updated with newly added dimensions. -* In this case it makes sense to leave only a few columns in the primary key that will provide efficient -* range scans and add the remaining dimension columns to the sorting key tuple. 
-* ALTER of the sorting key is a lightweight operation because when a new column is simultaneously added t -* o the table and to the sorting key, existing data parts don't need to be changed. -* Since the old sorting key is a prefix of the new sorting key and there is no data in the newly added column, -* the data is sorted by both the old and new sorting keys at the moment of table modification. -* -* */ -ASTPtr extractPrimaryKey(const ASTPtr & storage_ast); - -ASTPtr extractOrderBy(const ASTPtr & storage_ast); - -Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast); - -bool isReplicatedTableEngine(const ASTPtr & storage_ast); - -ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random); - -} diff --git a/programs/copier/ShardPartition.cpp b/programs/copier/ShardPartition.cpp deleted file mode 100644 index 4c962fc807d..00000000000 --- a/programs/copier/ShardPartition.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include "ShardPartition.h" - -#include "TaskShard.h" -#include "TaskTable.h" - -namespace DB -{ - -ShardPartition::ShardPartition(TaskShard & parent, String name_quoted_, size_t number_of_splits) - : task_shard(parent) - , name(std::move(name_quoted_)) -{ - pieces.reserve(number_of_splits); -} - -String ShardPartition::getPartitionCleanStartPath() const -{ - return getPartitionPath() + "/clean_start"; -} - -String ShardPartition::getPartitionPieceCleanStartPath(size_t current_piece_number) const -{ - assert(current_piece_number < task_shard.task_table.number_of_splits); - return getPartitionPiecePath(current_piece_number) + "/clean_start"; -} - -String ShardPartition::getPartitionPath() const -{ - return task_shard.task_table.getPartitionPath(name); -} - -String ShardPartition::getPartitionPiecePath(size_t current_piece_number) const -{ - assert(current_piece_number < task_shard.task_table.number_of_splits); - return task_shard.task_table.getPartitionPiecePath(name, current_piece_number); -} - 
-String ShardPartition::getShardStatusPath() const -{ - // schema: //tables///shards/ - // e.g. /root/table_test.hits/201701/shards/1 - return getPartitionShardsPath() + "/" + toString(task_shard.numberInCluster()); -} - -String ShardPartition::getPartitionShardsPath() const -{ - return getPartitionPath() + "/shards"; -} - -String ShardPartition::getPartitionActiveWorkersPath() const -{ - return getPartitionPath() + "/partition_active_workers"; -} - -String ShardPartition::getActiveWorkerPath() const -{ - return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster()); -} - -String ShardPartition::getCommonPartitionIsDirtyPath() const -{ - return getPartitionPath() + "/is_dirty"; -} - -String ShardPartition::getCommonPartitionIsCleanedPath() const -{ - return getCommonPartitionIsDirtyPath() + "/cleaned"; -} - -} diff --git a/programs/copier/ShardPartition.h b/programs/copier/ShardPartition.h deleted file mode 100644 index 2457213733c..00000000000 --- a/programs/copier/ShardPartition.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include "ShardPartitionPiece.h" - -#include - -#include - -namespace DB -{ - -struct TaskShard; - -/// Just destination partition of a shard -/// I don't know what this comment means. -/// In short, when we discovered what shards contain currently processing partition, -/// This class describes a partition (name) that is stored on the shard (parent). 
-struct ShardPartition -{ - ShardPartition(TaskShard &parent, String name_quoted_, size_t number_of_splits = 10); - - String getPartitionPath() const; - - String getPartitionPiecePath(size_t current_piece_number) const; - - String getPartitionCleanStartPath() const; - - String getPartitionPieceCleanStartPath(size_t current_piece_number) const; - - String getCommonPartitionIsDirtyPath() const; - - String getCommonPartitionIsCleanedPath() const; - - String getPartitionActiveWorkersPath() const; - - String getActiveWorkerPath() const; - - String getPartitionShardsPath() const; - - String getShardStatusPath() const; - - /// What partition pieces are present in current shard. - /// FYI: Piece is a part of partition which has modulo equals to concrete constant (less than number_of_splits obliously) - /// For example SELECT ... from ... WHERE partition=current_partition AND cityHash64(*) == const; - /// Absent pieces have field is_absent_piece equals to true. - PartitionPieces pieces; - - TaskShard & task_shard; - String name; -}; - -using TasksPartition = std::map>; - -} diff --git a/programs/copier/ShardPartitionPiece.cpp b/programs/copier/ShardPartitionPiece.cpp deleted file mode 100644 index 36d1621e012..00000000000 --- a/programs/copier/ShardPartitionPiece.cpp +++ /dev/null @@ -1,64 +0,0 @@ -#include "ShardPartitionPiece.h" - -#include "ShardPartition.h" -#include "TaskShard.h" - -#include - -namespace DB -{ - -ShardPartitionPiece::ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_) - : is_absent_piece(!is_present_piece_) - , current_piece_number(current_piece_number_) - , shard_partition(parent) -{ -} - -String ShardPartitionPiece::getPartitionPiecePath() const -{ - return shard_partition.getPartitionPath() + "/piece_" + toString(current_piece_number); -} - -String ShardPartitionPiece::getPartitionPieceCleanStartPath() const -{ - return getPartitionPiecePath() + "/clean_start"; -} - -String 
ShardPartitionPiece::getPartitionPieceIsDirtyPath() const -{ - return getPartitionPiecePath() + "/is_dirty"; -} - -String ShardPartitionPiece::getPartitionPieceIsCleanedPath() const -{ - return getPartitionPieceIsDirtyPath() + "/cleaned"; -} - -String ShardPartitionPiece::getPartitionPieceActiveWorkersPath() const -{ - return getPartitionPiecePath() + "/partition_piece_active_workers"; -} - -String ShardPartitionPiece::getActiveWorkerPath() const -{ - return getPartitionPieceActiveWorkersPath() + "/" + toString(shard_partition.task_shard.numberInCluster()); -} - -/// On what shards do we have current partition. -String ShardPartitionPiece::getPartitionPieceShardsPath() const -{ - return getPartitionPiecePath() + "/shards"; -} - -String ShardPartitionPiece::getShardStatusPath() const -{ - return getPartitionPieceShardsPath() + "/" + toString(shard_partition.task_shard.numberInCluster()); -} - -String ShardPartitionPiece::getPartitionPieceCleanerPath() const -{ - return getPartitionPieceIsDirtyPath() + "/cleaner"; -} - -} diff --git a/programs/copier/ShardPartitionPiece.h b/programs/copier/ShardPartitionPiece.h deleted file mode 100644 index 453364c0fc8..00000000000 --- a/programs/copier/ShardPartitionPiece.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include - -#include - -namespace DB -{ - -struct ShardPartition; - -struct ShardPartitionPiece -{ - ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_); - - String getPartitionPiecePath() const; - - String getPartitionPieceCleanStartPath() const; - - String getPartitionPieceIsDirtyPath() const; - - String getPartitionPieceIsCleanedPath() const; - - String getPartitionPieceActiveWorkersPath() const; - - String getActiveWorkerPath() const ; - - /// On what shards do we have current partition. 
- String getPartitionPieceShardsPath() const; - - String getShardStatusPath() const; - - String getPartitionPieceCleanerPath() const; - - bool is_absent_piece; - const size_t current_piece_number; - - ShardPartition & shard_partition; -}; - -using PartitionPieces = std::vector; - -} diff --git a/programs/copier/StatusAccumulator.cpp b/programs/copier/StatusAccumulator.cpp deleted file mode 100644 index 77adeac708c..00000000000 --- a/programs/copier/StatusAccumulator.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "StatusAccumulator.h" - -#include -#include -#include -#include - -#include - -namespace DB -{ - -StatusAccumulator::MapPtr StatusAccumulator::fromJSON(String state_json) -{ - Poco::JSON::Parser parser; - auto state = parser.parse(state_json).extract(); - MapPtr result_ptr = std::make_shared(); - for (const auto & table_name : state->getNames()) - { - auto table_status_json = state->getValue(table_name); - auto table_status = parser.parse(table_status_json).extract(); - /// Map entry will be created if it is absent - auto & map_table_status = (*result_ptr)[table_name]; - map_table_status.all_partitions_count += table_status->getValue("all_partitions_count"); - map_table_status.processed_partitions_count += table_status->getValue("processed_partitions_count"); - } - return result_ptr; -} - -String StatusAccumulator::serializeToJSON(MapPtr statuses) -{ - Poco::JSON::Object result_json; - for (const auto & [table_name, table_status] : *statuses) - { - Poco::JSON::Object status_json; - status_json.set("all_partitions_count", table_status.all_partitions_count); - status_json.set("processed_partitions_count", table_status.processed_partitions_count); - - result_json.set(table_name, status_json); - } - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - oss.exceptions(std::ios::failbit); - Poco::JSON::Stringifier::stringify(result_json, oss); - auto result = oss.str(); - return result; -} - -} diff --git a/programs/copier/StatusAccumulator.h 
b/programs/copier/StatusAccumulator.h deleted file mode 100644 index d420b611602..00000000000 --- a/programs/copier/StatusAccumulator.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include - -#include -#include - -namespace DB -{ - -class StatusAccumulator -{ -public: - struct TableStatus - { - size_t all_partitions_count; - size_t processed_partitions_count; - }; - - using Map = std::unordered_map; - using MapPtr = std::shared_ptr; - - static MapPtr fromJSON(String state_json); - static String serializeToJSON(MapPtr statuses); -}; - -} diff --git a/programs/copier/TaskCluster.cpp b/programs/copier/TaskCluster.cpp deleted file mode 100644 index 0fb06616e50..00000000000 --- a/programs/copier/TaskCluster.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "TaskCluster.h" - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -TaskCluster::TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_) - : task_zookeeper_path(task_zookeeper_path_) - , default_local_database(default_local_database_) -{} - -void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key) -{ - String prefix = base_key.empty() ? "" : base_key + "."; - - clusters_prefix = prefix + "remote_servers"; - if (!config.has(clusters_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "You should specify list of clusters in {}", clusters_prefix); - - Poco::Util::AbstractConfiguration::Keys tables_keys; - config.keys(prefix + "tables", tables_keys); - - for (const auto & table_key : tables_keys) - { - table_tasks.emplace_back(*this, config, prefix + "tables", table_key); - } -} - -void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key) -{ - String prefix = base_key.empty() ? 
"" : base_key + "."; - - max_workers = config.getUInt64(prefix + "max_workers"); - - settings_common = Settings(); - if (config.has(prefix + "settings")) - settings_common.loadSettingsFromConfig(prefix + "settings", config); - - settings_common.prefer_localhost_replica = false; - - settings_pull = settings_common; - if (config.has(prefix + "settings_pull")) - settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config); - - settings_push = settings_common; - if (config.has(prefix + "settings_push")) - settings_push.loadSettingsFromConfig(prefix + "settings_push", config); - - auto set_default_value = [] (auto && setting, auto && default_value) - { - setting = setting.changed ? setting.value : default_value; - }; - - /// Override important settings - settings_pull.readonly = 1; - settings_pull.prefer_localhost_replica = false; - settings_push.distributed_foreground_insert = true; - settings_push.prefer_localhost_replica = false; - - set_default_value(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME); - set_default_value(settings_pull.max_threads, 1); - set_default_value(settings_pull.max_block_size, 8192UL); - set_default_value(settings_pull.preferred_block_size_bytes, 0); - - set_default_value(settings_push.distributed_background_insert_timeout, 0); - set_default_value(settings_push.alter_sync, 2); -} - -} - diff --git a/programs/copier/TaskCluster.h b/programs/copier/TaskCluster.h deleted file mode 100644 index a7f8bc3baca..00000000000 --- a/programs/copier/TaskCluster.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "TaskTable.h" - -#include -#include - -#include - -#include - -namespace DB -{ - -struct TaskCluster -{ - TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_); - - void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = ""); - - /// Set (or update) settings and max_workers param - void reloadSettings(const Poco::Util::AbstractConfiguration & config, 
const String & base_key = ""); - - /// Base node for all tasks. Its structure: - /// workers/ - directory with active workers (amount of them is less or equal max_workers) - /// description - node with task configuration - /// table_table1/ - directories with per-partition copying status - String task_zookeeper_path; - - /// Database used to create temporary Distributed tables - String default_local_database; - - /// Limits number of simultaneous workers - UInt64 max_workers = 0; - - /// Base settings for pull and push - Settings settings_common; - /// Settings used to fetch data - Settings settings_pull; - /// Settings used to insert data - Settings settings_push; - - String clusters_prefix; - - /// Subtasks - TasksTable table_tasks; - - pcg64 random_engine; -}; - -} diff --git a/programs/copier/TaskShard.cpp b/programs/copier/TaskShard.cpp deleted file mode 100644 index d156f451a84..00000000000 --- a/programs/copier/TaskShard.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include "TaskShard.h" - -#include "TaskTable.h" - -namespace DB -{ - -TaskShard::TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_) - : task_table(parent) - , info(info_) -{ - list_of_split_tables_on_shard.assign(task_table.number_of_splits, DatabaseAndTableName()); -} - -UInt32 TaskShard::numberInCluster() const -{ - return info.shard_num; -} - -UInt32 TaskShard::indexInCluster() const -{ - return info.shard_num - 1; -} - -String DB::TaskShard::getDescription() const -{ - return fmt::format("N{} (having a replica {}, pull table {} of cluster {}", - numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name); -} - -String DB::TaskShard::getHostNameExample() const -{ - const auto & replicas = task_table.cluster_pull->getShardsAddresses().at(indexInCluster()); - return replicas.at(0).readableString(); -} - -} diff --git a/programs/copier/TaskShard.h b/programs/copier/TaskShard.h deleted file mode 100644 index 05d652077ea..00000000000 --- 
a/programs/copier/TaskShard.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include "Aliases.h" -#include "Internals.h" -#include "ClusterPartition.h" -#include "ShardPartition.h" - - -namespace DB -{ - -struct TaskTable; - -struct TaskShard -{ - TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_); - - TaskTable & task_table; - - Cluster::ShardInfo info; - - UInt32 numberInCluster() const; - - UInt32 indexInCluster() const; - - String getDescription() const; - - String getHostNameExample() const; - - /// Used to sort clusters by their proximity - ShardPriority priority; - - /// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard - ColumnWithTypeAndName partition_key_column; - - /// There is a task for each destination partition - TasksPartition partition_tasks; - - /// Which partitions have been checked for existence - /// If some partition from this lists is exists, it is in partition_tasks - std::set checked_partitions; - - /// Last CREATE TABLE query of the table of the shard - ASTPtr current_pull_table_create_query; - ASTPtr current_push_table_create_query; - - /// Internal distributed tables - DatabaseAndTableName table_read_shard; - DatabaseAndTableName main_table_split_shard; - ListOfDatabasesAndTableNames list_of_split_tables_on_shard; -}; - -using TaskShardPtr = std::shared_ptr; -using TasksShard = std::vector; - -} diff --git a/programs/copier/TaskTable.cpp b/programs/copier/TaskTable.cpp deleted file mode 100644 index d055ceb4c7b..00000000000 --- a/programs/copier/TaskTable.cpp +++ /dev/null @@ -1,222 +0,0 @@ -#include "TaskTable.h" - -#include "ClusterPartition.h" -#include "TaskCluster.h" - -#include -#include - -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int UNKNOWN_ELEMENT_IN_CONFIG; - extern const int LOGICAL_ERROR; -} - -TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, - const String & prefix_, const String & 
table_key) - : task_cluster(parent) -{ - String table_prefix = prefix_ + "." + table_key + "."; - - name_in_config = table_key; - - number_of_splits = config.getUInt64(table_prefix + "number_of_splits", 3); - - allow_to_copy_alias_and_materialized_columns = config.getBool(table_prefix + "allow_to_copy_alias_and_materialized_columns", false); - allow_to_drop_target_partitions = config.getBool(table_prefix + "allow_to_drop_target_partitions", false); - - cluster_pull_name = config.getString(table_prefix + "cluster_pull"); - cluster_push_name = config.getString(table_prefix + "cluster_push"); - - table_pull.first = config.getString(table_prefix + "database_pull"); - table_pull.second = config.getString(table_prefix + "table_pull"); - - table_push.first = config.getString(table_prefix + "database_push"); - table_push.second = config.getString(table_prefix + "table_push"); - - /// Used as node name in ZooKeeper - table_id = escapeForFileName(cluster_push_name) - + "." + escapeForFileName(table_push.first) - + "." 
+ escapeForFileName(table_push.second); - - engine_push_str = config.getString(table_prefix + "engine", "rand()"); - - { - ParserStorage parser_storage{ParserStorage::TABLE_ENGINE}; - engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - engine_push_partition_key_ast = extractPartitionKey(engine_push_ast); - primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", "); - is_replicated_table = isReplicatedTableEngine(engine_push_ast); - } - - sharding_key_str = config.getString(table_prefix + "sharding_key"); - - auxiliary_engine_split_asts.reserve(number_of_splits); - { - ParserExpressionWithOptionalAlias parser_expression(false); - sharding_key_ast = parseQuery(parser_expression, sharding_key_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second, - sharding_key_ast); - - for (const auto piece_number : collections::range(0, number_of_splits)) - { - auxiliary_engine_split_asts.emplace_back - ( - createASTStorageDistributed(cluster_push_name, table_push.first, - table_push.second + "_piece_" + toString(piece_number), sharding_key_ast) - ); - } - } - - where_condition_str = config.getString(table_prefix + "where_condition", ""); - if (!where_condition_str.empty()) - { - ParserExpressionWithOptionalAlias parser_expression(false); - where_condition_ast = parseQuery(parser_expression, where_condition_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - - // Will use canonical expression form - where_condition_str = queryToString(where_condition_ast); - } - - String enabled_partitions_prefix = table_prefix + "enabled_partitions"; - has_enabled_partitions = config.has(enabled_partitions_prefix); - - if (has_enabled_partitions) - { - Strings keys; - config.keys(enabled_partitions_prefix, keys); - - if (keys.empty()) - { - /// Parse list of partition from space-separated string - String partitions_str = 
config.getString(table_prefix + "enabled_partitions"); - boost::trim_if(partitions_str, isWhitespaceASCII); - boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on); - } - else - { - /// Parse sequence of ... - for (const String &key : keys) - { - if (!startsWith(key, "partition")) - throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown key {} in {}", key, enabled_partitions_prefix); - - enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key)); - } - } - - std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin())); - } -} - - -String TaskTable::getPartitionPath(const String & partition_name) const -{ - return task_cluster.task_zookeeper_path // root - + "/tables/" + table_id // tables/dst_cluster.merge.hits - + "/" + escapeForFileName(partition_name); // 201701 -} - -String TaskTable::getPartitionAttachIsActivePath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/attach_active"; -} - -String TaskTable::getPartitionAttachIsDonePath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/attach_is_done"; -} - -String TaskTable::getPartitionPiecePath(const String & partition_name, size_t piece_number) const -{ - assert(piece_number < number_of_splits); - return getPartitionPath(partition_name) + "/piece_" + toString(piece_number); // 1...number_of_splits -} - -String TaskTable::getCertainPartitionIsDirtyPath(const String &partition_name) const -{ - return getPartitionPath(partition_name) + "/is_dirty"; -} - -String TaskTable::getCertainPartitionPieceIsDirtyPath(const String & partition_name, const size_t piece_number) const -{ - return getPartitionPiecePath(partition_name, piece_number) + "/is_dirty"; -} - -String TaskTable::getCertainPartitionIsCleanedPath(const String & partition_name) const -{ - return 
getCertainPartitionIsDirtyPath(partition_name) + "/cleaned"; -} - -String TaskTable::getCertainPartitionPieceIsCleanedPath(const String & partition_name, const size_t piece_number) const -{ - return getCertainPartitionPieceIsDirtyPath(partition_name, piece_number) + "/cleaned"; -} - -String TaskTable::getCertainPartitionTaskStatusPath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/shards"; -} - -String TaskTable::getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const -{ - return getPartitionPiecePath(partition_name, piece_number) + "/shards"; -} - -bool TaskTable::isReplicatedTable() const -{ - return is_replicated_table; -} - -String TaskTable::getStatusAllPartitionCount() const -{ - return task_cluster.task_zookeeper_path + "/status/all_partitions_count"; -} - -String TaskTable::getStatusProcessedPartitionsCount() const -{ - return task_cluster.task_zookeeper_path + "/status/processed_partitions_count"; -} - -ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() const -{ - ASTPtr prev_engine_push_ast = engine_push_ast->clone(); - - auto & new_storage_ast = prev_engine_push_ast->as(); - auto & new_engine_ast = new_storage_ast.engine->as(); - - /// Remove "Replicated" from name - new_engine_ast.name = new_engine_ast.name.substr(10); - - if (new_engine_ast.arguments) - { - auto & replicated_table_arguments = new_engine_ast.arguments->children; - - - /// In some cases of Atomic database engine usage ReplicatedMergeTree tables - /// could be created without arguments. - if (!replicated_table_arguments.empty()) - { - /// Delete first two arguments of Replicated...MergeTree() table. 
- replicated_table_arguments.erase(replicated_table_arguments.begin()); - replicated_table_arguments.erase(replicated_table_arguments.begin()); - } - } - - return new_storage_ast.clone(); -} - -ClusterPartition & TaskTable::getClusterPartition(const String & partition_name) -{ - auto it = cluster_partitions.find(partition_name); - if (it == cluster_partitions.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no cluster partition {} in {}", partition_name, table_id); - return it->second; -} - -} diff --git a/programs/copier/TaskTable.h b/programs/copier/TaskTable.h deleted file mode 100644 index 2bb7f078bc6..00000000000 --- a/programs/copier/TaskTable.h +++ /dev/null @@ -1,173 +0,0 @@ -#pragma once - -#include "Aliases.h" -#include "TaskShard.h" - - -namespace DB -{ - -struct ClusterPartition; -struct TaskCluster; - -struct TaskTable -{ - TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix, const String & table_key); - - TaskCluster & task_cluster; - - /// These functions used in checkPartitionIsDone() or checkPartitionPieceIsDone() - /// They are implemented here not to call task_table.tasks_shard[partition_name].second.pieces[current_piece_number] etc. 
- - String getPartitionPath(const String & partition_name) const; - - String getPartitionAttachIsActivePath(const String & partition_name) const; - - String getPartitionAttachIsDonePath(const String & partition_name) const; - - String getPartitionPiecePath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionIsDirtyPath(const String & partition_name) const; - - String getCertainPartitionPieceIsDirtyPath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionIsCleanedPath(const String & partition_name) const; - - String getCertainPartitionPieceIsCleanedPath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionTaskStatusPath(const String & partition_name) const; - - String getCertainPartitionPieceTaskStatusPath(const String & partition_name, size_t piece_number) const; - - bool isReplicatedTable() const; - - /// These nodes are used for check-status option - String getStatusAllPartitionCount() const; - String getStatusProcessedPartitionsCount() const; - - /// Partitions will be split into number-of-splits pieces. - /// Each piece will be copied independently. 
(10 by default) - size_t number_of_splits; - - bool allow_to_copy_alias_and_materialized_columns{false}; - bool allow_to_drop_target_partitions{false}; - - String name_in_config; - - /// Used as task ID - String table_id; - - /// Column names in primary key - String primary_key_comma_separated; - - /// Source cluster and table - String cluster_pull_name; - DatabaseAndTableName table_pull; - - /// Destination cluster and table - String cluster_push_name; - DatabaseAndTableName table_push; - - /// Storage of destination table - /// (tables that are stored on each shard of target cluster) - String engine_push_str; - ASTPtr engine_push_ast; - ASTPtr engine_push_partition_key_ast; - - /// First argument of Replicated...MergeTree() - String engine_push_zk_path; - bool is_replicated_table; - - ASTPtr rewriteReplicatedCreateQueryToPlain() const; - - /* - * A Distributed table definition used to split data - * Distributed table will be created on each shard of default - * cluster to perform data copying and resharding - * */ - String sharding_key_str; - ASTPtr sharding_key_ast; - ASTPtr main_engine_split_ast; - - /* - * To copy partition piece form one cluster to another we have to use Distributed table. - * In case of usage separate table (engine_push) for each partition piece, - * we have to use many Distributed tables. - * */ - ASTs auxiliary_engine_split_asts; - - /// Additional WHERE expression to filter input data - String where_condition_str; - ASTPtr where_condition_ast; - - /// Resolved clusters - ClusterPtr cluster_pull; - ClusterPtr cluster_push; - - /// Filter partitions that should be copied - bool has_enabled_partitions = false; - Strings enabled_partitions; - NameSet enabled_partitions_set; - - /** - * Prioritized list of shards - * all_shards contains information about all shards in the table. - * So we have to check whether particular shard have current partition or not while processing. 
- */ - TasksShard all_shards; - TasksShard local_shards; - - /// All partitions of the current table. - ClusterPartitions cluster_partitions; - NameSet finished_cluster_partitions; - - /// Partition names to process in user-specified order - Strings ordered_partition_names; - - ClusterPartition & getClusterPartition(const String & partition_name); - - Stopwatch watch; - UInt64 bytes_copied = 0; - UInt64 rows_copied = 0; - - template - void initShards(RandomEngine &&random_engine); -}; - -using TasksTable = std::list; - - -template -inline void TaskTable::initShards(RandomEngine && random_engine) -{ - const String & fqdn_name = getFQDNOrHostName(); - std::uniform_int_distribution get_urand(0, std::numeric_limits::max()); - - // Compute the priority - for (const auto & shard_info : cluster_pull->getShardsInfo()) - { - TaskShardPtr task_shard = std::make_shared(*this, shard_info); - const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster()); - task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine)); - - all_shards.emplace_back(task_shard); - } - - // Sort by priority - std::sort(all_shards.begin(), all_shards.end(), - [](const TaskShardPtr & lhs, const TaskShardPtr & rhs) - { - return ShardPriority::greaterPriority(lhs->priority, rhs->priority); - }); - - // Cut local shards - auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1, - [](const TaskShardPtr & lhs, UInt8 is_remote) - { - return lhs->priority.is_remote < is_remote; - }); - - local_shards.assign(all_shards.begin(), it_first_remote); -} - -} diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h deleted file mode 100644 index c15db73f060..00000000000 --- a/programs/copier/ZooKeeperStaff.h +++ /dev/null @@ -1,221 +0,0 @@ -#pragma once - -/** Allows to compare two incremental counters of type UInt32 in presence of possible overflow. 
- * We assume that we compare values that are not too far away. - * For example, when we increment 0xFFFFFFFF, we get 0. So, 0xFFFFFFFF is less than 0. - */ -class WrappingUInt32 -{ -public: - UInt32 value; - - explicit WrappingUInt32(UInt32 _value) - : value(_value) - {} - - bool operator<(const WrappingUInt32 & other) const - { - return value != other.value && *this <= other; - } - - bool operator<=(const WrappingUInt32 & other) const - { - const UInt32 HALF = static_cast(1) << 31; - return (value <= other.value && other.value - value < HALF) - || (value > other.value && value - other.value > HALF); - } - - bool operator==(const WrappingUInt32 & other) const - { - return value == other.value; - } -}; - -/** Conforming Zxid definition. - * cf. https://github.com/apache/zookeeper/blob/631d1b284f0edb1c4f6b0fb221bf2428aec71aaa/zookeeper-docs/src/main/resources/markdown/zookeeperInternals.md#guarantees-properties-and-definitions - * - * But it is better to read this: https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html - * - * Actually here is the definition of Zxid. - * Every change to the ZooKeeper state receives a stamp in the form of a zxid (ZooKeeper Transaction Id). - * This exposes the total ordering of all changes to ZooKeeper. Each change will have a unique zxid - * and if zxid1 is smaller than zxid2 then zxid1 happened before zxid2. - */ -class Zxid -{ -public: - WrappingUInt32 epoch; - WrappingUInt32 counter; - explicit Zxid(UInt64 _zxid) - : epoch(static_cast(_zxid >> 32)) - , counter(static_cast(_zxid)) - {} - - bool operator<=(const Zxid & other) const - { - return (epoch < other.epoch) - || (epoch == other.epoch && counter <= other.counter); - } - - bool operator==(const Zxid & other) const - { - return epoch == other.epoch && counter == other.counter; - } -}; - -/* When multiple ClusterCopiers discover that the target partition is not empty, - * they will attempt to clean up this partition before proceeding to copying. 
- * - * Instead of purging is_dirty, the history of cleaning work is preserved and partition hygiene is established - * based on a happens-before relation between the events. - * This relation is encoded by LogicalClock based on the mzxid of the is_dirty ZNode and is_dirty/cleaned. - * The fact of the partition hygiene is encoded by CleanStateClock. - * - * For you to know what mzxid means: - * - * ZooKeeper Stat Structure: - * The Stat structure for each znode in ZooKeeper is made up of the following fields: - * - * -- czxid - * The zxid of the change that caused this znode to be created. - * - * -- mzxid - * The zxid of the change that last modified this znode. - * - * -- ctime - * The time in milliseconds from epoch when this znode was created. - * - * -- mtime - * The time in milliseconds from epoch when this znode was last modified. - * - * -- version - * The number of changes to the data of this znode. - * - * -- cversion - * The number of changes to the children of this znode. - * - * -- aversion - * The number of changes to the ACL of this znode. - * - * -- ephemeralOwner - * The session id of the owner of this znode if the znode is an ephemeral node. - * If it is not an ephemeral node, it will be zero. - * - * -- dataLength - * The length of the data field of this znode. - * - * -- numChildren - * The number of children of this znode. 
- * */ - -class LogicalClock -{ -public: - std::optional zxid; - - LogicalClock() = default; - - explicit LogicalClock(UInt64 _zxid) - : zxid(_zxid) - {} - - bool hasHappened() const - { - return bool(zxid); - } - - /// happens-before relation with a reasonable time bound - bool happensBefore(const LogicalClock & other) const - { - return !zxid - || (other.zxid && *zxid <= *other.zxid); - } - - bool operator<=(const LogicalClock & other) const - { - return happensBefore(other); - } - - /// strict equality check - bool operator==(const LogicalClock & other) const - { - return zxid == other.zxid; - } -}; - - -class CleanStateClock -{ -public: - LogicalClock discovery_zxid; - std::optional discovery_version; - - LogicalClock clean_state_zxid; - std::optional clean_state_version; - - std::shared_ptr stale; - - bool is_clean() const - { - return !is_stale() - && (!discovery_zxid.hasHappened() || (clean_state_zxid.hasHappened() && discovery_zxid <= clean_state_zxid)); - } - - bool is_stale() const - { - return stale->load(); - } - - CleanStateClock( - const zkutil::ZooKeeperPtr & zookeeper, - const String & discovery_path, - const String & clean_state_path) - : stale(std::make_shared(false)) - { - Coordination::Stat stat{}; - String _some_data; - auto watch_callback = - [my_stale = stale] (const Coordination::WatchResponse & rsp) - { - auto logger = getLogger("ClusterCopier"); - if (rsp.error == Coordination::Error::ZOK) - { - switch (rsp.type) /// NOLINT(bugprone-switch-missing-default-case) - { - case Coordination::CREATED: - LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path); - my_stale->store(true); - break; - case Coordination::CHANGED: - LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at {}", rsp.path); - my_stale->store(true); - } - } - }; - if (zookeeper->tryGetWatch(discovery_path, _some_data, &stat, watch_callback)) - { - discovery_zxid = LogicalClock(stat.mzxid); - discovery_version = stat.version; - } - if 
(zookeeper->tryGetWatch(clean_state_path, _some_data, &stat, watch_callback)) - { - clean_state_zxid = LogicalClock(stat.mzxid); - clean_state_version = stat.version; - } - } - - bool operator==(const CleanStateClock & other) const - { - return !is_stale() - && !other.is_stale() - && discovery_zxid == other.discovery_zxid - && discovery_version == other.discovery_version - && clean_state_zxid == other.clean_state_zxid - && clean_state_version == other.clean_state_version; - } - - bool operator!=(const CleanStateClock & other) const - { - return !(*this == other); - } -}; diff --git a/programs/copier/clickhouse-copier.cpp b/programs/copier/clickhouse-copier.cpp deleted file mode 100644 index 4dabb01775b..00000000000 --- a/programs/copier/clickhouse-copier.cpp +++ /dev/null @@ -1 +0,0 @@ -int mainEntryClickHouseClusterCopier(int argc, char ** argv); diff --git a/programs/diagnostics/testdata/configs/xml/config.xml b/programs/diagnostics/testdata/configs/xml/config.xml index ae09d207091..eb7c70cf498 100644 --- a/programs/diagnostics/testdata/configs/xml/config.xml +++ b/programs/diagnostics/testdata/configs/xml/config.xml @@ -94,7 +94,7 @@ 8123 + diff --git a/tests/config/install.sh b/tests/config/install.sh index 467636cfa40..06986871bd2 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -45,7 +45,7 @@ ln -sf $SRC_PATH/config.d/transactions.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/encryption.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/CORS.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/zookeeper_log.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/logger_trace.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/logger_test.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/named_collection.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/ssl_certs.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/filesystem_cache_log.xml $DEST_SERVER_PATH/config.d/ diff --git 
a/tests/integration/README.md b/tests/integration/README.md index 1b5a0ee8994..ac01c43769e 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -76,7 +76,7 @@ docker pull clickhouse/integration-tests-runner Notes: * If you want to run integration tests without `sudo` you have to add your user to docker group `sudo usermod -aG docker $USER`. [More information](https://docs.docker.com/install/linux/linux-postinstall/) about docker configuration. * If you already had run these tests without `./runner` script you may have problems with pytest cache. It can be removed with `rm -r __pycache__ .pytest_cache/`. -* Some tests maybe require a lot of resources (CPU, RAM, etc.). Better not try large tests like `test_cluster_copier` or `test_distributed_ddl*` on your laptop. +* Some tests maybe require a lot of resources (CPU, RAM, etc.). Better not try large tests like `test_distributed_ddl*` on your laptop. You can run tests via `./runner` script and pass pytest arguments as last arg: ``` diff --git a/tests/integration/helpers/0_common_instance_config.xml b/tests/integration/helpers/0_common_instance_config.xml index 73792affee6..1d97bcb7bcf 100644 --- a/tests/integration/helpers/0_common_instance_config.xml +++ b/tests/integration/helpers/0_common_instance_config.xml @@ -29,4 +29,7 @@ / + + + diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 52c0d8a8ee5..b695b493db7 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1618,6 +1618,7 @@ class ClickHouseCluster: with_installed_binary=False, external_dirs=None, tmpfs=None, + mem_limit=None, zookeeper_docker_compose_path=None, minio_certs_dir=None, minio_data_dir=None, @@ -1728,6 +1729,7 @@ class ClickHouseCluster: with_installed_binary=with_installed_binary, external_dirs=external_dirs, tmpfs=tmpfs or [], + mem_limit=mem_limit, config_root_name=config_root_name, extra_configs=extra_configs, ) @@ -3203,6 +3205,7 @@ 
services: {krb5_conf} entrypoint: {entrypoint_cmd} tmpfs: {tmpfs} + {mem_limit} cap_add: - SYS_PTRACE - NET_ADMIN @@ -3288,6 +3291,7 @@ class ClickHouseInstance: with_installed_binary=False, external_dirs=None, tmpfs=None, + mem_limit=None, config_root_name="clickhouse", extra_configs=[], ): @@ -3299,6 +3303,10 @@ class ClickHouseInstance: self.external_dirs = external_dirs self.tmpfs = tmpfs or [] + if mem_limit is not None: + self.mem_limit = "mem_limit : " + mem_limit + else: + self.mem_limit = "" self.base_config_dir = ( p.abspath(p.join(base_path, base_config_dir)) if base_config_dir else None ) @@ -4644,6 +4652,7 @@ class ClickHouseInstance: db_dir=db_dir, external_dirs_volumes=external_dirs_volumes, tmpfs=str(self.tmpfs), + mem_limit=self.mem_limit, logs_dir=logs_dir, depends_on=str(depends_on), user=os.getuid(), diff --git a/tests/integration/helpers/mock_servers.py b/tests/integration/helpers/mock_servers.py index e4655ffeeaf..f2181d85e12 100644 --- a/tests/integration/helpers/mock_servers.py +++ b/tests/integration/helpers/mock_servers.py @@ -33,7 +33,7 @@ def start_mock_servers(cluster, script_dir, mocks, timeout=100): cluster.exec_in_container( container_id, - ["python", server_name, str(port)], + ["python3", server_name, str(port)], detach=True, ) diff --git a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml index 779e4b6ae21..2edabc76c8b 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml +++ b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml @@ -8,9 +8,16 @@ minio minio123 + + object_storage + local_blob_storage + plain + /local_plain/ + backup_disk_s3_plain + backup_disk_local_plain diff --git a/tests/integration/test_attach_backup_from_s3_plain/test.py b/tests/integration/test_attach_backup_from_s3_plain/test.py index e575c487b7a..c2f8936b82c 100644 --- 
a/tests/integration/test_attach_backup_from_s3_plain/test.py +++ b/tests/integration/test_attach_backup_from_s3_plain/test.py @@ -21,16 +21,55 @@ def start_cluster(): cluster.shutdown() +s3_disk_def = """disk(type=s3_plain, + endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{}/', + access_key_id='minio', + secret_access_key='minio123');""" + +local_disk_def = "disk(type=object_storage, object_storage_type = 'local_blob_storage', metadata_type = 'plain', path = '/local_plain/{}/');" + + @pytest.mark.parametrize( - "table_name,backup_name,storage_policy,min_bytes_for_wide_part", + "table_name,backup_name,storage_policy,disk_def,min_bytes_for_wide_part", [ pytest.param( - "compact", "backup_compact", "s3_backup_compact", int(1e9), id="compact" + "compact", + "backup_compact_s3", + "backup_disk_s3_plain", + s3_disk_def, + int(1e9), + id="compact", + ), + pytest.param( + "wide", + "backup_wide_s3", + "backup_disk_s3_plain", + s3_disk_def, + int(0), + id="wide", + ), + pytest.param( + "compact", + "backup_compact_local", + "backup_disk_local_plain", + local_disk_def, + int(1e9), + id="compact", + ), + pytest.param( + "wide", + "backup_wide_local", + "backup_disk_local_plain", + local_disk_def, + int(0), + id="wide", ), - pytest.param("wide", "backup_wide", "s3_backup_wide", int(0), id="wide"), ], ) -def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide_part): +def test_attach_part( + table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part +): + disk_definition = disk_def.format(backup_name) node.query( f""" -- Catch any errors (NOTE: warnings are ok) @@ -45,7 +84,7 @@ def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide settings min_bytes_for_wide_part={min_bytes_for_wide_part} as select number%5 part, number key from numbers(100); - backup table ordinary_db.{table_name} TO Disk('backup_disk_s3_plain', '{backup_name}') settings deduplicate_files=0; + backup table ordinary_db.{table_name} 
TO Disk('{storage_policy}', '{backup_name}') settings deduplicate_files=0; drop table ordinary_db.{table_name}; attach table ordinary_db.{table_name} (part UInt8, key UInt64) @@ -53,10 +92,7 @@ def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide order by key partition by part settings max_suspicious_broken_parts=0, - disk=disk(type=s3_plain, - endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{backup_name}/', - access_key_id='minio', - secret_access_key='minio123'); + disk={disk_definition} """ ) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 4d3ee8200a3..d65fc1f09d6 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -124,15 +124,17 @@ def check_backup_and_restore( def check_system_tables(backup_query_id=None): disks = [ tuple(disk.split("\t")) - for disk in node.query("SELECT name, type FROM system.disks").split("\n") + for disk in node.query( + "SELECT name, type, object_storage_type, metadata_type FROM system.disks" + ).split("\n") if disk ] expected_disks = ( - ("default", "local"), - ("disk_s3", "s3"), - ("disk_s3_cache", "s3"), - ("disk_s3_other_bucket", "s3"), - ("disk_s3_plain", "s3_plain"), + ("default", "Local", "None", "None"), + ("disk_s3", "ObjectStorage", "S3", "Local"), + ("disk_s3_cache", "ObjectStorage", "S3", "Local"), + ("disk_s3_other_bucket", "ObjectStorage", "S3", "Local"), + ("disk_s3_plain", "ObjectStorage", "S3", "Plain"), ) assert len(expected_disks) == len(disks) for expected_disk in expected_disks: diff --git a/tests/integration/test_cluster_copier/configs/conf.d/clusters.xml b/tests/integration/test_cluster_copier/configs/conf.d/clusters.xml deleted file mode 100644 index 07363e6e43a..00000000000 --- a/tests/integration/test_cluster_copier/configs/conf.d/clusters.xml +++ /dev/null @@ -1,73 +0,0 @@ - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - 
true - - s0_1_0 - 9000 - - - - - - true - - s1_0_0 - 9000 - - - s1_0_1 - 9000 - - - - true - - s1_1_0 - 9000 - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - - - - s0_0_0 - 9000 - - - - - - - s1_0_0 - 9000 - - - - - diff --git a/tests/integration/test_cluster_copier/configs/conf.d/clusters_trivial.xml b/tests/integration/test_cluster_copier/configs/conf.d/clusters_trivial.xml deleted file mode 100644 index c91ec627580..00000000000 --- a/tests/integration/test_cluster_copier/configs/conf.d/clusters_trivial.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - first_trivial - 9000 - - - - - - - second_trivial - 9000 - - - - - diff --git a/tests/integration/test_cluster_copier/configs/conf.d/ddl.xml b/tests/integration/test_cluster_copier/configs/conf.d/ddl.xml deleted file mode 100644 index 64fa32335ab..00000000000 --- a/tests/integration/test_cluster_copier/configs/conf.d/ddl.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - /clickhouse/task_queue/ddl - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/configs/conf.d/query_log.xml b/tests/integration/test_cluster_copier/configs/conf.d/query_log.xml deleted file mode 100644 index 1efc65bdb28..00000000000 --- a/tests/integration/test_cluster_copier/configs/conf.d/query_log.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - system -
query_log
- - - 1000 - -
\ No newline at end of file diff --git a/tests/integration/test_cluster_copier/configs/config-copier.xml b/tests/integration/test_cluster_copier/configs/config-copier.xml deleted file mode 100644 index 590b1892f8d..00000000000 --- a/tests/integration/test_cluster_copier/configs/config-copier.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - information - /var/log/clickhouse-server/copier/log.log - /var/log/clickhouse-server/copier/log.err.log - 1000M - 10 - /var/log/clickhouse-server/copier/stderr.log - /var/log/clickhouse-server/copier/stdout.log - - diff --git a/tests/integration/test_cluster_copier/configs/users.xml b/tests/integration/test_cluster_copier/configs/users.xml deleted file mode 100644 index b463dfc81e7..00000000000 --- a/tests/integration/test_cluster_copier/configs/users.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - 1 - - 5 - 1 - - - - - - - - ::/0 - - default - default - - - 12345678 - - ::/0 - - default - default - - - - - - - - diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/clusters.xml b/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/clusters.xml deleted file mode 100644 index 9de7b57de27..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/clusters.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - false - - first - 9000 - - - - false - - second - 9000 - - - - false - - third - 9000 - - - - - diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/ddl.xml b/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/ddl.xml deleted file mode 100644 index 64fa32335ab..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/ddl.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - /clickhouse/task_queue/ddl - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/config-copier.xml b/tests/integration/test_cluster_copier/configs_three_nodes/config-copier.xml deleted file mode 100644 index 
d0cab0fafb7..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/config-copier.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - information - /var/log/clickhouse-server/copier/log.log - /var/log/clickhouse-server/copier/log.err.log - 1000M - 10 - /var/log/clickhouse-server/copier/stderr.log - /var/log/clickhouse-server/copier/stdout.log - - - - - zoo1 - 2181 - - - zoo2 - 2181 - - - zoo3 - 2181 - - 2000 - - diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/users.xml b/tests/integration/test_cluster_copier/configs_three_nodes/users.xml deleted file mode 100644 index badaf46a5ca..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/users.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - 1 - 1 - - - - - - - - ::/0 - - default - default - - - 12345678 - - ::/0 - - default - default - - - - - - - - diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml b/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml deleted file mode 100644 index 38d88308631..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml b/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml deleted file mode 100644 index 64fa32335ab..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - /clickhouse/task_queue/ddl - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/storage_configuration.xml b/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/storage_configuration.xml deleted file mode 100644 index 8306f40ad6a..00000000000 --- 
a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/storage_configuration.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - - - - - /jbod1/ - - - /jbod2/ - - - /external/ - - - - - - - - external - -
- jbod1 - jbod2 -
-
-
-
- -
- -
diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml b/tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml deleted file mode 100644 index 55bd24816ae..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - information - /var/log/clickhouse-server/copier/log.log - /var/log/clickhouse-server/copier/log.err.log - 1000M - 10 - /var/log/clickhouse-server/copier/stderr.log - /var/log/clickhouse-server/copier/stdout.log - - - - - zoo1 - 2181 - - 2000 - - diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/users.xml b/tests/integration/test_cluster_copier/configs_two_nodes/users.xml deleted file mode 100644 index badaf46a5ca..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/users.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - 1 - 1 - - - - - - - - ::/0 - - default - default - - - 12345678 - - ::/0 - - default - default - - - - - - - - diff --git a/tests/integration/test_cluster_copier/task0_description.xml b/tests/integration/test_cluster_copier/task0_description.xml deleted file mode 100644 index 8d74d0bdde0..00000000000 --- a/tests/integration/test_cluster_copier/task0_description.xml +++ /dev/null @@ -1,95 +0,0 @@ - - - 3 - - - - 1 - - - - - 0 - - - - - - - - - - cluster0 - default - hits - - cluster1 - default - hits - - 2 - - 3 4 5 6 1 2 0 - - - ENGINE=ReplicatedMergeTree PARTITION BY d % 3 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16 - - - d + 1 - - - d - d = 0 - - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - true - - s0_1_0 - 9000 - - - - - - - true - - s1_0_0 - 9000 - - - s1_0_1 - 9000 - - - - true - - s1_1_0 - 9000 - - - - 255.255.255.255 - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_drop_target_partition.xml b/tests/integration/test_cluster_copier/task_drop_target_partition.xml deleted file mode 100644 index 
dc8e6452243..00000000000 --- a/tests/integration/test_cluster_copier/task_drop_target_partition.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_drop_target_partition - source - - destination - db_drop_target_partition - destination - - true - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_month_to_week_description.xml b/tests/integration/test_cluster_copier/task_month_to_week_description.xml deleted file mode 100644 index bc290ca397f..00000000000 --- a/tests/integration/test_cluster_copier/task_month_to_week_description.xml +++ /dev/null @@ -1,99 +0,0 @@ - - - 4 - - - - 1 - 2 - - - - 0 - - - - - - cluster0 - default - a - - cluster1 - default - b - - - - 2 - - - ENGINE= - ReplicatedMergeTree - PARTITION BY toMonday(date) - ORDER BY d - - - - jumpConsistentHash(intHash64(d), 2) - - - - - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - true - - s0_1_0 - 9000 - - - - - - - true - - s1_0_0 - 9000 - - - s1_0_1 - 9000 - - - - true - - s1_1_0 - 9000 - - - - 255.255.255.255 - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_no_arg.xml b/tests/integration/test_cluster_copier/task_no_arg.xml deleted file mode 100644 index 262ff073537..00000000000 --- a/tests/integration/test_cluster_copier/task_no_arg.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - 1 - - s0_0_0 - 9000 - - - - - - - 1 - - s1_1_0 - 9000 - - - - - - 1 - - - - source_cluster - default - copier_test1 - - default_cluster - default - copier_test1_1 - ENGINE = MergeTree PARTITION BY date ORDER BY (date, sipHash64(date)) SAMPLE BY sipHash64(date) - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_no_index.xml b/tests/integration/test_cluster_copier/task_no_index.xml deleted file mode 100644 index 265f99e21a6..00000000000 --- 
a/tests/integration/test_cluster_copier/task_no_index.xml +++ /dev/null @@ -1,109 +0,0 @@ - - - - - false - - s0_0_0 - 9000 - - - - - - - false - - s1_1_0 - 9000 - - - - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - default - ontime - - - - destination_cluster - default - ontime22 - - - - - - - ENGINE = MergeTree() PARTITION BY Year ORDER BY (Year, FlightDate) SETTINGS index_granularity=8192 - - - - - jumpConsistentHash(intHash64(Year), 2) - - - - - - - 2017 - - - - - - - diff --git a/tests/integration/test_cluster_copier/task_non_partitioned_table.xml b/tests/integration/test_cluster_copier/task_non_partitioned_table.xml deleted file mode 100644 index d5424b95f45..00000000000 --- a/tests/integration/test_cluster_copier/task_non_partitioned_table.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - 1 - - s0_0_0 - 9000 - - - - - - - 1 - - s1_1_0 - 9000 - - - - - - 1 - - - - source_cluster - default - copier_test1 - - default_cluster - default - copier_test1_1 - ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_self_copy.xml b/tests/integration/test_cluster_copier/task_self_copy.xml deleted file mode 100644 index 21d577bc397..00000000000 --- a/tests/integration/test_cluster_copier/task_self_copy.xml +++ /dev/null @@ -1,63 +0,0 @@ - - 9440 - - - - false - - s0_0_0 - 9000 - dbuser - 12345678 - 0 - - - - - - - false - - s0_0_0 - 9000 - dbuser - 12345678 - 0 - - - - - - 2 - - - 1 - - - - 0 - - - - 3 - 1 - - - - - source_cluster - db1 - source_table - - destination_cluster - db2 - destination_table - - - ENGINE = MergeTree PARTITION BY a ORDER BY a SETTINGS index_granularity = 8192 - - - rand() - - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/task_skip_index.xml b/tests/integration/test_cluster_copier/task_skip_index.xml deleted file mode 100644 index b04cec963d4..00000000000 --- 
a/tests/integration/test_cluster_copier/task_skip_index.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_skip_index - source - - destination - db_skip_index - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_taxi_data.xml b/tests/integration/test_cluster_copier/task_taxi_data.xml deleted file mode 100644 index 94fa5087338..00000000000 --- a/tests/integration/test_cluster_copier/task_taxi_data.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - false - - first - 9000 - - - - false - - second - 9000 - - - - false - - third - 9000 - - - - - - 2 - - - - events - dailyhistory - yellow_tripdata_staging - events - monthlyhistory - yellow_tripdata_staging - Engine=ReplacingMergeTree() PRIMARY KEY (tpep_pickup_datetime, id) ORDER BY (tpep_pickup_datetime, id) PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime)) - sipHash64(id) % 3 - - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/task_test_block_size.xml b/tests/integration/test_cluster_copier/task_test_block_size.xml deleted file mode 100644 index bf29c7e1832..00000000000 --- a/tests/integration/test_cluster_copier/task_test_block_size.xml +++ /dev/null @@ -1,101 +0,0 @@ - - - 1 - - - - 1 - - - - - - - - - shard_0_0 - default - test_block_size - - cluster1 - default - test_block_size - - - '1970-01-01' - - - - ENGINE= - ReplicatedMergeTree - ORDER BY d PARTITION BY partition - - - - jumpConsistentHash(intHash64(d), 2) - - - - - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - true - - s0_1_0 - 9000 - - - - - - - true - - s1_0_0 - 9000 - - - s1_0_1 - 9000 - - - - true - - s1_1_0 - 9000 - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_trivial.xml 
b/tests/integration/test_cluster_copier/task_trivial.xml deleted file mode 100644 index a3b8bc03888..00000000000 --- a/tests/integration/test_cluster_copier/task_trivial.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - 3 - - - - 1 - - - - - 0 - - - - - - - - - - source_trivial_cluster - default - trivial - - destination_trivial_cluster - default - trivial - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/hits', '{replica}') PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16 - - - d + 1 - - - d - d = 0 - - - - - - - - - first_trivial - 9000 - - - - - - - - - second_trivial - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_trivial_without_arguments.xml b/tests/integration/test_cluster_copier/task_trivial_without_arguments.xml deleted file mode 100644 index 0197dee0181..00000000000 --- a/tests/integration/test_cluster_copier/task_trivial_without_arguments.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - 3 - - - - 1 - - - - - 0 - - - - - - - - - - source_trivial_cluster - default - trivial_without_arguments - - destination_trivial_cluster - default - trivial_without_arguments - - - ENGINE=ReplicatedMergeTree() PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16 - - - d + 1 - - - d - d = 0 - - - - - - - - - first_trivial - 9000 - - - - - - - - - second_trivial - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_ttl_columns.xml b/tests/integration/test_cluster_copier/task_ttl_columns.xml deleted file mode 100644 index 2069c509c87..00000000000 --- a/tests/integration/test_cluster_copier/task_ttl_columns.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_ttl_columns - source - - destination - db_ttl_columns - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) - 
rand() - - - diff --git a/tests/integration/test_cluster_copier/task_ttl_move_to_volume.xml b/tests/integration/test_cluster_copier/task_ttl_move_to_volume.xml deleted file mode 100644 index 2a51fa7a66d..00000000000 --- a/tests/integration/test_cluster_copier/task_ttl_move_to_volume.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_move_to_volume - source - - destination - db_move_to_volume - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external' SETTINGS storage_policy = 'external_with_jbods' - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_with_different_schema.xml b/tests/integration/test_cluster_copier/task_with_different_schema.xml deleted file mode 100644 index e1e6ee4dc42..00000000000 --- a/tests/integration/test_cluster_copier/task_with_different_schema.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_different_schema - source - - destination - db_different_schema - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column9, Column1, Column2, Column3, Column4) - rand() - - - diff --git a/tests/integration/test_cluster_copier/test.py b/tests/integration/test_cluster_copier/test.py deleted file mode 100644 index be71fc21e33..00000000000 --- a/tests/integration/test_cluster_copier/test.py +++ /dev/null @@ -1,653 +0,0 @@ -import os -import random -import sys -import time -import kazoo -import pytest -import string -import random -from contextlib import contextmanager -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import docker - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - 
-COPYING_FAIL_PROBABILITY = 0.2 -MOVING_FAIL_PROBABILITY = 0.2 - -cluster = ClickHouseCluster(__file__) - - -def generateRandomString(count): - return "".join( - random.choice(string.ascii_uppercase + string.digits) for _ in range(count) - ) - - -def check_all_hosts_sucesfully_executed(tsv_content, num_hosts): - M = TSV.toMat(tsv_content) - hosts = [(l[0], l[1]) for l in M] # (host, port) - codes = [l[2] for l in M] - - assert len(hosts) == num_hosts and len(set(hosts)) == num_hosts, "\n" + tsv_content - assert len(set(codes)) == 1, "\n" + tsv_content - assert codes[0] == "0", "\n" + tsv_content - - -def ddl_check_query(instance, query, num_hosts=3): - contents = instance.query(query) - check_all_hosts_sucesfully_executed(contents, num_hosts) - return contents - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - clusters_schema = { - "0": {"0": ["0", "1"], "1": ["0"]}, - "1": {"0": ["0", "1"], "1": ["0"]}, - } - - for cluster_name, shards in clusters_schema.items(): - for shard_name, replicas in shards.items(): - for replica_name in replicas: - name = "s{}_{}_{}".format(cluster_name, shard_name, replica_name) - cluster.add_instance( - name, - main_configs=[ - "configs/conf.d/query_log.xml", - "configs/conf.d/ddl.xml", - "configs/conf.d/clusters.xml", - ], - user_configs=["configs/users.xml"], - macros={ - "cluster": cluster_name, - "shard": shard_name, - "replica": replica_name, - }, - with_zookeeper=True, - ) - - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -class Task1: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_simple_" + generateRandomString(10) - self.container_task_file = "/task0_description.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task0_description.xml"), - self.container_task_file, - ) - print( - "Copied 
task file to container of '{}' instance. Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - - for cluster_num in ["0", "1"]: - ddl_check_query( - instance, - "DROP DATABASE IF EXISTS default ON CLUSTER cluster{} SYNC".format( - cluster_num - ), - ) - ddl_check_query( - instance, - "CREATE DATABASE default ON CLUSTER cluster{} ".format(cluster_num), - ) - - ddl_check_query( - instance, - "CREATE TABLE hits ON CLUSTER cluster0 (d UInt64, d1 UInt64 MATERIALIZED d+1) " - + "ENGINE=ReplicatedMergeTree " - + "PARTITION BY d % 3 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16", - ) - ddl_check_query( - instance, - "CREATE TABLE hits_all ON CLUSTER cluster0 (d UInt64) ENGINE=Distributed(cluster0, default, hits, d)", - ) - ddl_check_query( - instance, - "CREATE TABLE hits_all ON CLUSTER cluster1 (d UInt64) ENGINE=Distributed(cluster1, default, hits, d + 1)", - ) - instance.query( - "INSERT INTO hits_all SELECT * FROM system.numbers LIMIT 1002", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - assert ( - self.cluster.instances["s0_0_0"] - .query("SELECT count() FROM hits_all") - .strip() - == "1002" - ) - assert ( - self.cluster.instances["s1_0_0"] - .query("SELECT count() FROM hits_all") - .strip() - == "1002" - ) - - assert ( - self.cluster.instances["s1_0_0"] - .query("SELECT DISTINCT d % 2 FROM hits") - .strip() - == "1" - ) - assert ( - self.cluster.instances["s1_1_0"] - .query("SELECT DISTINCT d % 2 FROM hits") - .strip() - == "0" - ) - - instance = self.cluster.instances["s0_0_0"] - ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster1") - ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster1") - - -class Task2: - def __init__(self, cluster, unique_zk_path): - self.cluster 
= cluster - self.zk_task_path = ( - "/clickhouse-copier/task_month_to_week_partition_" + generateRandomString(5) - ) - self.unique_zk_path = generateRandomString(10) - self.container_task_file = "/task_month_to_week_description.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_month_to_week_description.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - - for cluster_num in ["0", "1"]: - ddl_check_query( - instance, - "DROP DATABASE IF EXISTS default ON CLUSTER cluster{}".format( - cluster_num - ), - ) - ddl_check_query( - instance, - "CREATE DATABASE IF NOT EXISTS default ON CLUSTER cluster{}".format( - cluster_num - ), - ) - - ddl_check_query( - instance, - "CREATE TABLE a ON CLUSTER cluster0 (date Date, d UInt64, d1 UInt64 ALIAS d+1) " - "ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}/" - + self.unique_zk_path - + "', " - "'{replica}', date, intHash64(d), (date, intHash64(d)), 8192)", - ) - ddl_check_query( - instance, - "CREATE TABLE a_all ON CLUSTER cluster0 (date Date, d UInt64) ENGINE=Distributed(cluster0, default, a, d)", - ) - - instance.query( - "INSERT INTO a_all SELECT toDate(17581 + number) AS date, number AS d FROM system.numbers LIMIT 85", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - assert TSV( - self.cluster.instances["s0_0_0"].query( - "SELECT count() FROM cluster(cluster0, default, a)" - ) - ) == TSV("85\n") - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT count(), uniqExact(date) FROM cluster(cluster1, default, b)" - ) - ) == TSV("85\t85\n") - - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT DISTINCT jumpConsistentHash(intHash64(d), 2) FROM b" - ) - ) 
== TSV("0\n") - assert TSV( - self.cluster.instances["s1_1_0"].query( - "SELECT DISTINCT jumpConsistentHash(intHash64(d), 2) FROM b" - ) - ) == TSV("1\n") - - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'" - ) - ) == TSV("1\n") - assert TSV( - self.cluster.instances["s1_1_0"].query( - "SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'" - ) - ) == TSV("1\n") - - instance = cluster.instances["s0_0_0"] - ddl_check_query(instance, "DROP TABLE a ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE b ON CLUSTER cluster1") - - -class Task_test_block_size: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = ( - "/clickhouse-copier/task_test_block_size_" + generateRandomString(5) - ) - self.rows = 1000000 - self.container_task_file = "/task_test_block_size.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_test_block_size.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - - ddl_check_query( - instance, - """ - CREATE TABLE test_block_size ON CLUSTER shard_0_0 (partition Date, d UInt64) - ENGINE=ReplicatedMergeTree - ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d)""", - 2, - ) - - instance.query( - "INSERT INTO test_block_size SELECT toDate(0) AS partition, number as d FROM system.numbers LIMIT {}".format( - self.rows - ) - ) - - def check(self): - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT count() FROM cluster(cluster1, default, test_block_size)" - ) - ) == TSV("{}\n".format(self.rows)) - - instance = cluster.instances["s0_0_0"] - ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER shard_0_0", 2) - ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER cluster1") - - -class Task_no_index: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_no_index_" + generateRandomString( - 5 - ) - self.rows = 1000000 - self.container_task_file = "/task_no_index.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_no_index.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE IF EXISTS ontime SYNC") - instance.query( - "create table IF NOT EXISTS ontime (Year UInt16, FlightDate String) ENGINE = Memory" - ) - instance.query( - "insert into ontime values (2016, 'test6'), (2017, 'test7'), (2018, 'test8')" - ) - - def check(self): - assert TSV( - self.cluster.instances["s1_1_0"].query("SELECT Year FROM ontime22") - ) == TSV("2017\n") - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE ontime") - instance = cluster.instances["s1_1_0"] - instance.query("DROP TABLE ontime22") - - -class Task_no_arg: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_no_arg" - self.rows = 1000000 - self.container_task_file = "/task_no_arg.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_no_arg.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE IF EXISTS copier_test1 SYNC") - instance.query( - "create table if not exists copier_test1 (date Date, id UInt32) engine = MergeTree PARTITION BY date ORDER BY date SETTINGS index_granularity = 8192" - ) - instance.query("insert into copier_test1 values ('2016-01-01', 10);") - - def check(self): - assert TSV( - self.cluster.instances["s1_1_0"].query("SELECT date FROM copier_test1_1") - ) == TSV("2016-01-01\n") - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE copier_test1 SYNC") - instance = cluster.instances["s1_1_0"] - instance.query("DROP TABLE copier_test1_1 SYNC") - - -class Task_non_partitioned_table: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_non_partitoned_table" - self.rows = 1000000 - self.container_task_file = "/task_non_partitioned_table.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_non_partitioned_table.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE IF EXISTS copier_test1 SYNC") - instance.query( - "create table copier_test1 (date Date, id UInt32) engine = MergeTree ORDER BY date SETTINGS index_granularity = 8192" - ) - instance.query("insert into copier_test1 values ('2016-01-01', 10);") - - def check(self): - assert TSV( - self.cluster.instances["s1_1_0"].query("SELECT date FROM copier_test1_1") - ) == TSV("2016-01-01\n") - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE copier_test1") - instance = cluster.instances["s1_1_0"] - instance.query("DROP TABLE copier_test1_1") - - -class Task_self_copy: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_self_copy" - self.container_task_file = "/task_self_copy.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_self_copy.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP DATABASE IF EXISTS db1 SYNC") - instance.query("DROP DATABASE IF EXISTS db2 SYNC") - instance.query("CREATE DATABASE IF NOT EXISTS db1;") - instance.query( - "CREATE TABLE IF NOT EXISTS db1.source_table (`a` Int8, `b` String, `c` Int8) ENGINE = MergeTree PARTITION BY a ORDER BY a SETTINGS index_granularity = 8192" - ) - instance.query("CREATE DATABASE IF NOT EXISTS db2;") - instance.query( - "CREATE TABLE IF NOT EXISTS db2.destination_table (`a` Int8, `b` String, `c` Int8) ENGINE = MergeTree PARTITION BY a ORDER BY a SETTINGS index_granularity = 8192" - ) - instance.query("INSERT INTO db1.source_table VALUES (1, 'ClickHouse', 1);") - instance.query("INSERT INTO db1.source_table VALUES (2, 'Copier', 2);") - - def check(self): - instance = cluster.instances["s0_0_0"] - assert TSV( - instance.query("SELECT * FROM db2.destination_table ORDER BY a") - ) == TSV(instance.query("SELECT * FROM db1.source_table ORDER BY a")) - instance = cluster.instances["s0_0_0"] - instance.query("DROP DATABASE IF EXISTS db1 SYNC") - instance.query("DROP DATABASE IF EXISTS db2 SYNC") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - try: - zk.delete("/clickhouse-copier", recursive=True) - except kazoo.exceptions.NoNodeError: - print("No node /clickhouse-copier. 
It is Ok in first test.") - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - task.zk_task_path, - "--task-file", - task.container_task_file, - "--task-upload-force", - "true", - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - print(cmd) - - copiers = random.sample(list(started_cluster.instances.keys()), 3) - - for instance_name in copiers: - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - print("Copied copier config to {}".format(instance.name)) - exec_id = docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - print(output) - copiers_exec_ids.append(exec_id) - print( - "Copier for {} ({}) has started".format(instance.name, instance.ip_address) - ) - - # Wait for copiers stopping and check their return codes - for exec_id, instance_name in zip(copiers_exec_ids, copiers): - instance = started_cluster.instances[instance_name] - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(0.5) - - assert res["ExitCode"] == 0, "Instance: {} ({}). 
Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(task.zk_task_path, recursive=True) - - -# Tests - - -@pytest.mark.parametrize(("use_sample_offset"), [False, True]) -def test_copy_simple(started_cluster, use_sample_offset): - if use_sample_offset: - execute_task( - started_cluster, - Task1(started_cluster), - ["--experimental-use-sample-offset", "1"], - ) - else: - execute_task(started_cluster, Task1(started_cluster), []) - - -@pytest.mark.parametrize(("use_sample_offset"), [False, True]) -def test_copy_with_recovering(started_cluster, use_sample_offset): - if use_sample_offset: - execute_task( - started_cluster, - Task1(started_cluster), - [ - "--copy-fault-probability", - str(COPYING_FAIL_PROBABILITY), - "--experimental-use-sample-offset", - "1", - "--max-table-tries", - "10", - ], - ) - else: - execute_task( - started_cluster, - Task1(started_cluster), - [ - "--copy-fault-probability", - str(COPYING_FAIL_PROBABILITY), - "--max-table-tries", - "10", - ], - ) - - -@pytest.mark.parametrize(("use_sample_offset"), [False, True]) -def test_copy_with_recovering_after_move_faults(started_cluster, use_sample_offset): - if use_sample_offset: - execute_task( - started_cluster, - Task1(started_cluster), - [ - "--move-fault-probability", - str(MOVING_FAIL_PROBABILITY), - "--experimental-use-sample-offset", - "1", - ], - ) - else: - execute_task( - started_cluster, - Task1(started_cluster), - ["--move-fault-probability", str(MOVING_FAIL_PROBABILITY)], - ) - - -@pytest.mark.timeout(600) -def test_copy_month_to_week_partition(started_cluster): - execute_task(started_cluster, Task2(started_cluster, "test1"), []) - - -@pytest.mark.timeout(600) -def test_copy_month_to_week_partition_with_recovering(started_cluster): - execute_task( - started_cluster, - Task2(started_cluster, "test2"), - [ - "--copy-fault-probability", - str(COPYING_FAIL_PROBABILITY), - "--max-table-tries", - "10", - ], - ) - - 
-@pytest.mark.timeout(600) -def test_copy_month_to_week_partition_with_recovering_after_move_faults( - started_cluster, -): - execute_task( - started_cluster, - Task2(started_cluster, "test3"), - ["--move-fault-probability", str(MOVING_FAIL_PROBABILITY)], - ) - - -def test_block_size(started_cluster): - execute_task(started_cluster, Task_test_block_size(started_cluster), []) - - -def test_no_index(started_cluster): - execute_task(started_cluster, Task_no_index(started_cluster), []) - - -def test_no_arg(started_cluster): - execute_task(started_cluster, Task_no_arg(started_cluster), []) - - -def test_non_partitioned_table(started_cluster): - execute_task(started_cluster, Task_non_partitioned_table(started_cluster), []) - - -def test_self_copy(started_cluster): - execute_task(started_cluster, Task_self_copy(started_cluster), []) diff --git a/tests/integration/test_cluster_copier/test_three_nodes.py b/tests/integration/test_cluster_copier/test_three_nodes.py deleted file mode 100644 index e7d07757adb..00000000000 --- a/tests/integration/test_cluster_copier/test_three_nodes.py +++ /dev/null @@ -1,286 +0,0 @@ -import os -import sys -import time -import logging -import pytest - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import docker - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - -cluster = ClickHouseCluster(__file__) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - for name in ["first", "second", "third"]: - cluster.add_instance( - name, - main_configs=[ - "configs_three_nodes/conf.d/clusters.xml", - "configs_three_nodes/conf.d/ddl.xml", - ], - user_configs=["configs_three_nodes/users.xml"], - with_zookeeper=True, - ) - - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -class Task: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task" - 
self.container_task_file = "/task_taxi_data.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_taxi_data.xml"), - self.container_task_file, - ) - logging.debug( - f"Copied task file to container of '{instance_name}' instance. Path {self.container_task_file}" - ) - - def start(self): - for name in ["first", "second", "third"]: - node = cluster.instances[name] - node.query("DROP DATABASE IF EXISTS dailyhistory SYNC;") - node.query("DROP DATABASE IF EXISTS monthlyhistory SYNC;") - - first = cluster.instances["first"] - - # daily partition database - first.query("CREATE DATABASE IF NOT EXISTS dailyhistory on cluster events;") - first.query( - """CREATE TABLE dailyhistory.yellow_tripdata_staging ON CLUSTER events - ( - id UUID DEFAULT generateUUIDv4(), - vendor_id String, - tpep_pickup_datetime DateTime('UTC'), - tpep_dropoff_datetime DateTime('UTC'), - passenger_count Nullable(Float64), - trip_distance String, - pickup_longitude Float64, - pickup_latitude Float64, - rate_code_id String, - store_and_fwd_flag String, - dropoff_longitude Float64, - dropoff_latitude Float64, - payment_type String, - fare_amount String, - extra String, - mta_tax String, - tip_amount String, - tolls_amount String, - improvement_surcharge String, - total_amount String, - pickup_location_id String, - dropoff_location_id String, - congestion_surcharge String, - junk1 String, junk2 String - ) - Engine = ReplacingMergeTree() - PRIMARY KEY (tpep_pickup_datetime, id) - ORDER BY (tpep_pickup_datetime, id) - PARTITION BY (toYYYYMMDD(tpep_pickup_datetime))""" - ) - - first.query( - """CREATE TABLE dailyhistory.yellow_tripdata - ON CLUSTER events - AS dailyhistory.yellow_tripdata_staging - ENGINE = Distributed('events', 'dailyhistory', yellow_tripdata_staging, sipHash64(id) % 3);""" - ) - - first.query( - """INSERT INTO dailyhistory.yellow_tripdata - SELECT * FROM 
generateRandom( - 'id UUID DEFAULT generateUUIDv4(), - vendor_id String, - tpep_pickup_datetime DateTime(\\'UTC\\'), - tpep_dropoff_datetime DateTime(\\'UTC\\'), - passenger_count Nullable(Float64), - trip_distance String, - pickup_longitude Float64, - pickup_latitude Float64, - rate_code_id String, - store_and_fwd_flag String, - dropoff_longitude Float64, - dropoff_latitude Float64, - payment_type String, - fare_amount String, - extra String, - mta_tax String, - tip_amount String, - tolls_amount String, - improvement_surcharge String, - total_amount String, - pickup_location_id String, - dropoff_location_id String, - congestion_surcharge String, - junk1 String, - junk2 String', - 1, 10, 2) LIMIT 50;""" - ) - - # monthly partition database - first.query("create database IF NOT EXISTS monthlyhistory on cluster events;") - first.query( - """CREATE TABLE monthlyhistory.yellow_tripdata_staging ON CLUSTER events - ( - id UUID DEFAULT generateUUIDv4(), - vendor_id String, - tpep_pickup_datetime DateTime('UTC'), - tpep_dropoff_datetime DateTime('UTC'), - passenger_count Nullable(Float64), - trip_distance String, - pickup_longitude Float64, - pickup_latitude Float64, - rate_code_id String, - store_and_fwd_flag String, - dropoff_longitude Float64, - dropoff_latitude Float64, - payment_type String, - fare_amount String, - extra String, - mta_tax String, - tip_amount String, - tolls_amount String, - improvement_surcharge String, - total_amount String, - pickup_location_id String, - dropoff_location_id String, - congestion_surcharge String, - junk1 String, - junk2 String - ) - Engine = ReplacingMergeTree() - PRIMARY KEY (tpep_pickup_datetime, id) - ORDER BY (tpep_pickup_datetime, id) - PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime))""" - ) - - first.query( - """CREATE TABLE monthlyhistory.yellow_tripdata - ON CLUSTER events - AS monthlyhistory.yellow_tripdata_staging - ENGINE = Distributed('events', 'monthlyhistory', yellow_tripdata_staging, sipHash64(id) % 
3);""" - ) - - def check(self): - first = cluster.instances["first"] - a = TSV(first.query("SELECT count() from dailyhistory.yellow_tripdata")) - b = TSV(first.query("SELECT count() from monthlyhistory.yellow_tripdata")) - assert a == b, "Distributed tables" - - for instance_name, instance in cluster.instances.items(): - instance = cluster.instances[instance_name] - a = instance.query( - "SELECT count() from dailyhistory.yellow_tripdata_staging" - ) - b = instance.query( - "SELECT count() from monthlyhistory.yellow_tripdata_staging" - ) - assert a == b, "MergeTree tables on each shard" - - a = TSV( - instance.query( - "SELECT sipHash64(*) from dailyhistory.yellow_tripdata_staging ORDER BY id" - ) - ) - b = TSV( - instance.query( - "SELECT sipHash64(*) from monthlyhistory.yellow_tripdata_staging ORDER BY id" - ) - ) - - assert a == b, "Data on each shard" - - for name in ["first", "second", "third"]: - node = cluster.instances[name] - node.query("DROP DATABASE IF EXISTS dailyhistory SYNC;") - node.query("DROP DATABASE IF EXISTS monthlyhistory SYNC;") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - logging.debug("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - task.zk_task_path, - "--task-file", - task.container_task_file, - "--task-upload-force", - "true", - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - logging.debug(f"execute_task cmd: {cmd}") - - for instance_name in started_cluster.instances.keys(): - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, 
"configs_three_nodes/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - logging.info("Copied copier config to {}".format(instance.name)) - exec_id = docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - logging.info(output) - copiers_exec_ids.append(exec_id) - logging.info( - "Copier for {} ({}) has started".format(instance.name, instance.ip_address) - ) - - # time.sleep(1000) - - # Wait for copiers stopping and check their return codes - for exec_id, instance in zip( - copiers_exec_ids, iter(started_cluster.instances.values()) - ): - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(1) - - assert res["ExitCode"] == 0, "Instance: {} ({}). Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(task.zk_task_path, recursive=True) - - -# Tests -@pytest.mark.timeout(600) -def test(started_cluster): - execute_task(started_cluster, Task(started_cluster), []) diff --git a/tests/integration/test_cluster_copier/test_trivial.py b/tests/integration/test_cluster_copier/test_trivial.py deleted file mode 100644 index b8060583ef8..00000000000 --- a/tests/integration/test_cluster_copier/test_trivial.py +++ /dev/null @@ -1,227 +0,0 @@ -import os -import sys -import time -import random -import string - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import kazoo -import pytest -import docker - - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - - -COPYING_FAIL_PROBABILITY = 0.1 -MOVING_FAIL_PROBABILITY = 0.1 - -cluster = ClickHouseCluster(__file__) - - -def generateRandomString(count): - return "".join( - random.choice(string.ascii_uppercase + string.digits) for _ in range(count) - ) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - for name in 
["first_trivial", "second_trivial"]: - instance = cluster.add_instance( - name, - main_configs=["configs/conf.d/clusters_trivial.xml"], - user_configs=["configs_two_nodes/users.xml"], - macros={ - "cluster": name, - "shard": "the_only_shard", - "replica": "the_only_replica", - }, - with_zookeeper=True, - ) - - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -class TaskTrivial: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_trivial" - self.copier_task_config = open( - os.path.join(CURRENT_TEST_DIR, "task_trivial.xml"), "r" - ).read() - - def start(self): - source = cluster.instances["first_trivial"] - destination = cluster.instances["second_trivial"] - - for node in [source, destination]: - node.query("DROP DATABASE IF EXISTS default") - node.query("CREATE DATABASE IF NOT EXISTS default") - - source.query( - "CREATE TABLE trivial (d UInt64, d1 UInt64 MATERIALIZED d+1)" - "ENGINE=ReplicatedMergeTree('/clickhouse/tables/source_trivial_cluster/1/trivial/{}', '1') " - "PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16".format( - generateRandomString(10) - ) - ) - - source.query( - "INSERT INTO trivial SELECT * FROM system.numbers LIMIT 1002", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - zk = cluster.get_kazoo_client("zoo1") - status_data, _ = zk.get(self.zk_task_path + "/status") - assert ( - status_data - == b'{"hits":{"all_partitions_count":5,"processed_partitions_count":5}}' - ) - - source = cluster.instances["first_trivial"] - destination = cluster.instances["second_trivial"] - - assert TSV(source.query("SELECT count() FROM trivial")) == TSV("1002\n") - assert TSV(destination.query("SELECT count() FROM trivial")) == TSV("1002\n") - - for node in [source, destination]: - node.query("DROP TABLE trivial") - - -class TaskReplicatedWithoutArguments: - def __init__(self, cluster): - self.cluster = cluster - 
self.zk_task_path = "/clickhouse-copier/task_trivial_without_arguments" - self.copier_task_config = open( - os.path.join(CURRENT_TEST_DIR, "task_trivial_without_arguments.xml"), "r" - ).read() - - def start(self): - source = cluster.instances["first_trivial"] - destination = cluster.instances["second_trivial"] - - for node in [source, destination]: - node.query("DROP DATABASE IF EXISTS default") - node.query("CREATE DATABASE IF NOT EXISTS default") - - source.query( - "CREATE TABLE trivial_without_arguments ON CLUSTER source_trivial_cluster (d UInt64, d1 UInt64 MATERIALIZED d+1) " - "ENGINE=ReplicatedMergeTree() " - "PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16" - ) - - source.query( - "INSERT INTO trivial_without_arguments SELECT * FROM system.numbers LIMIT 1002", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - zk = cluster.get_kazoo_client("zoo1") - status_data, _ = zk.get(self.zk_task_path + "/status") - assert ( - status_data - == b'{"hits":{"all_partitions_count":5,"processed_partitions_count":5}}' - ) - - source = cluster.instances["first_trivial"] - destination = cluster.instances["second_trivial"] - - assert TSV( - source.query("SELECT count() FROM trivial_without_arguments") - ) == TSV("1002\n") - assert TSV( - destination.query("SELECT count() FROM trivial_without_arguments") - ) == TSV("1002\n") - - for node in [source, destination]: - node.query("DROP TABLE trivial_without_arguments") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - try: - zk.delete("/clickhouse-copier", recursive=True) - except kazoo.exceptions.NoNodeError: - print("No node /clickhouse-copier. 
It is Ok in first test.") - - zk_task_path = task.zk_task_path - zk.ensure_path(zk_task_path) - zk.create(zk_task_path + "/description", task.copier_task_config.encode()) - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - zk_task_path, - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - copiers = list(started_cluster.instances.keys()) - - for instance_name in copiers: - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - print("Copied copier config to {}".format(instance.name)) - exec_id = docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - print(output) - copiers_exec_ids.append(exec_id) - print( - "Copier for {} ({}) has started".format(instance.name, instance.ip_address) - ) - - # Wait for copiers stopping and check their return codes - for exec_id, instance_name in zip(copiers_exec_ids, copiers): - instance = started_cluster.instances[instance_name] - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(0.5) - - assert res["ExitCode"] == 0, "Instance: {} ({}). 
Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(zk_task_path, recursive=True) - - -# Tests - - -def test_trivial_copy(started_cluster): - execute_task(started_cluster, TaskTrivial(started_cluster), []) - - -def test_trivial_without_arguments(started_cluster): - execute_task(started_cluster, TaskReplicatedWithoutArguments(started_cluster), []) diff --git a/tests/integration/test_cluster_copier/test_two_nodes.py b/tests/integration/test_cluster_copier/test_two_nodes.py deleted file mode 100644 index 1bd3561f24f..00000000000 --- a/tests/integration/test_cluster_copier/test_two_nodes.py +++ /dev/null @@ -1,597 +0,0 @@ -import os -import sys -import time -import logging -import pytest - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import docker - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - -cluster = ClickHouseCluster(__file__) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - for name in ["first_of_two", "second_of_two"]: - instance = cluster.add_instance( - name, - main_configs=[ - "configs_two_nodes/conf.d/clusters.xml", - "configs_two_nodes/conf.d/ddl.xml", - "configs_two_nodes/conf.d/storage_configuration.xml", - ], - user_configs=["configs_two_nodes/users.xml"], - with_zookeeper=True, - ) - - cluster.start() - - for name in ["first_of_two", "second_of_two"]: - instance = cluster.instances[name] - instance.exec_in_container(["bash", "-c", "mkdir /jbod1"]) - instance.exec_in_container(["bash", "-c", "mkdir /jbod2"]) - instance.exec_in_container(["bash", "-c", "mkdir /external"]) - - yield cluster - - finally: - cluster.shutdown() - - -# Will copy table from `first` node to `second` -class TaskWithDifferentSchema: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_with_different_schema" - 
self.container_task_file = "/task_with_different_schema.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_with_different_schema.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - second.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_different_schema;") - first.query( - """CREATE TABLE db_different_schema.source - ( - Column1 String, - Column2 UInt32, - Column3 Date, - Column4 DateTime, - Column5 UInt16, - Column6 String, - Column7 String, - Column8 String, - Column9 String, - Column10 String, - Column11 String, - Column12 Decimal(3, 1), - Column13 DateTime, - Column14 UInt16 - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3, Column4, Column6, Column7, Column8, Column9) - ORDER BY (Column1, Column2, Column3, Column4, Column6, Column7, Column8, Column9) - SETTINGS index_granularity = 8192""" - ) - - first.query( - """INSERT INTO db_different_schema.source SELECT * FROM generateRandom( - 'Column1 String, Column2 UInt32, Column3 Date, Column4 DateTime, Column5 UInt16, - Column6 String, Column7 String, Column8 String, Column9 String, Column10 String, - Column11 String, Column12 Decimal(3, 1), Column13 DateTime, Column14 UInt16', 1, 10, 2) LIMIT 50;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_different_schema;") - second.query( - """CREATE TABLE db_different_schema.destination - ( - Column1 LowCardinality(String) CODEC(LZ4), - Column2 UInt32 CODEC(LZ4), - Column3 Date CODEC(DoubleDelta, LZ4), - Column4 DateTime 
CODEC(DoubleDelta, LZ4), - Column5 UInt16 CODEC(LZ4), - Column6 LowCardinality(String) CODEC(ZSTD), - Column7 LowCardinality(String) CODEC(ZSTD), - Column8 LowCardinality(String) CODEC(ZSTD), - Column9 LowCardinality(String) CODEC(ZSTD), - Column10 String CODEC(ZSTD(6)), - Column11 LowCardinality(String) CODEC(LZ4), - Column12 Decimal(3,1) CODEC(LZ4), - Column13 DateTime CODEC(DoubleDelta, LZ4), - Column14 UInt16 CODEC(LZ4) - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column9, Column1, Column2, Column3, Column4);""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_different_schema.source") - b = second.query("SELECT count() from db_different_schema.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_different_schema.source - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9, Column10, Column11, Column12, Column13, Column14)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_different_schema.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9, Column10, Column11, Column12, Column13, Column14)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - second.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - - -# Just simple copying, but table schema has TTL on columns -# Also table will have slightly different schema -class TaskTTL: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_ttl_columns" - self.container_task_file = "/task_ttl_columns.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, 
"./task_ttl_columns.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - second.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_ttl_columns;") - first.query( - """CREATE TABLE db_ttl_columns.source - ( - Column1 String, - Column2 UInt32, - Column3 Date, - Column4 DateTime, - Column5 UInt16, - Column6 String TTL now() + INTERVAL 1 MONTH, - Column7 Decimal(3, 1) TTL now() + INTERVAL 1 MONTH, - Column8 Tuple(Float64, Float64) TTL now() + INTERVAL 1 MONTH - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3) - SETTINGS index_granularity = 8192""" - ) - - first.query( - """INSERT INTO db_ttl_columns.source SELECT * FROM generateRandom( - 'Column1 String, Column2 UInt32, Column3 Date, Column4 DateTime, Column5 UInt16, - Column6 String, Column7 Decimal(3, 1), Column8 Tuple(Float64, Float64)', 1, 10, 2) LIMIT 50;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_ttl_columns;") - second.query( - """CREATE TABLE db_ttl_columns.destination - ( - Column1 String, - Column2 UInt32, - Column3 Date, - Column4 DateTime TTL now() + INTERVAL 1 MONTH, - Column5 UInt16 TTL now() + INTERVAL 1 MONTH, - Column6 String TTL now() + INTERVAL 1 MONTH, - Column7 Decimal(3, 1) TTL now() + INTERVAL 1 MONTH, - Column8 Tuple(Float64, Float64) - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1);""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_ttl_columns.source") 
- b = second.query("SELECT count() from db_ttl_columns.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_ttl_columns.source - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_ttl_columns.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - second.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - - -class TaskSkipIndex: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_skip_index" - self.container_task_file = "/task_skip_index.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_skip_index.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - second.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_skip_index;") - first.query( - """CREATE TABLE db_skip_index.source - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String, - INDEX a (Column1 * Column2, Column5) TYPE minmax GRANULARITY 3, - INDEX b (Column1 * length(Column5)) TYPE set(1000) GRANULARITY 4 - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3) - SETTINGS index_granularity = 8192""" - ) - - first.query( - """INSERT INTO db_skip_index.source SELECT * FROM generateRandom( - 'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_skip_index;") - second.query( - """CREATE TABLE db_skip_index.destination - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String, - INDEX a (Column1 * Column2, Column5) TYPE minmax GRANULARITY 3, - INDEX b (Column1 * length(Column5)) TYPE set(1000) GRANULARITY 4 - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1);""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_skip_index.source") - b = second.query("SELECT count() from db_skip_index.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_skip_index.source - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) 
from db_skip_index.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - second.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - - -class TaskTTLMoveToVolume: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_ttl_move_to_volume" - self.container_task_file = "/task_ttl_move_to_volume.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_ttl_move_to_volume.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["first_of_two"] - - first.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - second.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_move_to_volume;") - first.query( - """CREATE TABLE db_move_to_volume.source - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3) - TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external' - SETTINGS storage_policy = 'external_with_jbods';""" - ) - - first.query( - """INSERT INTO db_move_to_volume.source SELECT * FROM generateRandom( - 'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_move_to_volume;") - second.query( - """CREATE TABLE db_move_to_volume.destination - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) ENGINE = MergeTree() - 
PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1) - TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external' - SETTINGS storage_policy = 'external_with_jbods';""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_move_to_volume.source") - b = second.query("SELECT count() from db_move_to_volume.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_move_to_volume.source - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_move_to_volume.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - second.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - - -class TaskDropTargetPartition: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_drop_target_partition" - self.container_task_file = "/task_drop_target_partition.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_drop_target_partition.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - second.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_drop_target_partition;") - first.query( - """CREATE TABLE db_drop_target_partition.source - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3);""" - ) - - first.query( - """INSERT INTO db_drop_target_partition.source SELECT * FROM generateRandom( - 'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_drop_target_partition;") - second.query( - """CREATE TABLE db_drop_target_partition.destination - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1);""" - ) - - # Insert data in target too. It has to be dropped. 
- first.query( - """INSERT INTO db_drop_target_partition.destination SELECT * FROM db_drop_target_partition.source;""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_drop_target_partition.source") - b = second.query("SELECT count() from db_drop_target_partition.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_drop_target_partition.source - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_drop_target_partition.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - second.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - task.zk_task_path, - "--task-file", - task.container_task_file, - "--task-upload-force", - "true", - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - print(cmd) - - for instance_name in started_cluster.instances.keys(): - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs_two_nodes/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - logging.info("Copied copier config to {}".format(instance.name)) - exec_id = 
docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - logging.info(output) - copiers_exec_ids.append(exec_id) - logging.info( - "Copier for {} ({}) has started".format(instance.name, instance.ip_address) - ) - - # time.sleep(1000) - - # Wait for copiers stopping and check their return codes - for exec_id, instance in zip( - copiers_exec_ids, iter(started_cluster.instances.values()) - ): - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(1) - - assert res["ExitCode"] == 0, "Instance: {} ({}). Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(task.zk_task_path, recursive=True) diff --git a/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml b/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml new file mode 100644 index 00000000000..743770c3024 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml @@ -0,0 +1,12 @@ + + + 44 + + + 99 + 1 + 1111 + + + + diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index 564985b2f50..ac75771cb9c 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -13,7 +13,12 @@ node2 = cluster.add_instance( env_variables={"MAX_QUERY_SIZE": "55555"}, ) node3 = cluster.add_instance( - "node3", user_configs=["configs/config_zk.xml"], with_zookeeper=True + "node3", + user_configs=[ + "configs/config_zk.xml", + ], + main_configs=["configs/config_zk_include_test.xml"], + with_zookeeper=True, ) node4 = cluster.add_instance( "node4", @@ -62,6 +67,16 @@ def start_cluster(): value=b"default", makepath=True, ) + zk.create( + path="/min_bytes_for_wide_part", + value=b"33", + makepath=True, + ) + zk.create( + path="/merge_max_block_size", + 
value=b"8888", + makepath=True, + ) cluster.add_zookeeper_startup_command(create_zk_roots) @@ -237,3 +252,63 @@ def test_allow_databases(start_cluster): ).strip() == "" ) + + +def test_config_multiple_zk_substitutions(start_cluster): + assert ( + node3.query( + "SELECT value FROM system.merge_tree_settings WHERE name='min_bytes_for_wide_part'" + ) + == "33\n" + ) + assert ( + node3.query( + "SELECT value FROM system.merge_tree_settings WHERE name='min_rows_for_wide_part'" + ) + == "1111\n" + ) + assert ( + node3.query( + "SELECT value FROM system.merge_tree_settings WHERE name='merge_max_block_size'" + ) + == "8888\n" + ) + assert ( + node3.query( + "SELECT value FROM system.server_settings WHERE name='background_pool_size'" + ) + == "44\n" + ) + + zk = cluster.get_kazoo_client("zoo1") + zk.create( + path="/background_pool_size", + value=b"72", + makepath=True, + ) + + node3.replace_config( + "/etc/clickhouse-server/config.d/config_zk_include_test.xml", + """ + + + 44 + + + 1 + 1111 + + + + +""", + ) + + node3.query("SYSTEM RELOAD CONFIG") + + assert ( + node3.query( + "SELECT value FROM system.server_settings WHERE name='background_pool_size'" + ) + == "72\n" + ) diff --git a/tests/integration/test_config_xml_full/configs/config.xml b/tests/integration/test_config_xml_full/configs/config.xml index ac59b3428e8..628e1432350 100644 --- a/tests/integration/test_config_xml_full/configs/config.xml +++ b/tests/integration/test_config_xml_full/configs/config.xml @@ -72,7 +72,7 @@ 8123