mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Merge branch 'master' into revert-61200-revert-60430-max-parallel-replicas-validate
This commit is contained in:
commit
e7e11d738b
@ -5,9 +5,7 @@
|
||||
# a) the new check is not controversial (this includes many checks in readability-* and google-*) or
|
||||
# b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*).
|
||||
|
||||
# TODO Let clang-tidy check headers in further directories
|
||||
# --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$'
|
||||
HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$'
|
||||
HeaderFilterRegex: '^.*/(base|src|programs|utils)/.*(h|hpp)$'
|
||||
|
||||
Checks: [
|
||||
'*',
|
||||
@ -22,6 +20,7 @@ Checks: [
|
||||
'-bugprone-branch-clone',
|
||||
'-bugprone-easily-swappable-parameters',
|
||||
'-bugprone-exception-escape',
|
||||
'-bugprone-forward-declaration-namespace',
|
||||
'-bugprone-implicit-widening-of-multiplication-result',
|
||||
'-bugprone-narrowing-conversions',
|
||||
'-bugprone-not-null-terminated-result',
|
||||
@ -37,6 +36,8 @@ Checks: [
|
||||
'-cert-oop54-cpp',
|
||||
'-cert-oop57-cpp',
|
||||
|
||||
'-clang-analyzer-optin.performance.Padding',
|
||||
|
||||
'-clang-analyzer-unix.Malloc',
|
||||
|
||||
'-cppcoreguidelines-*', # impractical in a codebase as large as ClickHouse, also slow
|
||||
|
59
.github/workflows/nightly.yml
vendored
59
.github/workflows/nightly.yml
vendored
@ -45,62 +45,3 @@ jobs:
|
||||
with:
|
||||
data: "${{ needs.RunConfig.outputs.data }}"
|
||||
set_latest: true
|
||||
SonarCloud:
|
||||
runs-on: [self-hosted, builder]
|
||||
env:
|
||||
SONAR_SCANNER_VERSION: 4.8.0.2856
|
||||
SONAR_SERVER_URL: "https://sonarcloud.io"
|
||||
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
|
||||
CC: clang-17
|
||||
CXX: clang++-17
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
|
||||
filter: tree:0
|
||||
submodules: true
|
||||
- name: Set up JDK 11
|
||||
uses: actions/setup-java@v1
|
||||
with:
|
||||
java-version: 11
|
||||
- name: Download and set up sonar-scanner
|
||||
env:
|
||||
SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip
|
||||
run: |
|
||||
mkdir -p "$HOME/.sonar"
|
||||
curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}"
|
||||
unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/"
|
||||
echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH"
|
||||
- name: Download and set up build-wrapper
|
||||
env:
|
||||
BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip
|
||||
run: |
|
||||
curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}"
|
||||
unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/"
|
||||
echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH"
|
||||
- name: Set Up Build Tools
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm
|
||||
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
- name: Run build-wrapper
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
cd ..
|
||||
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/
|
||||
- name: Run sonar-scanner
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
||||
run: |
|
||||
sonar-scanner \
|
||||
--define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \
|
||||
--define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
|
||||
--define sonar.projectKey="ClickHouse_ClickHouse" \
|
||||
--define sonar.organization="clickhouse-java" \
|
||||
--define sonar.cfamily.cpp23.enabled=true \
|
||||
--define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql"
|
||||
|
1
.github/workflows/pull_request.yml
vendored
1
.github/workflows/pull_request.yml
vendored
@ -172,6 +172,7 @@ jobs:
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 finish_check.py
|
||||
python3 merge_pr.py --check-approved
|
||||
|
||||
|
||||
#############################################################################################
|
||||
|
3
.github/workflows/reusable_build.yml
vendored
3
.github/workflows/reusable_build.yml
vendored
@ -43,8 +43,7 @@ jobs:
|
||||
runs-on: [self-hosted, '${{inputs.runner_type}}']
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
# WIP: temporary try commit with limited perallelization of checkout
|
||||
uses: ClickHouse/checkout@0be3f7b3098bae494d3ef5d29d2e0676fb606232
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
ref: ${{ fromJson(inputs.data).git_ref }}
|
||||
|
@ -56,13 +56,13 @@ option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile t
|
||||
if (ENABLE_CHECK_HEAVY_BUILDS)
|
||||
# set DATA (since RSS does not work since 2.6.x+) to 5G
|
||||
set (RLIMIT_DATA 5000000000)
|
||||
# set VIRT (RLIMIT_AS) to 10G (DATA*10)
|
||||
# set VIRT (RLIMIT_AS) to 10G (DATA*2)
|
||||
set (RLIMIT_AS 10000000000)
|
||||
# set CPU time limit to 1000 seconds
|
||||
set (RLIMIT_CPU 1000)
|
||||
|
||||
# -fsanitize=memory and address are too heavy
|
||||
if (SANITIZE)
|
||||
if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE)
|
||||
set (RLIMIT_DATA 10000000000) # 10G
|
||||
endif()
|
||||
|
||||
@ -110,11 +110,6 @@ endif()
|
||||
# - sanitize.cmake
|
||||
add_library(global-libs INTERFACE)
|
||||
|
||||
# We don't want to instrument everything with fuzzer, but only specific targets (see below),
|
||||
# also, since we build our own llvm, we specifically don't want to instrument
|
||||
# libFuzzer library itself - it would result in infinite recursion
|
||||
#include (cmake/fuzzer.cmake)
|
||||
|
||||
include (cmake/sanitize.cmake)
|
||||
|
||||
option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON)
|
||||
@ -554,7 +549,9 @@ if (ENABLE_RUST)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
|
||||
AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND NOT ENABLE_FUZZING
|
||||
AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
|
||||
set(CHECK_LARGE_OBJECT_SIZES_DEFAULT ON)
|
||||
else ()
|
||||
set(CHECK_LARGE_OBJECT_SIZES_DEFAULT OFF)
|
||||
@ -577,10 +574,7 @@ if (FUZZER)
|
||||
if (NOT(target_type STREQUAL "INTERFACE_LIBRARY" OR target_type STREQUAL "UTILITY"))
|
||||
target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link")
|
||||
endif()
|
||||
# clickhouse fuzzer isn't working correctly
|
||||
# initial PR https://github.com/ClickHouse/ClickHouse/pull/27526
|
||||
#if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse")
|
||||
if (target_type STREQUAL "EXECUTABLE" AND target MATCHES ".+_fuzzer")
|
||||
if (target_type STREQUAL "EXECUTABLE" AND (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse"))
|
||||
message(STATUS "${target} instrumented with fuzzer")
|
||||
target_link_libraries(${target} PUBLIC ch_contrib::fuzzer)
|
||||
# Add to fuzzers bundle
|
||||
|
@ -20,6 +20,7 @@ set (SRCS
|
||||
getPageSize.cpp
|
||||
getThreadId.cpp
|
||||
int8_to_string.cpp
|
||||
itoa.cpp
|
||||
JSON.cpp
|
||||
mremap.cpp
|
||||
phdr_cache.cpp
|
||||
|
@ -50,9 +50,6 @@ std::optional<uint64_t> getCgroupsV2MemoryLimit()
|
||||
|
||||
}
|
||||
|
||||
/** Returns the size of physical memory (RAM) in bytes.
|
||||
* Returns 0 on unsupported platform
|
||||
*/
|
||||
uint64_t getMemoryAmountOrZero()
|
||||
{
|
||||
int64_t num_pages = sysconf(_SC_PHYS_PAGES);
|
||||
|
@ -2,11 +2,10 @@
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
/** Returns the size of physical memory (RAM) in bytes.
|
||||
* Returns 0 on unsupported platform or if it cannot determine the size of physical memory.
|
||||
*/
|
||||
/// Returns the size in bytes of physical memory (RAM) available to the process. The value can
|
||||
/// be smaller than the total available RAM available to the system due to cgroups settings.
|
||||
/// Returns 0 on unsupported platform or if it cannot determine the size of physical memory.
|
||||
uint64_t getMemoryAmountOrZero();
|
||||
|
||||
/** Throws exception if it cannot determine the size of physical memory.
|
||||
*/
|
||||
/// Throws exception if it cannot determine the size of physical memory.
|
||||
uint64_t getMemoryAmount();
|
||||
|
503
base/base/itoa.cpp
Normal file
503
base/base/itoa.cpp
Normal file
@ -0,0 +1,503 @@
|
||||
// Based on https://github.com/amdn/itoa and combined with our optimizations
|
||||
//
|
||||
//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-//
|
||||
//
|
||||
// The MIT License (MIT)
|
||||
// Copyright (c) 2016 Arturo Martin-de-Nicolas
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included
|
||||
// in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
#include <base/defines.h>
|
||||
#include <base/extended_types.h>
|
||||
#include <base/itoa.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T>
|
||||
ALWAYS_INLINE inline constexpr T pow10(size_t x)
|
||||
{
|
||||
return x ? 10 * pow10<T>(x - 1) : 1;
|
||||
}
|
||||
|
||||
// Division by a power of 10 is implemented using a multiplicative inverse.
|
||||
// This strength reduction is also done by optimizing compilers, but
|
||||
// presently the fastest results are produced by using the values
|
||||
// for the multiplication and the shift as given by the algorithm
|
||||
// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
|
||||
//
|
||||
// http://www.agner.org/optimize/optimizing_assembly.pdf
|
||||
//
|
||||
// "Integer division by a constant (all processors)
|
||||
// A floating point number can be divided by a constant by multiplying
|
||||
// with the reciprocal. If we want to do the same with integers, we have
|
||||
// to scale the reciprocal by 2n and then shift the product to the right
|
||||
// by n. There are various algorithms for finding a suitable value of n
|
||||
// and compensating for rounding errors. The algorithm described below
|
||||
// was invented by Terje Mathisen, Norway, and not published elsewhere."
|
||||
|
||||
/// Division by constant is performed by:
|
||||
/// 1. Adding 1 if needed;
|
||||
/// 2. Multiplying by another constant;
|
||||
/// 3. Shifting right by another constant.
|
||||
template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
|
||||
struct Division
|
||||
{
|
||||
static constexpr bool add{add_};
|
||||
static constexpr UInt multiplier{multiplier_};
|
||||
static constexpr unsigned shift{shift_};
|
||||
};
|
||||
|
||||
/// Select a type with appropriate number of bytes from the list of types.
|
||||
/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
|
||||
/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
|
||||
template <size_t N, typename T, typename... Ts>
|
||||
struct SelectType
|
||||
{
|
||||
using Result = typename SelectType<N / 2, Ts...>::Result;
|
||||
};
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
struct SelectType<1, T, Ts...>
|
||||
{
|
||||
using Result = T;
|
||||
};
|
||||
|
||||
|
||||
/// Division by 10^N where N is the size of the type.
|
||||
template <size_t N>
|
||||
using DivisionBy10PowN = typename SelectType<
|
||||
N,
|
||||
Division<uint8_t, false, 205U, 11>, /// divide by 10
|
||||
Division<uint16_t, true, 41943U, 22>, /// divide by 100
|
||||
Division<uint32_t, false, 3518437209U, 45>, /// divide by 10000
|
||||
Division<uint64_t, false, 12379400392853802749ULL, 90> /// divide by 100000000
|
||||
>::Result;
|
||||
|
||||
template <size_t N>
|
||||
using UnsignedOfSize = typename SelectType<N, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>::Result;
|
||||
|
||||
/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
|
||||
template <size_t N>
|
||||
struct QuotientAndRemainder
|
||||
{
|
||||
UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
|
||||
UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
|
||||
};
|
||||
|
||||
template <size_t N>
|
||||
QuotientAndRemainder<N> inline split(UnsignedOfSize<N> value)
|
||||
{
|
||||
constexpr DivisionBy10PowN<N> division;
|
||||
|
||||
UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
|
||||
UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N));
|
||||
|
||||
return {quotient, remainder};
|
||||
}
|
||||
|
||||
ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value)
|
||||
{
|
||||
*p = '0' + value;
|
||||
++p;
|
||||
return p;
|
||||
}
|
||||
|
||||
// Using a lookup table to convert binary numbers from 0 to 99
|
||||
// into ascii characters as described by Andrei Alexandrescu in
|
||||
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
|
||||
|
||||
const char digits[201] = "00010203040506070809"
|
||||
"10111213141516171819"
|
||||
"20212223242526272829"
|
||||
"30313233343536373839"
|
||||
"40414243444546474849"
|
||||
"50515253545556575859"
|
||||
"60616263646566676869"
|
||||
"70717273747576777879"
|
||||
"80818283848586878889"
|
||||
"90919293949596979899";
|
||||
|
||||
ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
|
||||
{
|
||||
memcpy(p, &digits[value * 2], 2);
|
||||
p += 2;
|
||||
return p;
|
||||
}
|
||||
|
||||
namespace convert
|
||||
{
|
||||
template <typename UInt, size_t N = sizeof(UInt)>
|
||||
char * head(char * p, UInt u);
|
||||
template <typename UInt, size_t N = sizeof(UInt)>
|
||||
char * tail(char * p, UInt u);
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// head: find most significant digit, skip leading zeros
|
||||
//===----------------------------------------------------------===//
|
||||
|
||||
// "x" contains quotient and remainder after division by 10^N
|
||||
// quotient is less than 10^N
|
||||
template <size_t N>
|
||||
ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder<N> x)
|
||||
{
|
||||
p = head(p, UnsignedOfSize<N / 2>(x.quotient));
|
||||
p = tail(p, x.remainder);
|
||||
return p;
|
||||
}
|
||||
|
||||
// "u" is less than 10^2*N
|
||||
template <typename UInt, size_t N>
|
||||
ALWAYS_INLINE inline char * head(char * p, UInt u)
|
||||
{
|
||||
return u < pow10<UnsignedOfSize<N>>(N) ? head(p, UnsignedOfSize<N / 2>(u)) : head<N>(p, split<N>(u));
|
||||
}
|
||||
|
||||
// recursion base case, selected when "u" is one byte
|
||||
template <>
|
||||
ALWAYS_INLINE inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
||||
{
|
||||
return u < 10 ? outDigit(p, u) : outTwoDigits(p, u);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// tail: produce all digits including leading zeros
|
||||
//===----------------------------------------------------------===//
|
||||
|
||||
// recursive step, "u" is less than 10^2*N
|
||||
template <typename UInt, size_t N>
|
||||
ALWAYS_INLINE inline char * tail(char * p, UInt u)
|
||||
{
|
||||
QuotientAndRemainder<N> x = split<N>(u);
|
||||
p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
|
||||
p = tail(p, x.remainder);
|
||||
return p;
|
||||
}
|
||||
|
||||
// recursion base case, selected when "u" is one byte
|
||||
template <>
|
||||
ALWAYS_INLINE inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
||||
{
|
||||
return outTwoDigits(p, u);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// large values are >= 10^2*N
|
||||
// where x contains quotient and remainder after division by 10^N
|
||||
//===----------------------------------------------------------===//
|
||||
template <size_t N>
|
||||
ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder<N> x)
|
||||
{
|
||||
QuotientAndRemainder<N> y = split<N>(x.quotient);
|
||||
p = head(p, UnsignedOfSize<N / 2>(y.quotient));
|
||||
p = tail(p, y.remainder);
|
||||
p = tail(p, x.remainder);
|
||||
return p;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// handle values of "u" that might be >= 10^2*N
|
||||
// where N is the size of "u" in bytes
|
||||
//===----------------------------------------------------------===//
|
||||
template <typename UInt, size_t N = sizeof(UInt)>
|
||||
ALWAYS_INLINE inline char * uitoa(char * p, UInt u)
|
||||
{
|
||||
if (u < pow10<UnsignedOfSize<N>>(N))
|
||||
return head(p, UnsignedOfSize<N / 2>(u));
|
||||
QuotientAndRemainder<N> x = split<N>(u);
|
||||
|
||||
return u < pow10<UnsignedOfSize<N>>(2 * N) ? head<N>(p, x) : large<N>(p, x);
|
||||
}
|
||||
|
||||
// selected when "u" is one byte
|
||||
template <>
|
||||
ALWAYS_INLINE inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
||||
{
|
||||
if (u < 10)
|
||||
return outDigit(p, u);
|
||||
else if (u < 100)
|
||||
return outTwoDigits(p, u);
|
||||
else
|
||||
{
|
||||
p = outDigit(p, u / 100);
|
||||
p = outTwoDigits(p, u % 100);
|
||||
return p;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// handle unsigned and signed integral operands
|
||||
//===----------------------------------------------------------===//
|
||||
|
||||
// itoa: handle unsigned integral operands (selected by SFINAE)
|
||||
template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr>
|
||||
ALWAYS_INLINE inline char * itoa(U u, char * p)
|
||||
{
|
||||
return convert::uitoa(p, u);
|
||||
}
|
||||
|
||||
// itoa: handle signed integral operands (selected by SFINAE)
|
||||
template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr>
|
||||
ALWAYS_INLINE inline char * itoa(I i, char * p)
|
||||
{
|
||||
// Need "mask" to be filled with a copy of the sign bit.
|
||||
// If "i" is a negative value, then the result of "operator >>"
|
||||
// is implementation-defined, though usually it is an arithmetic
|
||||
// right shift that replicates the sign bit.
|
||||
// Use a conditional expression to be portable,
|
||||
// a good optimizing compiler generates an arithmetic right shift
|
||||
// and avoids the conditional branch.
|
||||
UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
|
||||
// Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
|
||||
// Cannot use std::abs() because the result is undefined
|
||||
// in 2's complement systems for the most-negative value.
|
||||
// Want to avoid conditional branch for performance reasons since
|
||||
// CPU branch prediction will be ineffective when negative values
|
||||
// occur randomly.
|
||||
// Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
|
||||
// Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
|
||||
// This yields the absolute value with the desired type without
|
||||
// using a conditional branch and without invoking undefined or
|
||||
// implementation defined behavior:
|
||||
UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
|
||||
// Unconditionally store a minus sign when producing digits
|
||||
// in a forward direction and increment the pointer only if
|
||||
// the value is in fact negative.
|
||||
// This avoids a conditional branch and is safe because we will
|
||||
// always produce at least one digit and it will overwrite the
|
||||
// minus sign when the value is not negative.
|
||||
*p = '-';
|
||||
p += (mask & 1);
|
||||
p = convert::uitoa(p, u);
|
||||
return p;
|
||||
}
|
||||
}
|
||||
|
||||
const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull;
|
||||
const int max_multiple_of_hundred_blocks = 9;
|
||||
static_assert(max_multiple_of_hundred_that_fits_in_64_bits % 100 == 0);
|
||||
|
||||
ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
|
||||
{
|
||||
/// If we the highest 64bit item is empty, we can print just the lowest item as u64
|
||||
if (_x.items[UInt128::_impl::little(1)] == 0)
|
||||
return convert::itoa(_x.items[UInt128::_impl::little(0)], p);
|
||||
|
||||
/// Doing operations using __int128 is faster and we already rely on this feature
|
||||
using T = unsigned __int128;
|
||||
T x = (T(_x.items[UInt128::_impl::little(1)]) << 64) + T(_x.items[UInt128::_impl::little(0)]);
|
||||
|
||||
/// We are going to accumulate blocks of 2 digits to print until the number is small enough to be printed as u64
|
||||
/// To do this we could do: x / 100, x % 100
|
||||
/// But these would mean doing many iterations with long integers, so instead we divide by a much longer integer
|
||||
/// multiple of 100 (100^9) and then get the blocks out of it (as u64)
|
||||
/// Once we reach u64::max we can stop and use the fast method to print that in the front
|
||||
static const T large_divisor = max_multiple_of_hundred_that_fits_in_64_bits;
|
||||
static const T largest_uint64 = std::numeric_limits<uint64_t>::max();
|
||||
uint8_t two_values[20] = {0}; // 39 Max characters / 2
|
||||
|
||||
int current_block = 0;
|
||||
while (x > largest_uint64)
|
||||
{
|
||||
uint64_t u64_remainder = uint64_t(x % large_divisor);
|
||||
x /= large_divisor;
|
||||
|
||||
int pos = current_block;
|
||||
while (u64_remainder)
|
||||
{
|
||||
two_values[pos] = uint8_t(u64_remainder % 100);
|
||||
pos++;
|
||||
u64_remainder /= 100;
|
||||
}
|
||||
current_block += max_multiple_of_hundred_blocks;
|
||||
}
|
||||
|
||||
char * highest_part_print = convert::itoa(uint64_t(x), p);
|
||||
for (int i = 0; i < current_block; i++)
|
||||
{
|
||||
outTwoDigits(highest_part_print, two_values[current_block - 1 - i]);
|
||||
highest_part_print += 2;
|
||||
}
|
||||
|
||||
return highest_part_print;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE inline char * writeUIntText(UInt256 _x, char * p)
|
||||
{
|
||||
/// If possible, treat it as a smaller integer as they are much faster to print
|
||||
if (_x.items[UInt256::_impl::little(3)] == 0 && _x.items[UInt256::_impl::little(2)] == 0)
|
||||
return writeUIntText(UInt128{_x.items[UInt256::_impl::little(0)], _x.items[UInt256::_impl::little(1)]}, p);
|
||||
|
||||
/// If available (x86) we transform from our custom class to _BitInt(256) which has better support in the compiler
|
||||
/// and produces better code
|
||||
using T =
|
||||
#if defined(__x86_64__)
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wbit-int-extension"
|
||||
unsigned _BitInt(256)
|
||||
# pragma clang diagnostic pop
|
||||
#else
|
||||
UInt256
|
||||
#endif
|
||||
;
|
||||
|
||||
#if defined(__x86_64__)
|
||||
T x = (T(_x.items[UInt256::_impl::little(3)]) << 192) + (T(_x.items[UInt256::_impl::little(2)]) << 128)
|
||||
+ (T(_x.items[UInt256::_impl::little(1)]) << 64) + T(_x.items[UInt256::_impl::little(0)]);
|
||||
#else
|
||||
T x = _x;
|
||||
#endif
|
||||
|
||||
/// Similar to writeUIntText(UInt128) only that in this case we will stop as soon as we reach the largest u128
|
||||
/// and switch to that function
|
||||
uint8_t two_values[39] = {0}; // 78 Max characters / 2
|
||||
int current_pos = 0;
|
||||
|
||||
static const T large_divisor = max_multiple_of_hundred_that_fits_in_64_bits;
|
||||
static const T largest_uint128 = T(std::numeric_limits<uint64_t>::max()) << 64 | T(std::numeric_limits<uint64_t>::max());
|
||||
|
||||
while (x > largest_uint128)
|
||||
{
|
||||
uint64_t u64_remainder = uint64_t(x % large_divisor);
|
||||
x /= large_divisor;
|
||||
|
||||
int pos = current_pos;
|
||||
while (u64_remainder)
|
||||
{
|
||||
two_values[pos] = uint8_t(u64_remainder % 100);
|
||||
pos++;
|
||||
u64_remainder /= 100;
|
||||
}
|
||||
current_pos += max_multiple_of_hundred_blocks;
|
||||
}
|
||||
|
||||
#if defined(__x86_64__)
|
||||
UInt128 pending{uint64_t(x), uint64_t(x >> 64)};
|
||||
#else
|
||||
UInt128 pending{x.items[UInt256::_impl::little(0)], x.items[UInt256::_impl::little(1)]};
|
||||
#endif
|
||||
|
||||
char * highest_part_print = writeUIntText(pending, p);
|
||||
for (int i = 0; i < current_pos; i++)
|
||||
{
|
||||
outTwoDigits(highest_part_print, two_values[current_pos - 1 - i]);
|
||||
highest_part_print += 2;
|
||||
}
|
||||
|
||||
return highest_part_print;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE inline char * writeLeadingMinus(char * pos)
|
||||
{
|
||||
*pos = '-';
|
||||
return pos + 1;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ALWAYS_INLINE inline char * writeSIntText(T x, char * pos)
|
||||
{
|
||||
static_assert(std::is_same_v<T, Int128> || std::is_same_v<T, Int256>);
|
||||
|
||||
using UnsignedT = make_unsigned_t<T>;
|
||||
static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1);
|
||||
|
||||
if (unlikely(x == min_int))
|
||||
{
|
||||
if constexpr (std::is_same_v<T, Int128>)
|
||||
{
|
||||
const char * res = "-170141183460469231731687303715884105728";
|
||||
memcpy(pos, res, strlen(res));
|
||||
return pos + strlen(res);
|
||||
}
|
||||
else if constexpr (std::is_same_v<T, Int256>)
|
||||
{
|
||||
const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968";
|
||||
memcpy(pos, res, strlen(res));
|
||||
return pos + strlen(res);
|
||||
}
|
||||
}
|
||||
|
||||
if (x < 0)
|
||||
{
|
||||
x = -x;
|
||||
pos = writeLeadingMinus(pos);
|
||||
}
|
||||
return writeUIntText(UnsignedT(x), pos);
|
||||
}
|
||||
}
|
||||
|
||||
char * itoa(UInt8 i, char * p)
|
||||
{
|
||||
return convert::itoa(uint8_t(i), p);
|
||||
}
|
||||
|
||||
char * itoa(Int8 i, char * p)
|
||||
{
|
||||
return convert::itoa(int8_t(i), p);
|
||||
}
|
||||
|
||||
char * itoa(UInt128 i, char * p)
|
||||
{
|
||||
return writeUIntText(i, p);
|
||||
}
|
||||
|
||||
char * itoa(Int128 i, char * p)
|
||||
{
|
||||
return writeSIntText(i, p);
|
||||
}
|
||||
|
||||
char * itoa(UInt256 i, char * p)
|
||||
{
|
||||
return writeUIntText(i, p);
|
||||
}
|
||||
|
||||
char * itoa(Int256 i, char * p)
|
||||
{
|
||||
return writeSIntText(i, p);
|
||||
}
|
||||
|
||||
#define DEFAULT_ITOA(T) \
|
||||
char * itoa(T i, char * p) \
|
||||
{ \
|
||||
return convert::itoa(i, p); \
|
||||
}
|
||||
|
||||
#define FOR_MISSING_INTEGER_TYPES(M) \
|
||||
M(uint8_t) \
|
||||
M(UInt16) \
|
||||
M(UInt32) \
|
||||
M(UInt64) \
|
||||
M(int8_t) \
|
||||
M(Int16) \
|
||||
M(Int32) \
|
||||
M(Int64)
|
||||
|
||||
FOR_MISSING_INTEGER_TYPES(DEFAULT_ITOA)
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
DEFAULT_ITOA(unsigned long)
|
||||
DEFAULT_ITOA(long)
|
||||
#endif
|
||||
|
||||
#undef FOR_MISSING_INTEGER_TYPES
|
||||
#undef DEFAULT_ITOA
|
462
base/base/itoa.h
462
base/base/itoa.h
@ -1,446 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
// Based on https://github.com/amdn/itoa and combined with our optimizations
|
||||
//
|
||||
//=== itoa.h - Fast integer to ascii conversion --*- C++ -*-//
|
||||
//
|
||||
// The MIT License (MIT)
|
||||
// Copyright (c) 2016 Arturo Martin-de-Nicolas
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included
|
||||
// in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
#include <base/extended_types.h>
|
||||
|
||||
#define FOR_INTEGER_TYPES(M) \
|
||||
M(uint8_t) \
|
||||
M(UInt8) \
|
||||
M(UInt16) \
|
||||
M(UInt32) \
|
||||
M(UInt64) \
|
||||
M(UInt128) \
|
||||
M(UInt256) \
|
||||
M(int8_t) \
|
||||
M(Int8) \
|
||||
M(Int16) \
|
||||
M(Int32) \
|
||||
M(Int64) \
|
||||
M(Int128) \
|
||||
M(Int256)
|
||||
|
||||
template <typename T>
|
||||
inline int digits10(T x)
|
||||
{
|
||||
if (x < 10ULL)
|
||||
return 1;
|
||||
if (x < 100ULL)
|
||||
return 2;
|
||||
if (x < 1000ULL)
|
||||
return 3;
|
||||
#define INSTANTIATION(T) char * itoa(T i, char * p);
|
||||
FOR_INTEGER_TYPES(INSTANTIATION)
|
||||
|
||||
if (x < 1000000000000ULL)
|
||||
{
|
||||
if (x < 100000000ULL)
|
||||
{
|
||||
if (x < 1000000ULL)
|
||||
{
|
||||
if (x < 10000ULL)
|
||||
return 4;
|
||||
else
|
||||
return 5 + (x >= 100000ULL);
|
||||
}
|
||||
#if defined(OS_DARWIN)
|
||||
INSTANTIATION(unsigned long)
|
||||
INSTANTIATION(long)
|
||||
#endif
|
||||
|
||||
return 7 + (x >= 10000000ULL);
|
||||
}
|
||||
|
||||
if (x < 10000000000ULL)
|
||||
return 9 + (x >= 1000000000ULL);
|
||||
|
||||
return 11 + (x >= 100000000000ULL);
|
||||
}
|
||||
|
||||
return 12 + digits10(x / 1000000000000ULL);
|
||||
}
|
||||
|
||||
|
||||
namespace impl
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
static constexpr T pow10(size_t x)
|
||||
{
|
||||
return x ? 10 * pow10<T>(x - 1) : 1;
|
||||
}
|
||||
|
||||
// Division by a power of 10 is implemented using a multiplicative inverse.
|
||||
// This strength reduction is also done by optimizing compilers, but
|
||||
// presently the fastest results are produced by using the values
|
||||
// for the multiplication and the shift as given by the algorithm
|
||||
// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
|
||||
//
|
||||
// http://www.agner.org/optimize/optimizing_assembly.pdf
|
||||
//
|
||||
// "Integer division by a constant (all processors)
|
||||
// A floating point number can be divided by a constant by multiplying
|
||||
// with the reciprocal. If we want to do the same with integers, we have
|
||||
// to scale the reciprocal by 2n and then shift the product to the right
|
||||
// by n. There are various algorithms for finding a suitable value of n
|
||||
// and compensating for rounding errors. The algorithm described below
|
||||
// was invented by Terje Mathisen, Norway, and not published elsewhere."
|
||||
|
||||
/// Division by constant is performed by:
|
||||
/// 1. Adding 1 if needed;
|
||||
/// 2. Multiplying by another constant;
|
||||
/// 3. Shifting right by another constant.
|
||||
template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
|
||||
struct Division
|
||||
{
|
||||
static constexpr bool add{add_};
|
||||
static constexpr UInt multiplier{multiplier_};
|
||||
static constexpr unsigned shift{shift_};
|
||||
};
|
||||
|
||||
/// Select a type with appropriate number of bytes from the list of types.
|
||||
/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
|
||||
/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
|
||||
template <size_t N, typename T, typename... Ts>
|
||||
struct SelectType
|
||||
{
|
||||
using Result = typename SelectType<N / 2, Ts...>::Result;
|
||||
};
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
struct SelectType<1, T, Ts...>
|
||||
{
|
||||
using Result = T;
|
||||
};
|
||||
|
||||
|
||||
/// Division by 10^N where N is the size of the type.
|
||||
template <size_t N>
|
||||
using DivisionBy10PowN = typename SelectType
|
||||
<
|
||||
N,
|
||||
Division<uint8_t, false, 205U, 11>, /// divide by 10
|
||||
Division<uint16_t, true, 41943U, 22>, /// divide by 100
|
||||
Division<uint32_t, false, 3518437209U, 45>, /// divide by 10000
|
||||
Division<uint64_t, false, 12379400392853802749ULL, 90> /// divide by 100000000
|
||||
>::Result;
|
||||
|
||||
template <size_t N>
|
||||
using UnsignedOfSize = typename SelectType
|
||||
<
|
||||
N,
|
||||
uint8_t,
|
||||
uint16_t,
|
||||
uint32_t,
|
||||
uint64_t,
|
||||
__uint128_t
|
||||
>::Result;
|
||||
|
||||
/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
|
||||
template <size_t N>
|
||||
struct QuotientAndRemainder
|
||||
{
|
||||
UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
|
||||
UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
|
||||
};
|
||||
|
||||
template <size_t N>
|
||||
QuotientAndRemainder<N> static inline split(UnsignedOfSize<N> value)
|
||||
{
|
||||
constexpr DivisionBy10PowN<N> division;
|
||||
|
||||
UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
|
||||
UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N));
|
||||
|
||||
return {quotient, remainder};
|
||||
}
|
||||
|
||||
|
||||
static inline char * outDigit(char * p, uint8_t value)
|
||||
{
|
||||
*p = '0' + value;
|
||||
++p;
|
||||
return p;
|
||||
}
|
||||
|
||||
// Using a lookup table to convert binary numbers from 0 to 99
|
||||
// into ascii characters as described by Andrei Alexandrescu in
|
||||
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
|
||||
|
||||
static const char digits[201] = "00010203040506070809"
|
||||
"10111213141516171819"
|
||||
"20212223242526272829"
|
||||
"30313233343536373839"
|
||||
"40414243444546474849"
|
||||
"50515253545556575859"
|
||||
"60616263646566676869"
|
||||
"70717273747576777879"
|
||||
"80818283848586878889"
|
||||
"90919293949596979899";
|
||||
|
||||
static inline char * outTwoDigits(char * p, uint8_t value)
|
||||
{
|
||||
memcpy(p, &digits[value * 2], 2);
|
||||
p += 2;
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
namespace convert
|
||||
{
|
||||
template <typename UInt, size_t N = sizeof(UInt)> static char * head(char * p, UInt u);
|
||||
template <typename UInt, size_t N = sizeof(UInt)> static char * tail(char * p, UInt u);
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// head: find most significant digit, skip leading zeros
|
||||
//===----------------------------------------------------------===//
|
||||
|
||||
// "x" contains quotient and remainder after division by 10^N
|
||||
// quotient is less than 10^N
|
||||
template <size_t N>
|
||||
static inline char * head(char * p, QuotientAndRemainder<N> x)
|
||||
{
|
||||
p = head(p, UnsignedOfSize<N / 2>(x.quotient));
|
||||
p = tail(p, x.remainder);
|
||||
return p;
|
||||
}
|
||||
|
||||
// "u" is less than 10^2*N
|
||||
template <typename UInt, size_t N>
|
||||
static inline char * head(char * p, UInt u)
|
||||
{
|
||||
return u < pow10<UnsignedOfSize<N>>(N)
|
||||
? head(p, UnsignedOfSize<N / 2>(u))
|
||||
: head<N>(p, split<N>(u));
|
||||
}
|
||||
|
||||
// recursion base case, selected when "u" is one byte
|
||||
template <>
|
||||
inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
||||
{
|
||||
return u < 10
|
||||
? outDigit(p, u)
|
||||
: outTwoDigits(p, u);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// tail: produce all digits including leading zeros
|
||||
//===----------------------------------------------------------===//
|
||||
|
||||
// recursive step, "u" is less than 10^2*N
|
||||
template <typename UInt, size_t N>
|
||||
static inline char * tail(char * p, UInt u)
|
||||
{
|
||||
QuotientAndRemainder<N> x = split<N>(u);
|
||||
p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
|
||||
p = tail(p, x.remainder);
|
||||
return p;
|
||||
}
|
||||
|
||||
// recursion base case, selected when "u" is one byte
|
||||
template <>
|
||||
inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
||||
{
|
||||
return outTwoDigits(p, u);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// large values are >= 10^2*N
|
||||
// where x contains quotient and remainder after division by 10^N
|
||||
//===----------------------------------------------------------===//
|
||||
|
||||
template <size_t N>
|
||||
static inline char * large(char * p, QuotientAndRemainder<N> x)
|
||||
{
|
||||
QuotientAndRemainder<N> y = split<N>(x.quotient);
|
||||
p = head(p, UnsignedOfSize<N / 2>(y.quotient));
|
||||
p = tail(p, y.remainder);
|
||||
p = tail(p, x.remainder);
|
||||
return p;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// handle values of "u" that might be >= 10^2*N
|
||||
// where N is the size of "u" in bytes
|
||||
//===----------------------------------------------------------===//
|
||||
|
||||
template <typename UInt, size_t N = sizeof(UInt)>
|
||||
static inline char * uitoa(char * p, UInt u)
|
||||
{
|
||||
if (u < pow10<UnsignedOfSize<N>>(N))
|
||||
return head(p, UnsignedOfSize<N / 2>(u));
|
||||
QuotientAndRemainder<N> x = split<N>(u);
|
||||
|
||||
return u < pow10<UnsignedOfSize<N>>(2 * N)
|
||||
? head<N>(p, x)
|
||||
: large<N>(p, x);
|
||||
}
|
||||
|
||||
// selected when "u" is one byte
|
||||
template <>
|
||||
inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
||||
{
|
||||
if (u < 10)
|
||||
return outDigit(p, u);
|
||||
else if (u < 100)
|
||||
return outTwoDigits(p, u);
|
||||
else
|
||||
{
|
||||
p = outDigit(p, u / 100);
|
||||
p = outTwoDigits(p, u % 100);
|
||||
return p;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// handle unsigned and signed integral operands
|
||||
//===----------------------------------------------------------===//
|
||||
|
||||
// itoa: handle unsigned integral operands (selected by SFINAE)
|
||||
template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr>
|
||||
static inline char * itoa(U u, char * p)
|
||||
{
|
||||
return convert::uitoa(p, u);
|
||||
}
|
||||
|
||||
// itoa: handle signed integral operands (selected by SFINAE)
|
||||
template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr>
|
||||
static inline char * itoa(I i, char * p)
|
||||
{
|
||||
// Need "mask" to be filled with a copy of the sign bit.
|
||||
// If "i" is a negative value, then the result of "operator >>"
|
||||
// is implementation-defined, though usually it is an arithmetic
|
||||
// right shift that replicates the sign bit.
|
||||
// Use a conditional expression to be portable,
|
||||
// a good optimizing compiler generates an arithmetic right shift
|
||||
// and avoids the conditional branch.
|
||||
UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
|
||||
// Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
|
||||
// Cannot use std::abs() because the result is undefined
|
||||
// in 2's complement systems for the most-negative value.
|
||||
// Want to avoid conditional branch for performance reasons since
|
||||
// CPU branch prediction will be ineffective when negative values
|
||||
// occur randomly.
|
||||
// Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
|
||||
// Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
|
||||
// This yields the absolute value with the desired type without
|
||||
// using a conditional branch and without invoking undefined or
|
||||
// implementation defined behavior:
|
||||
UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
|
||||
// Unconditionally store a minus sign when producing digits
|
||||
// in a forward direction and increment the pointer only if
|
||||
// the value is in fact negative.
|
||||
// This avoids a conditional branch and is safe because we will
|
||||
// always produce at least one digit and it will overwrite the
|
||||
// minus sign when the value is not negative.
|
||||
*p = '-';
|
||||
p += (mask & 1);
|
||||
p = convert::uitoa(p, u);
|
||||
return p;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static inline char * writeUIntText(T x, char * p)
|
||||
{
|
||||
static_assert(is_unsigned_v<T>);
|
||||
|
||||
int len = digits10(x);
|
||||
auto * pp = p + len;
|
||||
while (x >= 100)
|
||||
{
|
||||
const auto i = x % 100;
|
||||
x /= 100;
|
||||
pp -= 2;
|
||||
outTwoDigits(pp, i);
|
||||
}
|
||||
if (x < 10)
|
||||
*p = '0' + x;
|
||||
else
|
||||
outTwoDigits(p, x);
|
||||
return p + len;
|
||||
}
|
||||
|
||||
static inline char * writeLeadingMinus(char * pos)
|
||||
{
|
||||
*pos = '-';
|
||||
return pos + 1;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline char * writeSIntText(T x, char * pos)
|
||||
{
|
||||
static_assert(std::is_same_v<T, Int128> || std::is_same_v<T, Int256>);
|
||||
|
||||
using UnsignedT = make_unsigned_t<T>;
|
||||
static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1);
|
||||
|
||||
if (unlikely(x == min_int))
|
||||
{
|
||||
if constexpr (std::is_same_v<T, Int128>)
|
||||
{
|
||||
const char * res = "-170141183460469231731687303715884105728";
|
||||
memcpy(pos, res, strlen(res));
|
||||
return pos + strlen(res);
|
||||
}
|
||||
else if constexpr (std::is_same_v<T, Int256>)
|
||||
{
|
||||
const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968";
|
||||
memcpy(pos, res, strlen(res));
|
||||
return pos + strlen(res);
|
||||
}
|
||||
}
|
||||
|
||||
if (x < 0)
|
||||
{
|
||||
x = -x;
|
||||
pos = writeLeadingMinus(pos);
|
||||
}
|
||||
return writeUIntText(UnsignedT(x), pos);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template <typename I>
|
||||
char * itoa(I i, char * p)
|
||||
{
|
||||
return impl::convert::itoa(i, p);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline char * itoa(char8_t i, char * p)
|
||||
{
|
||||
return impl::convert::itoa(uint8_t(i), p);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline char * itoa(UInt128 i, char * p)
|
||||
{
|
||||
return impl::writeUIntText(i, p);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline char * itoa(Int128 i, char * p)
|
||||
{
|
||||
return impl::writeSIntText(i, p);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline char * itoa(UInt256 i, char * p)
|
||||
{
|
||||
return impl::writeUIntText(i, p);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline char * itoa(Int256 i, char * p)
|
||||
{
|
||||
return impl::writeSIntText(i, p);
|
||||
}
|
||||
#undef FOR_INTEGER_TYPES
|
||||
#undef INSTANTIATION
|
||||
|
@ -30,7 +30,6 @@ namespace Net
|
||||
|
||||
|
||||
class HTTPServerRequest;
|
||||
class HTTPServerResponse;
|
||||
class HTTPRequestHandler;
|
||||
|
||||
|
||||
|
@ -1,17 +0,0 @@
|
||||
# see ./CMakeLists.txt for variable declaration
|
||||
if (FUZZER)
|
||||
if (FUZZER STREQUAL "libfuzzer")
|
||||
# NOTE: Eldar Zaitov decided to name it "libfuzzer" instead of "fuzzer" to keep in mind another possible fuzzer backends.
|
||||
# NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them
|
||||
# (tests) have entry point for fuzzer and it's not checked.
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1")
|
||||
|
||||
# NOTE: oss-fuzz can change LIB_FUZZING_ENGINE variable
|
||||
if (NOT LIB_FUZZING_ENGINE)
|
||||
set (LIB_FUZZING_ENGINE "-fsanitize=fuzzer")
|
||||
endif ()
|
||||
else ()
|
||||
message (FATAL_ERROR "Unknown fuzzer type: ${FUZZER}")
|
||||
endif ()
|
||||
endif()
|
@ -26,13 +26,13 @@ const uint8_t MetroHash64::test_seed_1[8] = { 0x3B, 0x0D, 0x48, 0x1C, 0xF4, 0x
|
||||
|
||||
|
||||
|
||||
MetroHash64::MetroHash64(const uint64_t seed)
|
||||
MetroHash64::MetroHash64(uint64_t seed)
|
||||
{
|
||||
Initialize(seed);
|
||||
}
|
||||
|
||||
|
||||
void MetroHash64::Initialize(const uint64_t seed)
|
||||
void MetroHash64::Initialize(uint64_t seed)
|
||||
{
|
||||
vseed = (static_cast<uint64_t>(seed) + k2) * k0;
|
||||
|
||||
@ -47,7 +47,7 @@ void MetroHash64::Initialize(const uint64_t seed)
|
||||
}
|
||||
|
||||
|
||||
void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length)
|
||||
void MetroHash64::Update(const uint8_t * const buffer, uint64_t length)
|
||||
{
|
||||
const uint8_t * ptr = reinterpret_cast<const uint8_t*>(buffer);
|
||||
const uint8_t * const end = ptr + length;
|
||||
@ -62,7 +62,7 @@ void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length)
|
||||
memcpy(input.b + (bytes % 32), ptr, static_cast<size_t>(fill));
|
||||
ptr += fill;
|
||||
bytes += fill;
|
||||
|
||||
|
||||
// input buffer is still partially filled
|
||||
if ((bytes % 32) != 0) return;
|
||||
|
||||
@ -72,7 +72,7 @@ void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length)
|
||||
state.v[2] += read_u64(&input.b[16]) * k2; state.v[2] = rotate_right(state.v[2],29) + state.v[0];
|
||||
state.v[3] += read_u64(&input.b[24]) * k3; state.v[3] = rotate_right(state.v[3],29) + state.v[1];
|
||||
}
|
||||
|
||||
|
||||
// bulk update
|
||||
bytes += static_cast<uint64_t>(end - ptr);
|
||||
while (ptr <= (end - 32))
|
||||
@ -83,14 +83,14 @@ void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length)
|
||||
state.v[2] += read_u64(ptr) * k2; ptr += 8; state.v[2] = rotate_right(state.v[2],29) + state.v[0];
|
||||
state.v[3] += read_u64(ptr) * k3; ptr += 8; state.v[3] = rotate_right(state.v[3],29) + state.v[1];
|
||||
}
|
||||
|
||||
|
||||
// store remaining bytes in input buffer
|
||||
if (ptr < end)
|
||||
memcpy(input.b, ptr, static_cast<size_t>(end - ptr));
|
||||
}
|
||||
|
||||
|
||||
void MetroHash64::Finalize(uint8_t * const hash)
|
||||
void MetroHash64::Finalize(uint8_t * hash)
|
||||
{
|
||||
// finalize bulk loop, if used
|
||||
if (bytes >= 32)
|
||||
@ -102,11 +102,11 @@ void MetroHash64::Finalize(uint8_t * const hash)
|
||||
|
||||
state.v[0] = vseed + (state.v[0] ^ state.v[1]);
|
||||
}
|
||||
|
||||
|
||||
// process any bytes remaining in the input buffer
|
||||
const uint8_t * ptr = reinterpret_cast<const uint8_t*>(input.b);
|
||||
const uint8_t * const end = ptr + (bytes % 32);
|
||||
|
||||
|
||||
if ((end - ptr) >= 16)
|
||||
{
|
||||
state.v[1] = state.v[0] + (read_u64(ptr) * k2); ptr += 8; state.v[1] = rotate_right(state.v[1],29) * k3;
|
||||
@ -139,7 +139,7 @@ void MetroHash64::Finalize(uint8_t * const hash)
|
||||
state.v[0] += read_u8 (ptr) * k3;
|
||||
state.v[0] ^= rotate_right(state.v[0], 37) * k1;
|
||||
}
|
||||
|
||||
|
||||
state.v[0] ^= rotate_right(state.v[0], 28);
|
||||
state.v[0] *= k0;
|
||||
state.v[0] ^= rotate_right(state.v[0], 29);
|
||||
@ -152,7 +152,7 @@ void MetroHash64::Finalize(uint8_t * const hash)
|
||||
}
|
||||
|
||||
|
||||
void MetroHash64::Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed)
|
||||
void MetroHash64::Hash(const uint8_t * buffer, uint64_t length, uint8_t * const hash, uint64_t seed)
|
||||
{
|
||||
const uint8_t * ptr = reinterpret_cast<const uint8_t*>(buffer);
|
||||
const uint8_t * const end = ptr + length;
|
||||
@ -238,7 +238,7 @@ bool MetroHash64::ImplementationVerified()
|
||||
|
||||
// verify incremental implementation
|
||||
MetroHash64 metro;
|
||||
|
||||
|
||||
metro.Initialize(0);
|
||||
metro.Update(reinterpret_cast<const uint8_t *>(MetroHash64::test_string), strlen(MetroHash64::test_string));
|
||||
metro.Finalize(hash);
|
||||
@ -262,9 +262,9 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
|
||||
|
||||
const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key);
|
||||
const uint8_t * const end = ptr + len;
|
||||
|
||||
|
||||
uint64_t hash = ((static_cast<uint64_t>(seed) + k2) * k0) + len;
|
||||
|
||||
|
||||
if (len >= 32)
|
||||
{
|
||||
uint64_t v[4];
|
||||
@ -272,7 +272,7 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
|
||||
v[1] = hash;
|
||||
v[2] = hash;
|
||||
v[3] = hash;
|
||||
|
||||
|
||||
do
|
||||
{
|
||||
v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
|
||||
@ -288,7 +288,7 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
|
||||
v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0;
|
||||
hash += v[0] ^ v[1];
|
||||
}
|
||||
|
||||
|
||||
if ((end - ptr) >= 16)
|
||||
{
|
||||
uint64_t v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1;
|
||||
@ -297,32 +297,32 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
|
||||
v1 ^= rotate_right(v1 * k3, 35) + v0;
|
||||
hash += v1;
|
||||
}
|
||||
|
||||
|
||||
if ((end - ptr) >= 8)
|
||||
{
|
||||
hash += read_u64(ptr) * k3; ptr += 8;
|
||||
hash ^= rotate_right(hash, 33) * k1;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
if ((end - ptr) >= 4)
|
||||
{
|
||||
hash += read_u32(ptr) * k3; ptr += 4;
|
||||
hash ^= rotate_right(hash, 15) * k1;
|
||||
}
|
||||
|
||||
|
||||
if ((end - ptr) >= 2)
|
||||
{
|
||||
hash += read_u16(ptr) * k3; ptr += 2;
|
||||
hash ^= rotate_right(hash, 13) * k1;
|
||||
}
|
||||
|
||||
|
||||
if ((end - ptr) >= 1)
|
||||
{
|
||||
hash += read_u8 (ptr) * k3;
|
||||
hash ^= rotate_right(hash, 25) * k1;
|
||||
}
|
||||
|
||||
|
||||
hash ^= rotate_right(hash, 33);
|
||||
hash *= k0;
|
||||
hash ^= rotate_right(hash, 33);
|
||||
@ -336,13 +336,13 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
|
||||
static const uint64_t k0 = 0xD6D018F5;
|
||||
static const uint64_t k1 = 0xA2AA033B;
|
||||
static const uint64_t k2 = 0x62992FC1;
|
||||
static const uint64_t k3 = 0x30BC5B29;
|
||||
static const uint64_t k3 = 0x30BC5B29;
|
||||
|
||||
const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key);
|
||||
const uint8_t * const end = ptr + len;
|
||||
|
||||
|
||||
uint64_t hash = ((static_cast<uint64_t>(seed) + k2) * k0) + len;
|
||||
|
||||
|
||||
if (len >= 32)
|
||||
{
|
||||
uint64_t v[4];
|
||||
@ -350,7 +350,7 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
|
||||
v[1] = hash;
|
||||
v[2] = hash;
|
||||
v[3] = hash;
|
||||
|
||||
|
||||
do
|
||||
{
|
||||
v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
|
||||
@ -366,7 +366,7 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
|
||||
v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 30) * k0;
|
||||
hash += v[0] ^ v[1];
|
||||
}
|
||||
|
||||
|
||||
if ((end - ptr) >= 16)
|
||||
{
|
||||
uint64_t v0 = hash + (read_u64(ptr) * k2); ptr += 8; v0 = rotate_right(v0,29) * k3;
|
||||
@ -375,31 +375,31 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
|
||||
v1 ^= rotate_right(v1 * k3, 34) + v0;
|
||||
hash += v1;
|
||||
}
|
||||
|
||||
|
||||
if ((end - ptr) >= 8)
|
||||
{
|
||||
hash += read_u64(ptr) * k3; ptr += 8;
|
||||
hash ^= rotate_right(hash, 36) * k1;
|
||||
}
|
||||
|
||||
|
||||
if ((end - ptr) >= 4)
|
||||
{
|
||||
hash += read_u32(ptr) * k3; ptr += 4;
|
||||
hash ^= rotate_right(hash, 15) * k1;
|
||||
}
|
||||
|
||||
|
||||
if ((end - ptr) >= 2)
|
||||
{
|
||||
hash += read_u16(ptr) * k3; ptr += 2;
|
||||
hash ^= rotate_right(hash, 15) * k1;
|
||||
}
|
||||
|
||||
|
||||
if ((end - ptr) >= 1)
|
||||
{
|
||||
hash += read_u8 (ptr) * k3;
|
||||
hash ^= rotate_right(hash, 23) * k1;
|
||||
}
|
||||
|
||||
|
||||
hash ^= rotate_right(hash, 28);
|
||||
hash *= k0;
|
||||
hash ^= rotate_right(hash, 29);
|
||||
|
@ -25,24 +25,24 @@ public:
|
||||
static const uint32_t bits = 64;
|
||||
|
||||
// Constructor initializes the same as Initialize()
|
||||
explicit MetroHash64(const uint64_t seed=0);
|
||||
explicit MetroHash64(uint64_t seed=0);
|
||||
|
||||
// Initializes internal state for new hash with optional seed
|
||||
void Initialize(const uint64_t seed=0);
|
||||
void Initialize(uint64_t seed=0);
|
||||
|
||||
// Update the hash state with a string of bytes. If the length
|
||||
// is sufficiently long, the implementation switches to a bulk
|
||||
// hashing algorithm directly on the argument buffer for speed.
|
||||
void Update(const uint8_t * buffer, const uint64_t length);
|
||||
void Update(const uint8_t * buffer, uint64_t length);
|
||||
|
||||
// Constructs the final hash and writes it to the argument buffer.
|
||||
// After a hash is finalized, this instance must be Initialized()-ed
|
||||
// again or the behavior of Update() and Finalize() is undefined.
|
||||
void Finalize(uint8_t * const hash);
|
||||
void Finalize(uint8_t * hash);
|
||||
|
||||
// A non-incremental function implementation. This can be significantly
|
||||
// faster than the incremental implementation for some usage patterns.
|
||||
static void Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed=0);
|
||||
static void Hash(const uint8_t * buffer, uint64_t length, uint8_t * hash, uint64_t seed=0);
|
||||
|
||||
// Does implementation correctly execute test vectors?
|
||||
static bool ImplementationVerified();
|
||||
|
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.2.1.2248"
|
||||
ARG VERSION="24.2.2.71"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -4,6 +4,9 @@ FROM clickhouse/fasttest:$FROM_TAG
|
||||
ENV CC=clang-${LLVM_VERSION}
|
||||
ENV CXX=clang++-${LLVM_VERSION}
|
||||
|
||||
# If the cctools is updated, then first build it in the CI, then update here in a different commit
|
||||
COPY --from=clickhouse/cctools:d9e3596e706b /cctools /cctools
|
||||
|
||||
# Rust toolchain and libraries
|
||||
ENV RUSTUP_HOME=/rust/rustup
|
||||
ENV CARGO_HOME=/rust/cargo
|
||||
@ -73,9 +76,6 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \
|
||||
"https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \
|
||||
&& chmod +x /usr/bin/clang-tidy-cache
|
||||
|
||||
# If the cctools is updated, then first build it in the CI, then update here in a different commit
|
||||
COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools
|
||||
|
||||
RUN mkdir /workdir && chmod 777 /workdir
|
||||
WORKDIR /workdir
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
# It's based on the assumption that we don't care of the cctools version so much
|
||||
# It event does not depend on the clickhouse/fasttest in the `docker/images.json`
|
||||
ARG FROM_TAG=latest
|
||||
FROM clickhouse/fasttest:$FROM_TAG
|
||||
FROM clickhouse/fasttest:$FROM_TAG as builder
|
||||
|
||||
ENV CC=clang-${LLVM_VERSION}
|
||||
ENV CXX=clang++-${LLVM_VERSION}
|
||||
@ -29,3 +29,6 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
|
||||
&& make install -j$(nproc) \
|
||||
&& cd ../.. \
|
||||
&& rm -rf cctools-port
|
||||
|
||||
FROM scratch
|
||||
COPY --from=builder /cctools /cctools
|
||||
|
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.2.1.2248"
|
||||
ARG VERSION="24.2.2.71"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="24.2.1.2248"
|
||||
ARG VERSION="24.2.2.71"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
|
@ -174,7 +174,14 @@ function fuzz
|
||||
mkdir -p /var/run/clickhouse-server
|
||||
|
||||
# NOTE: we use process substitution here to preserve keep $! as a pid of clickhouse-server
|
||||
clickhouse-server --config-file db/config.xml --pid-file /var/run/clickhouse-server/clickhouse-server.pid -- --path db > server.log 2>&1 &
|
||||
# server.log -> CH logs
|
||||
# stderr.log -> Process logs (sanitizer)
|
||||
clickhouse-server \
|
||||
--config-file db/config.xml \
|
||||
--pid-file /var/run/clickhouse-server/clickhouse-server.pid \
|
||||
-- --path db \
|
||||
--logger.console=0 \
|
||||
--logger.log=server.log > stderr.log 2>&1 &
|
||||
server_pid=$!
|
||||
|
||||
kill -0 $server_pid
|
||||
@ -303,7 +310,7 @@ quit
|
||||
if [ "$server_died" == 1 ]
|
||||
then
|
||||
# The server has died.
|
||||
if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*|.*Child process was terminated by signal 9.*' server.log > description.txt
|
||||
if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*|.*Child process was terminated by signal 9.*' server.log stderr.log > description.txt
|
||||
then
|
||||
echo "Lost connection to server. See the logs." > description.txt
|
||||
fi
|
||||
@ -427,6 +434,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s
|
||||
<a href="run.log">run.log</a>
|
||||
<a href="fuzzer.log.zst">fuzzer.log.zst</a>
|
||||
<a href="server.log.zst">server.log.zst</a>
|
||||
<a href="stderr.log">stderr.log</a>
|
||||
<a href="main.log">main.log</a>
|
||||
<a href="dmesg.log">dmesg.log</a>
|
||||
${CORE_LINK}
|
||||
|
@ -51,22 +51,22 @@ fi
|
||||
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
|
||||
|
||||
if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; then
|
||||
sudo cat /etc/clickhouse-server/config.d/zookeeper.xml \
|
||||
| sed "/<use_compression>1<\/use_compression>/d" \
|
||||
> /etc/clickhouse-server/config.d/zookeeper.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/zookeeper.xml.tmp /etc/clickhouse-server/config.d/zookeeper.xml
|
||||
sudo sed -i "/<use_compression>1<\/use_compression>/d" /etc/clickhouse-server/config.d/zookeeper.xml
|
||||
|
||||
# it contains some new settings, but we can safely remove it
|
||||
rm /etc/clickhouse-server/config.d/handlers.yaml
|
||||
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
|
||||
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
|
||||
|
||||
#todo: remove these after 24.3 released.
|
||||
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
|
||||
|
||||
#todo: remove these after 24.3 released.
|
||||
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
|
||||
|
||||
function remove_keeper_config()
|
||||
{
|
||||
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
|
||||
| sed "/<$1>$2<\/$1>/d" \
|
||||
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
}
|
||||
# commit_logs_cache_size_threshold setting doesn't exist on some older versions
|
||||
remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+"
|
||||
@ -77,7 +77,7 @@ fi
|
||||
if [ "$NUM_TRIES" -gt "1" ]; then
|
||||
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
|
||||
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
|
||||
export THREAD_FUZZER_SLEEP_TIME_US=100000
|
||||
export THREAD_FUZZER_SLEEP_TIME_US_MAX=100000
|
||||
|
||||
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
|
||||
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
|
||||
@ -88,10 +88,10 @@ if [ "$NUM_TRIES" -gt "1" ]; then
|
||||
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US_MAX=10000
|
||||
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US_MAX=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US_MAX=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US_MAX=10000
|
||||
|
||||
mkdir -p /var/run/clickhouse-server
|
||||
# simplest way to forward env variables to server
|
||||
@ -101,25 +101,13 @@ else
|
||||
fi
|
||||
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \
|
||||
| sed "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_1/</filesystem_caches_path>|" \
|
||||
> /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp
|
||||
mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
|
||||
sudo sed -i "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_1/</filesystem_caches_path>|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
|
||||
|
||||
sudo cat /etc/clickhouse-server2/config.d/filesystem_caches_path.xml \
|
||||
| sed "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_2/</filesystem_caches_path>|" \
|
||||
> /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp
|
||||
mv /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
|
||||
sudo sed -i "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_2/</filesystem_caches_path>|" /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
|
||||
|
||||
sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \
|
||||
| sed "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_1/</custom_cached_disks_base_directory>|" \
|
||||
> /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp
|
||||
mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
|
||||
sudo sed -i "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_1/</custom_cached_disks_base_directory>|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
|
||||
|
||||
sudo cat /etc/clickhouse-server2/config.d/filesystem_caches_path.xml \
|
||||
| sed "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_2/</custom_cached_disks_base_directory>|" \
|
||||
> /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp
|
||||
mv /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
|
||||
sudo sed -i "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_2/</custom_cached_disks_base_directory>|" /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
|
||||
|
||||
mkdir -p /var/run/clickhouse-server1
|
||||
sudo chown clickhouse:clickhouse /var/run/clickhouse-server1
|
||||
|
@ -215,7 +215,7 @@ function check_server_start()
|
||||
function check_logs_for_critical_errors()
|
||||
{
|
||||
# Sanitizer asserts
|
||||
sed -n '/WARNING:.*anitizer/,/^$/p' >> /test_output/tmp
|
||||
sed -n '/WARNING:.*anitizer/,/^$/p' /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
|
||||
|
@ -27,7 +27,7 @@ install_packages package_folder
|
||||
# and find more potential issues.
|
||||
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
|
||||
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
|
||||
export THREAD_FUZZER_SLEEP_TIME_US=100000
|
||||
export THREAD_FUZZER_SLEEP_TIME_US_MAX=100000
|
||||
|
||||
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
|
||||
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
|
||||
@ -38,11 +38,11 @@ export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US_MAX=10000
|
||||
|
||||
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US_MAX=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US_MAX=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US_MAX=10000
|
||||
|
||||
export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01
|
||||
export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01
|
||||
|
@ -67,10 +67,7 @@ configure
|
||||
|
||||
function remove_keeper_config()
|
||||
{
|
||||
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
|
||||
| sed "/<$1>$2<\/$1>/d" \
|
||||
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
}
|
||||
|
||||
# async_replication setting doesn't exist on some older versions
|
||||
@ -80,16 +77,10 @@ remove_keeper_config "async_replication" "1"
|
||||
remove_keeper_config "create_if_not_exists" "[01]"
|
||||
|
||||
#todo: remove these after 24.3 released.
|
||||
sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \
|
||||
| sed "s|<object_storage_type>azure|<object_storage_type>azure_blob_storage|" \
|
||||
> /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml
|
||||
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
|
||||
|
||||
#todo: remove these after 24.3 released.
|
||||
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
|
||||
| sed "s|<object_storage_type>local|<object_storage_type>local_blob_storage|" \
|
||||
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
|
||||
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
|
||||
|
||||
# latest_logs_cache_size_threshold setting doesn't exist on some older versions
|
||||
remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
|
||||
@ -120,22 +111,13 @@ export ZOOKEEPER_FAULT_INJECTION=0
|
||||
configure
|
||||
|
||||
# force_sync=false doesn't work correctly on some older versions
|
||||
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
|
||||
| sed "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" \
|
||||
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
sudo sed -i "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
|
||||
#todo: remove these after 24.3 released.
|
||||
sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \
|
||||
| sed "s|<object_storage_type>azure|<object_storage_type>azure_blob_storage|" \
|
||||
> /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml
|
||||
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
|
||||
|
||||
#todo: remove these after 24.3 released.
|
||||
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
|
||||
| sed "s|<object_storage_type>local|<object_storage_type>local_blob_storage|" \
|
||||
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
|
||||
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
|
||||
|
||||
# async_replication setting doesn't exist on some older versions
|
||||
remove_keeper_config "async_replication" "1"
|
||||
@ -150,10 +132,7 @@ remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
|
||||
remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+"
|
||||
|
||||
# But we still need default disk because some tables loaded only into it
|
||||
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
|
||||
| sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
|
||||
> /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
|
||||
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo sed -i "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
|
||||
@ -256,10 +235,7 @@ then
|
||||
fi
|
||||
|
||||
# Just in case previous version left some garbage in zk
|
||||
sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \
|
||||
| sed "s|>1<|>0<|g" \
|
||||
> /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp /etc/clickhouse-server/config.d/lost_forever_check.xml
|
||||
sudo sed -i "s|>1<|>0<|g" /etc/clickhouse-server/config.d/lost_forever_check.xml \
|
||||
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
|
||||
|
||||
start 500
|
||||
|
64
docs/changelogs/v23.12.5.81-stable.md
Normal file
64
docs/changelogs/v23.12.5.81-stable.md
Normal file
@ -0,0 +1,64 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v23.12.5.81-stable (a0fbe3ae813) FIXME as compared to v23.12.4.15-stable (4233d111d20)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#60290](https://github.com/ClickHouse/ClickHouse/issues/60290): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Backported in [#60830](https://github.com/ClickHouse/ClickHouse/issues/60830): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#59883](https://github.com/ClickHouse/ClickHouse/issues/59883): If you want to run initdb scripts every time when ClickHouse container is starting you shoud initialize environment varible CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### CI Fix or Improvement (changelog entry is not required)
|
||||
|
||||
* Backported in [#60767](https://github.com/ClickHouse/ClickHouse/issues/60767): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60582](https://github.com/ClickHouse/ClickHouse/issues/60582): Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61041](https://github.com/ClickHouse/ClickHouse/issues/61041): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#61030](https://github.com/ClickHouse/ClickHouse/issues/61030): ... [#61022](https://github.com/ClickHouse/ClickHouse/pull/61022) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61224](https://github.com/ClickHouse/ClickHouse/issues/61224): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Backported in [#61190](https://github.com/ClickHouse/ClickHouse/issues/61190): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
|
||||
|
||||
#### NO CL ENTRY
|
||||
|
||||
* NO CL ENTRY: 'Revert "Backport [#59798](https://github.com/ClickHouse/ClickHouse/issues/59798) to 23.12: CI: do not reuse builds on release branches"'. [#59979](https://github.com/ClickHouse/ClickHouse/pull/59979) ([Max K.](https://github.com/maxknv)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)).
|
||||
* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)).
|
||||
* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
24
docs/changelogs/v23.3.21.26-lts.md
Normal file
24
docs/changelogs/v23.3.21.26-lts.md
Normal file
@ -0,0 +1,24 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v23.3.21.26-lts (d9672a3731f) FIXME as compared to v23.3.20.27-lts (cc974ba4f81)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix reading from sparse columns after restart [#49660](https://github.com/ClickHouse/ClickHouse/pull/49660) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
30
docs/changelogs/v23.8.11.28-lts.md
Normal file
30
docs/changelogs/v23.8.11.28-lts.md
Normal file
@ -0,0 +1,30 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v23.8.11.28-lts (31879d2ab4c) FIXME as compared to v23.8.10.43-lts (a278225bba9)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#60828](https://github.com/ClickHouse/ClickHouse/issues/60828): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### NO CL ENTRY
|
||||
|
||||
* NO CL ENTRY: 'Use the current branch test-utils to build cctools'. [#61276](https://github.com/ClickHouse/ClickHouse/pull/61276) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
26
docs/changelogs/v24.1.7.18-stable.md
Normal file
26
docs/changelogs/v24.1.7.18-stable.md
Normal file
@ -0,0 +1,26 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.1.7.18-stable (90925babd78) FIXME as compared to v24.1.6.52-stable (fa09f677bc9)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### CI Fix or Improvement (changelog entry is not required)
|
||||
|
||||
* Backported in [#61043](https://github.com/ClickHouse/ClickHouse/issues/61043): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#61168](https://github.com/ClickHouse/ClickHouse/issues/61168): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61192](https://github.com/ClickHouse/ClickHouse/issues/61192): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
55
docs/changelogs/v24.2.2.71-stable.md
Normal file
55
docs/changelogs/v24.2.2.71-stable.md
Normal file
@ -0,0 +1,55 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.2.2.71-stable (9293d361e72) FIXME as compared to v24.2.1.2248-stable (891689a4150)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#60834](https://github.com/ClickHouse/ClickHouse/issues/60834): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* PartsSplitter invalid ranges for the same part [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Try to avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
|
||||
* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### CI Fix or Improvement (changelog entry is not required)
|
||||
|
||||
* Backported in [#60758](https://github.com/ClickHouse/ClickHouse/issues/60758): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60706](https://github.com/ClickHouse/ClickHouse/issues/60706): Eliminates the need to provide input args to docker server jobs to clean yml files. [#60602](https://github.com/ClickHouse/ClickHouse/pull/60602) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61045](https://github.com/ClickHouse/ClickHouse/issues/61045): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60721](https://github.com/ClickHouse/ClickHouse/issues/60721): Fix build_report job so that it's defined by ci_config only (not yml file). [#60613](https://github.com/ClickHouse/ClickHouse/pull/60613) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60668](https://github.com/ClickHouse/ClickHouse/issues/60668): Do not await ci pending jobs on release branches decrease wait timeout to fit into gh job timeout. [#60652](https://github.com/ClickHouse/ClickHouse/pull/60652) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60863](https://github.com/ClickHouse/ClickHouse/issues/60863): Set limited number of builds for "special build check" report in backports. [#60850](https://github.com/ClickHouse/ClickHouse/pull/60850) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60946](https://github.com/ClickHouse/ClickHouse/issues/60946): ... [#60935](https://github.com/ClickHouse/ClickHouse/pull/60935) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60972](https://github.com/ClickHouse/ClickHouse/issues/60972): ... [#60952](https://github.com/ClickHouse/ClickHouse/pull/60952) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60980](https://github.com/ClickHouse/ClickHouse/issues/60980): ... [#60958](https://github.com/ClickHouse/ClickHouse/pull/60958) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61170](https://github.com/ClickHouse/ClickHouse/issues/61170): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61181](https://github.com/ClickHouse/ClickHouse/issues/61181): ... [#61172](https://github.com/ClickHouse/ClickHouse/pull/61172) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61228](https://github.com/ClickHouse/ClickHouse/issues/61228): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Backported in [#61194](https://github.com/ClickHouse/ClickHouse/issues/61194): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61244](https://github.com/ClickHouse/ClickHouse/issues/61244): ... [#61214](https://github.com/ClickHouse/ClickHouse/pull/61214) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#61388](https://github.com/ClickHouse/ClickHouse/issues/61388):. [#61373](https://github.com/ClickHouse/ClickHouse/pull/61373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* CI: make workflow yml abstract [#60421](https://github.com/ClickHouse/ClickHouse/pull/60421) ([Max K.](https://github.com/maxknv)).
|
||||
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* General sanity in function `seriesOutliersDetectTukey` [#60535](https://github.com/ClickHouse/ClickHouse/pull/60535) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
@ -6,6 +6,11 @@ sidebar_label: JDBC
|
||||
|
||||
# JDBC
|
||||
|
||||
:::note
|
||||
clickhouse-jdbc-bridge contains experimental codes and is no longer supported. It may contain reliability issues and security vulnerabilities. Use it at your own risk.
|
||||
ClickHouse recommend using built-in table functions in ClickHouse which provide a better alternative for ad-hoc querying scenarios (Postgres, MySQL, MongoDB, etc).
|
||||
:::
|
||||
|
||||
Allows ClickHouse to connect to external databases via [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity).
|
||||
|
||||
To implement the JDBC connection, ClickHouse uses the separate program [clickhouse-jdbc-bridge](https://github.com/ClickHouse/clickhouse-jdbc-bridge) that should run as a daemon.
|
||||
|
@ -946,96 +946,6 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF
|
||||
|
||||
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting.
|
||||
|
||||
### Dynamic Storage
|
||||
|
||||
This example query shows how to attach a table stored at a URL and configure the
|
||||
remote storage within the query. The web storage is not configured in the ClickHouse
|
||||
configuration files; all the settings are in the CREATE/ATTACH query.
|
||||
|
||||
:::note
|
||||
The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
|
||||
:::
|
||||
|
||||
#### Example dynamic web storage
|
||||
|
||||
:::tip
|
||||
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver)
|
||||
:::
|
||||
|
||||
In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.
|
||||
|
||||
```sql
|
||||
# highlight-next-line
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
### Nested Dynamic Storage
|
||||
|
||||
This example query builds on the above dynamic disk configuration and shows how to
|
||||
use a local disk to cache data from a table stored at a URL. Neither the cache disk
|
||||
nor the web storage is configured in the ClickHouse configuration files; both are
|
||||
configured in the CREATE/ATTACH query settings.
|
||||
|
||||
In the settings highlighted below notice that the disk of `type=web` is nested within
|
||||
the disk of `type=cache`.
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=cache,
|
||||
max_size='1Gi',
|
||||
path='/var/lib/clickhouse/custom_disk_cache/',
|
||||
disk=disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
)
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
### Details {#details}
|
||||
|
||||
In the case of `MergeTree` tables, data is getting to disk in different ways:
|
||||
@ -1064,13 +974,11 @@ During this time, they are not moved to other volumes or disks. Therefore, until
|
||||
|
||||
User can assign new big parts to different disks of a [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures) volume in a balanced way using the [min_bytes_to_rebalance_partition_over_jbod](/docs/en/operations/settings/merge-tree-settings.md/#min-bytes-to-rebalance-partition-over-jbod) setting.
|
||||
|
||||
## Using S3 for Data Storage {#table_engine-mergetree-s3}
|
||||
## Using External Storage for Data Storage {#table_engine-mergetree-s3}
|
||||
|
||||
:::note
|
||||
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
|
||||
:::
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. See [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage) for more details.
|
||||
|
||||
`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
|
||||
Example for [S3](https://aws.amazon.com/s3/) as external storage using a disk with type `s3`.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
@ -1112,253 +1020,12 @@ Configuration markup:
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Also see [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage).
|
||||
|
||||
:::note cache configuration
|
||||
ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions.
|
||||
:::
|
||||
|
||||
### Configuring the S3 disk
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
|
||||
- `access_key_id` — S3 access key id.
|
||||
- `secret_access_key` — S3 secret access key.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `region` — S3 region name.
|
||||
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
|
||||
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
|
||||
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
|
||||
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
|
||||
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
|
||||
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
|
||||
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
|
||||
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
|
||||
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
|
||||
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
|
||||
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
|
||||
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
|
||||
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
|
||||
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
|
||||
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
|
||||
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `key_template` — Define the format with which the object keys are generated. By default, Clickhouse takes `root path` from `endpoint` option and adds random generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have a full control how to the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`.
|
||||
- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the objects keys which were stored in the metadata files with the metadata version lower that `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.
|
||||
|
||||
### Configuring the cache
|
||||
|
||||
This is the cache configuration from above:
|
||||
```xml
|
||||
<s3_cache>
|
||||
<type>cache</type>
|
||||
<disk>s3</disk>
|
||||
<path>/var/lib/clickhouse/disks/s3_cache/</path>
|
||||
<max_size>10Gi</max_size>
|
||||
</s3_cache>
|
||||
```
|
||||
|
||||
These parameters define the cache layer:
|
||||
- `type` — If a disk is of type `cache` it caches mark and index files in memory.
|
||||
- `disk` — The name of the disk that will be cached.
|
||||
|
||||
Cache parameters:
|
||||
- `path` — The path where metadata for the cache is stored.
|
||||
- `max_size` — The size (amount of disk space) that the cache can grow to.
|
||||
|
||||
:::tip
|
||||
There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details.
|
||||
:::
|
||||
|
||||
S3 disk can be configured as `main` or `cold` storage:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
|
||||
<access_key_id>your_access_key_id</access_key_id>
|
||||
<secret_access_key>your_secret_access_key</secret_access_key>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3_main>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3_main>
|
||||
<s3_cold>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>default</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>s3</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
<move_factor>0.2</move_factor>
|
||||
</s3_cold>
|
||||
</policies>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule.
|
||||
|
||||
## Using Azure Blob Storage for Data Storage {#table_engine-mergetree-azure-blob-storage}
|
||||
|
||||
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
|
||||
|
||||
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<blob_storage_disk>
|
||||
<type>azure_blob_storage</type>
|
||||
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
|
||||
<container_name>container</container_name>
|
||||
<account_name>account</account_name>
|
||||
<account_key>pass123</account_key>
|
||||
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
|
||||
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</blob_storage_disk>
|
||||
</disks>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Connection parameters:
|
||||
* `endpoint` — AzureBlobStorage endpoint URL with container & prefix. Optionally can contain account_name if the authentication method used needs it. (`http://account.blob.core.windows.net:{port}/[account_name]{container_name}/{data_prefix}`) or these parameters can be provided separately using storage_account_url, account_name & container. For specifying prefix, endpoint should be used.
|
||||
* `endpoint_contains_account_name` - This flag is used to specify if endpoint contains account_name as it is only needed for certain authentication methods. (Default : true)
|
||||
* `storage_account_url` - Required if endpoint is not specified, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
|
||||
* `container_name` - Target container name, defaults to `default-container`.
|
||||
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
|
||||
|
||||
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
|
||||
* `connection_string` - For authentication using a connection string.
|
||||
* `account_name` and `account_key` - For authentication using Shared Key.
|
||||
|
||||
Limit parameters (mainly for internal usage):
|
||||
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
|
||||
* `min_bytes_for_seek` - Limits the size of a seekable region.
|
||||
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
|
||||
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
|
||||
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
|
||||
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
|
||||
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
|
||||
|
||||
:::note Zero-copy replication is not ready for production
|
||||
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
|
||||
:::
|
||||
|
||||
## HDFS storage {#hdfs-storage}
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `hdfs`
|
||||
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<hdfs>
|
||||
<type>hdfs</type>
|
||||
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
|
||||
<skip_access_check>true</skip_access_check>
|
||||
</hdfs>
|
||||
<hdd>
|
||||
<type>local</type>
|
||||
<path>/</path>
|
||||
</hdd>
|
||||
</disks>
|
||||
<policies>
|
||||
<hdfs>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>hdfs</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>hdd</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
</hdfs>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Web storage (read-only) {#web-storage}
|
||||
|
||||
Web storage can be used for read-only purposes. An example use is for hosting sample
|
||||
data, or for migrating data.
|
||||
|
||||
:::tip
|
||||
Storage can also be configured temporarily within a query, if a web dataset is not expected
|
||||
to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the
|
||||
configuration file.
|
||||
:::
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `web`
|
||||
- the data is hosted at `http://nginx:80/test1/`
|
||||
- a cache on local storage is used
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>http://nginx:80/test1/</endpoint>
|
||||
</web>
|
||||
<cached_web>
|
||||
<type>cache</type>
|
||||
<disk>web</disk>
|
||||
<path>cached_web_cache/</path>
|
||||
<max_size>100000000</max_size>
|
||||
</cached_web>
|
||||
</disks>
|
||||
<policies>
|
||||
<web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</web>
|
||||
<cached_web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cached_web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</cached_web>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_part` — Name of a part.
|
||||
|
@ -21,3 +21,79 @@ When restarting a server, data disappears from the table and the table becomes e
|
||||
Normally, using this table engine is not justified. However, it can be used for tests, and for tasks where maximum speed is required on a relatively small number of rows (up to approximately 100,000,000).
|
||||
|
||||
The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing `GLOBAL IN` (see the section “IN operators”).
|
||||
|
||||
Upper and lower bounds can be specified to limit Memory engine table size, effectively allowing it to act as a circular buffer (see [Engine Parameters](#engine-parameters)).
|
||||
|
||||
## Engine Parameters {#engine-parameters}
|
||||
|
||||
- `min_bytes_to_keep` — Minimum bytes to keep when memory table is size-capped.
|
||||
- Default value: `0`
|
||||
- Requires `max_bytes_to_keep`
|
||||
- `max_bytes_to_keep` — Maximum bytes to keep within memory table where oldest rows are deleted on each insertion (i.e circular buffer). Max bytes can exceed the stated limit if the oldest batch of rows to remove falls under the `min_bytes_to_keep` limit when adding a large block.
|
||||
- Default value: `0`
|
||||
- `min_rows_to_keep` — Minimum rows to keep when memory table is size-capped.
|
||||
- Default value: `0`
|
||||
- Requires `max_rows_to_keep`
|
||||
- `max_rows_to_keep` — Maximum rows to keep within memory table where oldest rows are deleted on each insertion (i.e circular buffer). Max rows can exceed the stated limit if the oldest batch of rows to remove falls under the `min_rows_to_keep` limit when adding a large block.
|
||||
- Default value: `0`
|
||||
|
||||
## Usage {#usage}
|
||||
|
||||
|
||||
**Initialize settings**
|
||||
``` sql
|
||||
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000;
|
||||
```
|
||||
|
||||
**Note:** Both `bytes` and `rows` capping parameters can be set at the same time, however, the lower bounds of `max` and `min` will be adhered to.
|
||||
|
||||
## Examples {#examples}
|
||||
``` sql
|
||||
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 4096, max_bytes_to_keep = 16384;
|
||||
|
||||
/* 1. testing oldest block doesn't get deleted due to min-threshold - 3000 rows */
|
||||
INSERT INTO memory SELECT * FROM numbers(0, 1600); -- 8'192 bytes
|
||||
|
||||
/* 2. adding block that doesn't get deleted */
|
||||
INSERT INTO memory SELECT * FROM numbers(1000, 100); -- 1'024 bytes
|
||||
|
||||
/* 3. testing oldest block gets deleted - 9216 bytes - 1100 */
|
||||
INSERT INTO memory SELECT * FROM numbers(9000, 1000); -- 8'192 bytes
|
||||
|
||||
/* 4. checking a very large block overrides all */
|
||||
INSERT INTO memory SELECT * FROM numbers(9000, 10000); -- 65'536 bytes
|
||||
|
||||
SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase();
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─total_bytes─┬─total_rows─┐
|
||||
│ 65536 │ 10000 │
|
||||
└─────────────┴────────────┘
|
||||
```
|
||||
|
||||
also, for rows:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 4000, max_rows_to_keep = 10000;
|
||||
|
||||
/* 1. testing oldest block doesn't get deleted due to min-threshold - 3000 rows */
|
||||
INSERT INTO memory SELECT * FROM numbers(0, 1600); -- 1'600 rows
|
||||
|
||||
/* 2. adding block that doesn't get deleted */
|
||||
INSERT INTO memory SELECT * FROM numbers(1000, 100); -- 100 rows
|
||||
|
||||
/* 3. testing oldest block gets deleted - 9216 bytes - 1100 */
|
||||
INSERT INTO memory SELECT * FROM numbers(9000, 1000); -- 1'000 rows
|
||||
|
||||
/* 4. checking a very large block overrides all */
|
||||
INSERT INTO memory SELECT * FROM numbers(9000, 10000); -- 10'000 rows
|
||||
|
||||
SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase();
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─total_bytes─┬─total_rows─┐
|
||||
│ 65536 │ 10000 │
|
||||
└─────────────┴────────────┘
|
||||
```
|
||||
|
@ -55,7 +55,7 @@ CREATE TABLE criteo_log (
|
||||
) ENGINE = Log;
|
||||
```
|
||||
|
||||
Download the data:
|
||||
Insert the data:
|
||||
|
||||
``` bash
|
||||
$ for i in {00..23}; do echo $i; zcat datasets/criteo/day_${i#0}.gz | sed -r 's/^/2000-01-'${i/00/24}'\t/' | clickhouse-client --host=example-perftest01j --query="INSERT INTO criteo_log FORMAT TabSeparated"; done
|
||||
|
@ -248,6 +248,9 @@ Some of the files might not download fully. Check the file sizes and re-download
|
||||
|
||||
``` bash
|
||||
$ curl -O https://datasets.clickhouse.com/trips_mergetree/partitions/trips_mergetree.tar
|
||||
# Validate the checksum
|
||||
$ md5sum trips_mergetree.tar
|
||||
# Checksum should be equal to: f3b8d469b41d9a82da064ded7245d12c
|
||||
$ tar xvf trips_mergetree.tar -C /var/lib/clickhouse # path to ClickHouse data directory
|
||||
$ # check permissions of unpacked data, fix if required
|
||||
$ sudo service clickhouse-server restart
|
||||
|
293
docs/en/getting-started/example-datasets/tw-weather.md
Normal file
293
docs/en/getting-started/example-datasets/tw-weather.md
Normal file
@ -0,0 +1,293 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/tw-weather
|
||||
sidebar_label: Taiwan Historical Weather Datasets
|
||||
sidebar_position: 1
|
||||
description: 131 million rows of weather observation data for the last 128 yrs
|
||||
---
|
||||
|
||||
# Taiwan Historical Weather Datasets
|
||||
|
||||
This dataset contains historical meteorological observations measurements for the last 128 years. Each row is a measurement for a point in date time and weather station.
|
||||
|
||||
The origin of this dataset is available [here](https://github.com/Raingel/historical_weather) and the list of weather station numbers can be found [here](https://github.com/Raingel/weather_station_list).
|
||||
|
||||
> The sources of meteorological datasets include the meteorological stations that are established by the Central Weather Administration (station code is beginning with C0, C1, and 4) and the agricultural meteorological stations belonging to the Council of Agriculture (station code other than those mentioned above):
|
||||
|
||||
- StationId
|
||||
- MeasuredDate, the observation time
|
||||
- StnPres, the station air pressure
|
||||
- SeaPres, the sea level pressure
|
||||
- Td, the dew point temperature
|
||||
- RH, the relative humidity
|
||||
- Other elements where available
|
||||
|
||||
## Downloading the data
|
||||
|
||||
- A [pre-processed version](#pre-processed-data) of the data for the ClickHouse, which has been cleaned, re-structured, and enriched. This dataset covers the years from 1896 to 2023.
|
||||
- [Download the original raw data](#original-raw-data) and convert to the format required by ClickHouse. Users wanting to add their own columns may wish to explore or complete their approaches.
|
||||
|
||||
### Pre-processed data
|
||||
|
||||
The dataset has also been re-structured from a measurement per line to a row per weather station id and measured date, i.e.
|
||||
|
||||
```csv
|
||||
StationId,MeasuredDate,StnPres,Tx,RH,WS,WD,WSGust,WDGust,Precp,GloblRad,TxSoil0cm,TxSoil5cm,TxSoil20cm,TxSoil50cm,TxSoil100cm,SeaPres,Td,PrecpHour,SunShine,TxSoil10cm,EvapA,Visb,UVI,Cloud Amount,TxSoil30cm,TxSoil200cm,TxSoil300cm,TxSoil500cm,VaporPressure
|
||||
C0X100,2016-01-01 01:00:00,1022.1,16.1,72,1.1,8.0,,,,,,,,,,,,,,,,,,,,,,,
|
||||
C0X100,2016-01-01 02:00:00,1021.6,16.0,73,1.2,358.0,,,,,,,,,,,,,,,,,,,,,,,
|
||||
C0X100,2016-01-01 03:00:00,1021.3,15.8,74,1.5,353.0,,,,,,,,,,,,,,,,,,,,,,,
|
||||
C0X100,2016-01-01 04:00:00,1021.2,15.8,74,1.7,8.0,,,,,,,,,,,,,,,,,,,,,,,
|
||||
```
|
||||
|
||||
It is easy to query and ensure that the resulting table has less sparse and some elements are null because they're not available to be measured in this weather station.
|
||||
|
||||
This dataset is available in the following Google CloudStorage location. Either download the dataset to your local filesystem (and insert them with the ClickHouse client) or insert them directly into the ClickHouse (see [Inserting from URL](#inserting-from-url)).
|
||||
|
||||
To download:
|
||||
|
||||
```bash
|
||||
wget https://storage.googleapis.com/taiwan-weather-observaiton-datasets/preprocessed_weather_daily_1896_2023.tar.gz
|
||||
|
||||
# Option: Validate the checksum
|
||||
md5sum preprocessed_weather_daily_1896_2023.tar.gz
|
||||
# Checksum should be equal to: 11b484f5bd9ddafec5cfb131eb2dd008
|
||||
|
||||
tar -xzvf preprocessed_weather_daily_1896_2023.tar.gz
|
||||
daily_weather_preprocessed_1896_2023.csv
|
||||
|
||||
# Option: Validate the checksum
|
||||
md5sum daily_weather_preprocessed_1896_2023.csv
|
||||
# Checksum should be equal to: 1132248c78195c43d93f843753881754
|
||||
```
|
||||
|
||||
### Original raw data
|
||||
|
||||
The following details are about the steps to download the original raw data to transform and convert as you want.
|
||||
|
||||
#### Download
|
||||
|
||||
To download the original raw data:
|
||||
|
||||
```bash
|
||||
mkdir tw_raw_weather_data && cd tw_raw_weather_data
|
||||
|
||||
wget https://storage.googleapis.com/taiwan-weather-observaiton-datasets/raw_data_weather_daily_1896_2023.tar.gz
|
||||
|
||||
# Option: Validate the checksum
|
||||
md5sum raw_data_weather_daily_1896_2023.tar.gz
|
||||
# Checksum should be equal to: b66b9f137217454d655e3004d7d1b51a
|
||||
|
||||
tar -xzvf raw_data_weather_daily_1896_2023.tar.gz
|
||||
466920_1928.csv
|
||||
466920_1929.csv
|
||||
466920_1930.csv
|
||||
466920_1931.csv
|
||||
...
|
||||
|
||||
# Option: Validate the checksum
|
||||
cat *.csv | md5sum
|
||||
# Checksum should be equal to: b26db404bf84d4063fac42e576464ce1
|
||||
```
|
||||
|
||||
#### Retrieve the Taiwan weather stations
|
||||
|
||||
```bash
|
||||
wget -O weather_sta_list.csv https://github.com/Raingel/weather_station_list/raw/main/data/weather_sta_list.csv
|
||||
|
||||
# Option: Convert the UTF-8-BOM to UTF-8 encoding
|
||||
sed -i '1s/^\xEF\xBB\xBF//' weather_sta_list.csv
|
||||
```
|
||||
|
||||
## Create table schema
|
||||
|
||||
Create the MergeTree table in ClickHouse (from the ClickHouse client).
|
||||
|
||||
```bash
|
||||
CREATE TABLE tw_weather_data (
|
||||
StationId String null,
|
||||
MeasuredDate DateTime64,
|
||||
StnPres Float64 null,
|
||||
SeaPres Float64 null,
|
||||
Tx Float64 null,
|
||||
Td Float64 null,
|
||||
RH Float64 null,
|
||||
WS Float64 null,
|
||||
WD Float64 null,
|
||||
WSGust Float64 null,
|
||||
WDGust Float64 null,
|
||||
Precp Float64 null,
|
||||
PrecpHour Float64 null,
|
||||
SunShine Float64 null,
|
||||
GloblRad Float64 null,
|
||||
TxSoil0cm Float64 null,
|
||||
TxSoil5cm Float64 null,
|
||||
TxSoil10cm Float64 null,
|
||||
TxSoil20cm Float64 null,
|
||||
TxSoil50cm Float64 null,
|
||||
TxSoil100cm Float64 null,
|
||||
TxSoil30cm Float64 null,
|
||||
TxSoil200cm Float64 null,
|
||||
TxSoil300cm Float64 null,
|
||||
TxSoil500cm Float64 null,
|
||||
VaporPressure Float64 null,
|
||||
UVI Float64 null,
|
||||
"Cloud Amount" Float64 null,
|
||||
EvapA Float64 null,
|
||||
Visb Float64 null
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (MeasuredDate);
|
||||
```
|
||||
|
||||
## Inserting into ClickHouse
|
||||
|
||||
### Inserting from local file
|
||||
|
||||
Data can be inserted from a local file as follows (from the ClickHouse client):
|
||||
|
||||
```sql
|
||||
INSERT INTO tw_weather_data FROM INFILE '/path/to/daily_weather_preprocessed_1896_2023.csv'
|
||||
```
|
||||
|
||||
where `/path/to` represents the specific user path to the local file on the disk.
|
||||
|
||||
And the sample response output is as follows after inserting data into the ClickHouse:
|
||||
|
||||
```response
|
||||
Query id: 90e4b524-6e14-4855-817c-7e6f98fbeabb
|
||||
|
||||
Ok.
|
||||
131985329 rows in set. Elapsed: 71.770 sec. Processed 131.99 million rows, 10.06 GB (1.84 million rows/s., 140.14 MB/s.)
|
||||
Peak memory usage: 583.23 MiB.
|
||||
```
|
||||
|
||||
### Inserting from URL
|
||||
|
||||
```sql
|
||||
INSERT INTO tw_weather_data SELECT *
|
||||
FROM url('https://storage.googleapis.com/taiwan-weather-observaiton-datasets/daily_weather_preprocessed_1896_2023.csv', 'CSVWithNames')
|
||||
|
||||
```
|
||||
To know how to speed this up, please see our blog post on [tuning large data loads](https://clickhouse.com/blog/supercharge-your-clickhouse-data-loads-part2).
|
||||
|
||||
## Check data rows and sizes
|
||||
|
||||
1. Let's see how many rows are inserted:
|
||||
|
||||
```sql
|
||||
SELECT formatReadableQuantity(count())
|
||||
FROM tw_weather_data;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─formatReadableQuantity(count())─┐
|
||||
│ 131.99 million │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
2. Let's see how much disk space are used for this table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
formatReadableSize(sum(bytes)) AS disk_size,
|
||||
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size
|
||||
FROM system.parts
|
||||
WHERE (`table` = 'tw_weather_data') AND active
|
||||
```
|
||||
|
||||
```response
|
||||
┌─disk_size─┬─uncompressed_size─┐
|
||||
│ 2.13 GiB │ 32.94 GiB │
|
||||
└───────────┴───────────────────┘
|
||||
```
|
||||
|
||||
## Sample queries
|
||||
|
||||
### Q1: Retrieve the highest dew point temperature for each weather station in the specific year
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
StationId,
|
||||
max(Td) AS max_td
|
||||
FROM tw_weather_data
|
||||
WHERE (year(MeasuredDate) = 2023) AND (Td IS NOT NULL)
|
||||
GROUP BY StationId
|
||||
|
||||
┌─StationId─┬─max_td─┐
|
||||
│ 466940 │ 1 │
|
||||
│ 467300 │ 1 │
|
||||
│ 467540 │ 1 │
|
||||
│ 467490 │ 1 │
|
||||
│ 467080 │ 1 │
|
||||
│ 466910 │ 1 │
|
||||
│ 467660 │ 1 │
|
||||
│ 467270 │ 1 │
|
||||
│ 467350 │ 1 │
|
||||
│ 467571 │ 1 │
|
||||
│ 466920 │ 1 │
|
||||
│ 467650 │ 1 │
|
||||
│ 467550 │ 1 │
|
||||
│ 467480 │ 1 │
|
||||
│ 467610 │ 1 │
|
||||
│ 467050 │ 1 │
|
||||
│ 467590 │ 1 │
|
||||
│ 466990 │ 1 │
|
||||
│ 467060 │ 1 │
|
||||
│ 466950 │ 1 │
|
||||
│ 467620 │ 1 │
|
||||
│ 467990 │ 1 │
|
||||
│ 466930 │ 1 │
|
||||
│ 467110 │ 1 │
|
||||
│ 466881 │ 1 │
|
||||
│ 467410 │ 1 │
|
||||
│ 467441 │ 1 │
|
||||
│ 467420 │ 1 │
|
||||
│ 467530 │ 1 │
|
||||
│ 466900 │ 1 │
|
||||
└───────────┴────────┘
|
||||
|
||||
30 rows in set. Elapsed: 0.045 sec. Processed 6.41 million rows, 187.33 MB (143.92 million rows/s., 4.21 GB/s.)
|
||||
```
|
||||
|
||||
### Q2: Raw data fetching with the specific duration time range, fields and weather station
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
StnPres,
|
||||
SeaPres,
|
||||
Tx,
|
||||
Td,
|
||||
RH,
|
||||
WS,
|
||||
WD,
|
||||
WSGust,
|
||||
WDGust,
|
||||
Precp,
|
||||
PrecpHour
|
||||
FROM tw_weather_data
|
||||
WHERE (StationId = 'C0UB10') AND (MeasuredDate >= '2023-12-23') AND (MeasuredDate < '2023-12-24')
|
||||
ORDER BY MeasuredDate ASC
|
||||
LIMIT 10
|
||||
```
|
||||
|
||||
```response
|
||||
┌─StnPres─┬─SeaPres─┬───Tx─┬───Td─┬─RH─┬──WS─┬──WD─┬─WSGust─┬─WDGust─┬─Precp─┬─PrecpHour─┐
|
||||
│ 1029.5 │ ᴺᵁᴸᴸ │ 11.8 │ ᴺᵁᴸᴸ │ 78 │ 2.7 │ 271 │ 5.5 │ 275 │ -99.8 │ -99.8 │
|
||||
│ 1029.8 │ ᴺᵁᴸᴸ │ 12.3 │ ᴺᵁᴸᴸ │ 78 │ 2.7 │ 289 │ 5.5 │ 308 │ -99.8 │ -99.8 │
|
||||
│ 1028.6 │ ᴺᵁᴸᴸ │ 12.3 │ ᴺᵁᴸᴸ │ 79 │ 2.3 │ 251 │ 6.1 │ 289 │ -99.8 │ -99.8 │
|
||||
│ 1028.2 │ ᴺᵁᴸᴸ │ 13 │ ᴺᵁᴸᴸ │ 75 │ 4.3 │ 312 │ 7.5 │ 316 │ -99.8 │ -99.8 │
|
||||
│ 1027.8 │ ᴺᵁᴸᴸ │ 11.1 │ ᴺᵁᴸᴸ │ 89 │ 7.1 │ 310 │ 11.6 │ 322 │ -99.8 │ -99.8 │
|
||||
│ 1027.8 │ ᴺᵁᴸᴸ │ 11.6 │ ᴺᵁᴸᴸ │ 90 │ 3.1 │ 269 │ 10.7 │ 295 │ -99.8 │ -99.8 │
|
||||
│ 1027.9 │ ᴺᵁᴸᴸ │ 12.3 │ ᴺᵁᴸᴸ │ 89 │ 4.7 │ 296 │ 8.1 │ 310 │ -99.8 │ -99.8 │
|
||||
│ 1028.2 │ ᴺᵁᴸᴸ │ 12.2 │ ᴺᵁᴸᴸ │ 94 │ 2.5 │ 246 │ 7.1 │ 283 │ -99.8 │ -99.8 │
|
||||
│ 1028.4 │ ᴺᵁᴸᴸ │ 12.5 │ ᴺᵁᴸᴸ │ 94 │ 3.1 │ 265 │ 4.8 │ 297 │ -99.8 │ -99.8 │
|
||||
│ 1028.3 │ ᴺᵁᴸᴸ │ 13.6 │ ᴺᵁᴸᴸ │ 91 │ 1.2 │ 273 │ 4.4 │ 256 │ -99.8 │ -99.8 │
|
||||
└─────────┴─────────┴──────┴──────┴────┴─────┴─────┴────────┴────────┴───────┴───────────┘
|
||||
|
||||
10 rows in set. Elapsed: 0.009 sec. Processed 91.70 thousand rows, 2.33 MB (9.67 million rows/s., 245.31 MB/s.)
|
||||
```
|
||||
|
||||
## Credits
|
||||
|
||||
We would like to acknowledge the efforts of the Central Weather Administration and Agricultural Meteorological Observation Network (Station) of the Council of Agriculture for preparing, cleaning, and distributing this dataset. We appreciate your efforts.
|
||||
|
||||
Ou, J.-H., Kuo, C.-H., Wu, Y.-F., Lin, G.-C., Lee, M.-H., Chen, R.-K., Chou, H.-P., Wu, H.-Y., Chu, S.-C., Lai, Q.-J., Tsai, Y.-C., Lin, C.-C., Kuo, C.-C., Liao, C.-T., Chen, Y.-N., Chu, Y.-W., Chen, C.-Y., 2023. Application-oriented deep learning model for early warning of rice blast in Taiwan. Ecological Informatics 73, 101950. https://doi.org/10.1016/j.ecoinf.2022.101950 [13/12/2022]
|
@ -178,7 +178,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
|
||||
- `--password` – The password. Default value: empty string.
|
||||
- `--ask-password` - Prompt the user to enter a password.
|
||||
- `--query, -q` – The query to process when using non-interactive mode. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`.
|
||||
- `--queries-file` – file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--query queries1.sql --query queries2.sql`. Cannot be used simultaneously with `--query`.
|
||||
- `--queries-file` – file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--queries-file queries1.sql --queries-file queries2.sql`. Cannot be used simultaneously with `--query`.
|
||||
- `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
|
||||
- `--multiline, -m` – If specified, allow multiline queries (do not send the query on Enter).
|
||||
- `--database, -d` – Select the current default database. Default value: the current database from the server settings (‘default’ by default).
|
||||
|
@ -95,9 +95,11 @@ which is equal to
|
||||
|
||||
## Substituting Configuration {#substitution}
|
||||
|
||||
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)).
|
||||
The config can define substitutions. There are two types of substitutions:
|
||||
|
||||
If you want to replace an entire element with a substitution use `include` as the element name.
|
||||
- If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)).
|
||||
|
||||
- If you want to replace an entire element with a substitution, use `include` as the element name. Substitutions can also be performed from ZooKeeper by specifying attribute `from_zk = "/path/to/node"`. In this case, the element value is replaced with the contents of the Zookeeper node at `/path/to/node`. This also works with you store an entire XML subtree as a Zookeeper node, it will be fully inserted into the source element.
|
||||
|
||||
XML substitution example:
|
||||
|
||||
@ -114,7 +116,7 @@ XML substitution example:
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node, and it will be fully inserted into the source element.
|
||||
If you want to merge the substituting content with the existing configuration instead of appending you can use attribute `merge="true"`, for example: `<include from_zk="/some_path" merge="true">`. In this case, the existing configuration will be merged with the content from the substitution and the existing configuration settings will be replaced with values from substitution.
|
||||
|
||||
## Encrypting and Hiding Configuration {#encryption}
|
||||
|
||||
|
@ -933,9 +933,9 @@ Hard limit is configured via system tools
|
||||
|
||||
## database_atomic_delay_before_drop_table_sec {#database_atomic_delay_before_drop_table_sec}
|
||||
|
||||
Sets the delay before remove table data in seconds. If the query has `SYNC` modifier, this setting is ignored.
|
||||
The delay before a table data is dropped in seconds. If the `DROP TABLE` query has a `SYNC` modifier, this setting is ignored.
|
||||
|
||||
Default value: `480` (8 minute).
|
||||
Default value: `480` (8 minutes).
|
||||
|
||||
## database_catalog_unused_dir_hide_timeout_sec {#database_catalog_unused_dir_hide_timeout_sec}
|
||||
|
||||
|
@ -4337,6 +4337,18 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
|
||||
## function_locate_has_mysql_compatible_argument_order {#function-locate-has-mysql-compatible-argument-order}
|
||||
|
||||
Controls the order of arguments in function [locate](../../sql-reference/functions/string-search-functions.md#locate).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Function `locate` accepts arguments `(haystack, needle[, start_pos])`.
|
||||
- 1 — Function `locate` accepts arguments `(needle, haystack, [, start_pos])` (MySQL-compatible behavior)
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## date_time_overflow_behavior {#date_time_overflow_behavior}
|
||||
|
||||
Defines the behavior when [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md) or integers are converted into Date, Date32, DateTime or DateTime64 but the value cannot be represented in the result type.
|
||||
|
@ -5,26 +5,416 @@ sidebar_label: "External Disks for Storing Data"
|
||||
title: "External Disks for Storing Data"
|
||||
---
|
||||
|
||||
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon S3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)).
|
||||
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely. Various storages are supported:
|
||||
1. [Amazon S3](https://aws.amazon.com/s3/) object storage.
|
||||
2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html))
|
||||
3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs).
|
||||
|
||||
To work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
|
||||
:::note ClickHouse also has support for external table engines, which are different from external storage option described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables.
|
||||
1. to work with data stored on `Amazon S3` disks, use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine.
|
||||
2. to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
|
||||
3. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine.
|
||||
:::
|
||||
|
||||
To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver).
|
||||
## Configuring external storage {#configuring-external-storage}
|
||||
|
||||
## Configuring HDFS {#configuring-hdfs}
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly.
|
||||
|
||||
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
|
||||
Disk configuration requires:
|
||||
1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`.
|
||||
2. Configuration of a specific external storage type.
|
||||
|
||||
Configuration markup:
|
||||
Starting from 24.1 clickhouse version, it is possible to use a new configuration option.
|
||||
It requires to specify:
|
||||
1. `type` equal to `object_storage`
|
||||
2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`.
|
||||
Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web`.
|
||||
Usage of `plain` metadata type is described in [plain storage section](/docs/en/operations/storing-data.md/#storing-data-on-webserver), `web` metadata type can be used only with `web` object storage type, `local` metadata type stores metadata files locally (each metadata files contains mapping to files in object storage and some additional meta information about them).
|
||||
|
||||
E.g. configuration option
|
||||
``` xml
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3>
|
||||
```
|
||||
|
||||
is equal to configuration (from `24.1`):
|
||||
``` xml
|
||||
<s3>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>local</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3>
|
||||
```
|
||||
|
||||
Configuration
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>s3_plain</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
is equal to
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>plain</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
Example of full storage configuration will look like:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Starting with 24.1 clickhouse version, it can also look like:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<s3>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>s3</object_storage_type>
|
||||
<metadata_type>local</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
In order to make a specific kind of storage a default option for all `MergeTree` tables add the following section to configuration file:
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<merge_tree>
|
||||
<storage_policy>s3</storage_policy>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
If you want to configure a specific storage policy only to specific table, you can define it in settings while creating the table:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (a Int32, b String)
|
||||
ENGINE = MergeTree() ORDER BY a
|
||||
SETTINGS storage_policy = 's3';
|
||||
```
|
||||
|
||||
You can also use `disk` instead of `storage_policy`. In this case it is not requires to have `storage_policy` section in configuration file, only `disk` section would be enough.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (a Int32, b String)
|
||||
ENGINE = MergeTree() ORDER BY a
|
||||
SETTINGS disk = 's3';
|
||||
```
|
||||
|
||||
## Dynamic Configuration {#dynamic-configuration}
|
||||
|
||||
There is also a possibility to specify storage configuration without a predefined disk in configuration in a configuration file, but can be configured in the `CREATE`/`ATTACH` query settings.
|
||||
|
||||
The following example query builds on the above dynamic disk configuration and shows how to use a local disk to cache data from a table stored at a URL.
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
The example below adds cache to external storage.
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=cache,
|
||||
max_size='1Gi',
|
||||
path='/var/lib/clickhouse/custom_disk_cache/',
|
||||
disk=disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
)
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
In the settings highlighted below notice that the disk of `type=web` is nested within
|
||||
the disk of `type=cache`.
|
||||
|
||||
:::note
|
||||
The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
|
||||
:::
|
||||
|
||||
A combination of config-based configuration and sql-defined configuration is also possible:
|
||||
|
||||
```sql
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=cache,
|
||||
max_size='1Gi',
|
||||
path='/var/lib/clickhouse/custom_disk_cache/',
|
||||
disk=disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
)
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
where `web` is a from a server configuration file:
|
||||
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'</endpoint>
|
||||
</web>
|
||||
</disks>
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
### Using S3 Storage {#s3-storage}
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
|
||||
- `access_key_id` — S3 access key id.
|
||||
- `secret_access_key` — S3 secret access key.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `region` — S3 region name.
|
||||
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
|
||||
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
|
||||
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
|
||||
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
|
||||
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
|
||||
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
|
||||
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
|
||||
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
|
||||
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
|
||||
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
|
||||
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
|
||||
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
|
||||
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
|
||||
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
|
||||
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
|
||||
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
- `key_template` — Define the format with which the object keys are generated. By default, Clickhouse takes `root path` from `endpoint` option and adds random generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have a full control how to the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`.
|
||||
- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the objects keys which were stored in the metadata files with the metadata version lower that `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.
|
||||
|
||||
:::note
|
||||
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
|
||||
:::
|
||||
|
||||
### Using Plain Storage {#plain-storage}
|
||||
|
||||
In `22.10` a new disk type `s3_plain` was introduced, which provides a write-once storage. Configuration parameters are the same as for `s3` disk type.
|
||||
Unlike `s3` disk type, it stores data as is, e.g. instead of randomly-generated blob names, it uses normal file names (the same way as clickhouse stores files on local disk) and does not store any metadata locally, e.g. it is derived from data on `s3`.
|
||||
|
||||
This disk type allows to keep a static version of the table, as it does not allow executing merges on the existing data and does not allow inserting of new data.
|
||||
A use case for this disk type is to create backups on it, which can be done via `BACKUP TABLE data TO Disk('plain_disk_name', 'backup_name')`. Afterwards you can do `RESTORE TABLE data AS data_restored FROM Disk('plain_disk_name', 'backup_name')` or using `ATTACH TABLE data (...) ENGINE = MergeTree() SETTINGS disk = 'plain_disk_name'`.
|
||||
|
||||
Configuration:
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>s3_plain</type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
Starting from `24.1` it is possible configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using `plain` metadata type.
|
||||
|
||||
Configuration:
|
||||
``` xml
|
||||
<s3_plain>
|
||||
<type>object_storage</type>
|
||||
<object_storage_type>azure</object_storage_type>
|
||||
<metadata_type>plain</metadata_type>
|
||||
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
|
||||
<use_invironment_credentials>1</use_invironment_credentials>
|
||||
</s3_plain>
|
||||
```
|
||||
|
||||
### Using Azure Blob Storage {#azure-blob-storage}
|
||||
|
||||
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
|
||||
|
||||
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<blob_storage_disk>
|
||||
<type>azure_blob_storage</type>
|
||||
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
|
||||
<container_name>container</container_name>
|
||||
<account_name>account</account_name>
|
||||
<account_key>pass123</account_key>
|
||||
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
|
||||
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</blob_storage_disk>
|
||||
</disks>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Connection parameters:
|
||||
* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
|
||||
* `container_name` - Target container name, defaults to `default-container`.
|
||||
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
|
||||
|
||||
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
|
||||
* `connection_string` - For authentication using a connection string.
|
||||
* `account_name` and `account_key` - For authentication using Shared Key.
|
||||
|
||||
Limit parameters (mainly for internal usage):
|
||||
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
|
||||
* `min_bytes_for_seek` - Limits the size of a seekable region.
|
||||
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
|
||||
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
|
||||
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
|
||||
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
|
||||
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
|
||||
|
||||
:::note Zero-copy replication is not ready for production
|
||||
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
|
||||
:::
|
||||
|
||||
## Using HDFS storage {#hdfs-storage}
|
||||
|
||||
In this sample configuration:
|
||||
- the disk is of type `hdfs`
|
||||
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<hdfs>
|
||||
<type>hdfs</type>
|
||||
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
|
||||
<skip_access_check>true</skip_access_check>
|
||||
</hdfs>
|
||||
<hdd>
|
||||
<type>local</type>
|
||||
<path>/</path>
|
||||
</hdd>
|
||||
</disks>
|
||||
<policies>
|
||||
<hdfs>
|
||||
@ -32,26 +422,17 @@ Configuration markup:
|
||||
<main>
|
||||
<disk>hdfs</disk>
|
||||
</main>
|
||||
<external>
|
||||
<disk>hdd</disk>
|
||||
</external>
|
||||
</volumes>
|
||||
</hdfs>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
|
||||
<merge_tree>
|
||||
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.
|
||||
|
||||
## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system}
|
||||
### Using Data Encryption {#encrypted-virtual-file-system}
|
||||
|
||||
You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one.
|
||||
|
||||
@ -112,7 +493,7 @@ Example of disk configuration:
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Using local cache {#using-local-cache}
|
||||
### Using local cache {#using-local-cache}
|
||||
|
||||
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
|
||||
For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
|
||||
@ -275,23 +656,92 @@ Cache profile events:
|
||||
|
||||
- `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds`
|
||||
|
||||
## Using in-memory cache (userspace page cache) {#userspace-page-cache}
|
||||
|
||||
The File Cache described above stores cached data in local files. Alternatively, object-store-based disks can be configured to use "Userspace Page Cache", which is RAM-only. Userspace page cache is recommended only if file cache can't be used for some reason, e.g. if the machine doesn't have a local disk at all. Note that file cache effectively uses RAM for caching too, since the OS caches contents of local files.
|
||||
|
||||
To enable userspace page cache for disks that don't use file cache, use setting `use_page_cache_for_disks_without_file_cache`.
|
||||
|
||||
By default, on Linux, the userspace page cache will use all available memory, similar to the OS page cache. In tools like `top` and `ps`, the clickhouse server process will typically show resident set size near 100% of the machine's RAM - this is normal, and most of this memory is actually reclaimable by the OS on memory pressure (`MADV_FREE`). This behavior can be disabled with server setting `page_cache_use_madv_free = 0`, making the userspace page cache just use a fixed amount of memory `page_cache_size` with no special interaction with the OS. On Mac OS, `page_cache_use_madv_free` is always disabled as it doesn't have lazy `MADV_FREE`.
|
||||
|
||||
Unfortunately, `page_cache_use_madv_free` makes it difficult to tell if the server is close to running out of memory, since the RSS metric becomes useless. Async metric `UnreclaimableRSS` shows the amount of physical memory used by the server, excluding the memory reclaimable by the OS: `select value from system.asynchronous_metrics where metric = 'UnreclaimableRSS'`. Use it for monitoring instead of RSS. This metric is only available if `page_cache_use_madv_free` is enabled.
|
||||
|
||||
## Storing Data on Web Server {#storing-data-on-webserver}
|
||||
|
||||
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
|
||||
### Using static Web storage (read-only) {#web-storage}
|
||||
|
||||
This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md).
|
||||
Web storage can be used for read-only purposes. An example use is for hosting sample data, or for migrating data.
|
||||
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
|
||||
|
||||
Web server storage is supported only for the [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) engine families. To access the data stored on a `web` disk, use the [storage_policy](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#terms) setting when executing the query. For example, `ATTACH TABLE table_web UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'`.
|
||||
In this sample configuration:
|
||||
- the disk is of type `web`
|
||||
- the data is hosted at `http://nginx:80/test1/`
|
||||
- a cache on local storage is used
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<web>
|
||||
<type>web</type>
|
||||
<endpoint>http://nginx:80/test1/</endpoint>
|
||||
</web>
|
||||
<cached_web>
|
||||
<type>cache</type>
|
||||
<disk>web</disk>
|
||||
<path>cached_web_cache/</path>
|
||||
<max_size>100000000</max_size>
|
||||
</cached_web>
|
||||
</disks>
|
||||
<policies>
|
||||
<web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</web>
|
||||
<cached_web>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cached_web</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</cached_web>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
:::tip
|
||||
Storage can also be configured temporarily within a query, if a web dataset is not expected
|
||||
to be used routinely, see [dynamic configuration](#dynamic-configuration) and skip editing the
|
||||
configuration file.
|
||||
:::
|
||||
|
||||
:::tip
|
||||
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver)
|
||||
:::
|
||||
|
||||
In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.
|
||||
|
||||
```sql
|
||||
# highlight-next-line
|
||||
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
|
||||
is_new UInt8,
|
||||
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2)
|
||||
# highlight-start
|
||||
SETTINGS disk = disk(
|
||||
type=web,
|
||||
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
|
||||
);
|
||||
# highlight-end
|
||||
```
|
||||
|
||||
A ready test case. You need to add this configuration to config:
|
||||
|
||||
@ -487,7 +937,7 @@ If URL is not reachable on disk load when the server is starting up tables, then
|
||||
Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read.
|
||||
|
||||
|
||||
## Zero-copy Replication (not ready for production) {#zero-copy}
|
||||
### Zero-copy Replication (not ready for production) {#zero-copy}
|
||||
|
||||
Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.
|
||||
|
||||
|
@ -26,7 +26,9 @@ priority: 0
|
||||
is_active: 0
|
||||
active_children: 0
|
||||
dequeued_requests: 67
|
||||
canceled_requests: 0
|
||||
dequeued_cost: 4692272
|
||||
canceled_cost: 0
|
||||
busy_periods: 63
|
||||
vruntime: 938454.1999999989
|
||||
system_vruntime: ᴺᵁᴸᴸ
|
||||
@ -54,7 +56,9 @@ Columns:
|
||||
- `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied.
|
||||
- `active_children` (`UInt64`) - The number of children in active state.
|
||||
- `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node.
|
||||
- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node.
|
||||
- `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node.
|
||||
- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node.
|
||||
- `busy_periods` (`UInt64`) - The total number of deactivations of this node.
|
||||
- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner.
|
||||
- `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`.
|
||||
|
@ -36,9 +36,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t
|
||||
|
||||
The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter.
|
||||
|
||||
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings-formats.md#date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
|
||||
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
|
||||
|
||||
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format) setting.
|
||||
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting.
|
||||
|
||||
## Examples
|
||||
|
||||
@ -147,8 +147,8 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse
|
||||
- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md)
|
||||
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
|
||||
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
|
||||
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format)
|
||||
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format)
|
||||
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format)
|
||||
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format)
|
||||
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
|
||||
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
|
||||
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
|
||||
|
@ -4,6 +4,67 @@ sidebar_label: Polygons
|
||||
title: "Functions for Working with Polygons"
|
||||
---
|
||||
|
||||
## WKT
|
||||
|
||||
Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](../../data-types/geo.md). Supported WKT objects are:
|
||||
|
||||
- POINT
|
||||
- POLYGON
|
||||
- MULTIPOLYGON
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
WKT(geo_data)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
`geo_data` can be one of the following [Geo Data Types](../../data-types/geo.md) or their underlying primitive types:
|
||||
|
||||
- [Point](../../data-types/geo.md#point)
|
||||
- [Ring](../../data-types/geo.md#ring)
|
||||
- [Polygon](../../data-types/geo.md#polygon)
|
||||
- [MultiPolygon](../../data-types/geo.md#multipolygon)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- WKT geometric object `POINT` is returned for a Point.
|
||||
- WKT geometric object `POLYGON` is returned for a Polygon
|
||||
- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon.
|
||||
|
||||
**Examples**
|
||||
|
||||
POINT from tuple:
|
||||
|
||||
```sql
|
||||
SELECT wkt((0., 0.));
|
||||
```
|
||||
|
||||
```response
|
||||
POINT(0 0)
|
||||
```
|
||||
|
||||
POLYGON from an array of tuples or an array of tuple arrays:
|
||||
|
||||
```sql
|
||||
SELECT wkt([(0., 0.), (10., 0.), (10., 10.), (0., 10.)]);
|
||||
```
|
||||
|
||||
```response
|
||||
POLYGON((0 0,10 0,10 10,0 10))
|
||||
```
|
||||
|
||||
MULTIPOLYGON from an array of multi-dimensional tuple arrays:
|
||||
|
||||
```sql
|
||||
SELECT wkt([[[(0., 0.), (10., 0.), (10., 10.), (0., 10.)], [(4., 4.), (5., 4.), (5., 5.), (4., 5.)]], [[(-10., -10.), (-10., -9.), (-9., 10.)]]]);
|
||||
```
|
||||
|
||||
```response
|
||||
MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))
|
||||
```
|
||||
|
||||
## readWKTMultiPolygon
|
||||
|
||||
Converts a WKT (Well Known Text) MultiPolygon into a MultiPolygon type.
|
||||
|
@ -30,7 +30,6 @@ position(haystack, needle[, start_pos])
|
||||
|
||||
Alias:
|
||||
- `position(needle IN haystack)`
|
||||
- `locate(haystack, needle[, start_pos])`.
|
||||
|
||||
**Arguments**
|
||||
|
||||
@ -49,7 +48,7 @@ If substring `needle` is empty, these rules apply:
|
||||
- if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos`
|
||||
- otherwise: return `0`
|
||||
|
||||
The same rules also apply to functions `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`
|
||||
The same rules also apply to functions `locate`, `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`.
|
||||
|
||||
Type: `Integer`.
|
||||
|
||||
@ -114,6 +113,21 @@ SELECT
|
||||
└─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘
|
||||
```
|
||||
|
||||
## locate
|
||||
|
||||
Like [position](#position) but with arguments `haystack` and `locate` switched.
|
||||
|
||||
The behavior of this function depends on the ClickHouse version:
|
||||
- in versions < v24.3, `locate` was an alias of function `position` and accepted arguments `(haystack, needle[, start_pos])`.
|
||||
- in versions >= 24.3,, `locate` is an individual function (for better compatibility with MySQL) and accepts arguments `(needle, haystack[, start_pos])`. The previous behavior
|
||||
can be restored using setting [function_locate_has_mysql_compatible_argument_order = false](../../operations/settings/settings.md#function-locate-has-mysql-compatible-argument-order);
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
locate(needle, haystack[, start_pos])
|
||||
```
|
||||
|
||||
## positionCaseInsensitive
|
||||
|
||||
Like [position](#position) but searches case-insensitively.
|
||||
|
@ -350,6 +350,7 @@ ALTER TABLE mt DELETE IN PARTITION ID '2' WHERE p = 2;
|
||||
You can specify the partition expression in `ALTER ... PARTITION` queries in different ways:
|
||||
|
||||
- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
|
||||
- Using the keyword `ALL`. It can be used only with DROP/DETACH/ATTACH. For example, `ALTER TABLE visits ATTACH PARTITION ALL`.
|
||||
- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
|
||||
- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in a single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
|
||||
- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
|
||||
|
@ -13,13 +13,6 @@ a system table called `system.dropped_tables`.
|
||||
|
||||
If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view.
|
||||
|
||||
:::note
|
||||
UNDROP TABLE is experimental. To use it add this setting:
|
||||
```sql
|
||||
set allow_experimental_undrop_table_query = 1;
|
||||
```
|
||||
:::
|
||||
|
||||
:::tip
|
||||
Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md)
|
||||
:::
|
||||
@ -32,60 +25,53 @@ UNDROP TABLE [db.]name [UUID '<uuid>'] [ON CLUSTER cluster]
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
set allow_experimental_undrop_table_query = 1;
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE TABLE undropMe
|
||||
CREATE TABLE tab
|
||||
(
|
||||
`id` UInt8
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
```
|
||||
ORDER BY id;
|
||||
|
||||
DROP TABLE tab;
|
||||
|
||||
```sql
|
||||
DROP TABLE undropMe
|
||||
```
|
||||
```sql
|
||||
SELECT *
|
||||
FROM system.dropped_tables
|
||||
FORMAT Vertical
|
||||
FORMAT Vertical;
|
||||
```
|
||||
|
||||
```response
|
||||
Row 1:
|
||||
──────
|
||||
index: 0
|
||||
database: default
|
||||
table: undropMe
|
||||
table: tab
|
||||
uuid: aa696a1a-1d70-4e60-a841-4c80827706cc
|
||||
engine: MergeTree
|
||||
metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
|
||||
metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.tab.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
|
||||
table_dropped_time: 2023-04-05 14:12:12
|
||||
|
||||
1 row in set. Elapsed: 0.001 sec.
|
||||
```
|
||||
|
||||
```sql
|
||||
UNDROP TABLE undropMe
|
||||
```
|
||||
```response
|
||||
Ok.
|
||||
```
|
||||
```sql
|
||||
UNDROP TABLE tab;
|
||||
|
||||
SELECT *
|
||||
FROM system.dropped_tables
|
||||
FORMAT Vertical
|
||||
```
|
||||
FORMAT Vertical;
|
||||
|
||||
```response
|
||||
Ok.
|
||||
|
||||
0 rows in set. Elapsed: 0.001 sec.
|
||||
```
|
||||
|
||||
```sql
|
||||
DESCRIBE TABLE undropMe
|
||||
FORMAT Vertical
|
||||
DESCRIBE TABLE tab
|
||||
FORMAT Vertical;
|
||||
```
|
||||
|
||||
```response
|
||||
Row 1:
|
||||
──────
|
||||
|
@ -6,6 +6,11 @@ sidebar_label: jdbc
|
||||
|
||||
# jdbc
|
||||
|
||||
:::note
|
||||
clickhouse-jdbc-bridge contains experimental codes and is no longer supported. It may contain reliability issues and security vulnerabilities. Use it at your own risk.
|
||||
ClickHouse recommend using built-in table functions in ClickHouse which provide a better alternative for ad-hoc querying scenarios (Postgres, MySQL, MongoDB, etc).
|
||||
:::
|
||||
|
||||
`jdbc(datasource, schema, table)` - returns table that is connected via JDBC driver.
|
||||
|
||||
This table function requires separate [clickhouse-jdbc-bridge](https://github.com/ClickHouse/clickhouse-jdbc-bridge) program to be running.
|
||||
|
@ -27,9 +27,9 @@ DateTime([timezone])
|
||||
|
||||
Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`.
|
||||
|
||||
ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings-formats.md#date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime).
|
||||
ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime).
|
||||
|
||||
При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format).
|
||||
При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_input_format).
|
||||
|
||||
## Примеры {#primery}
|
||||
|
||||
@ -119,8 +119,8 @@ FROM dt
|
||||
- [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md)
|
||||
- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
|
||||
- [Функции для работы с массивами](../../sql-reference/functions/array-functions.md)
|
||||
- [Настройка `date_time_input_format`](../../operations/settings/settings-formats.md#date_time_input_format)
|
||||
- [Настройка `date_time_output_format`](../../operations/settings/settings-formats.md#date_time_output_format)
|
||||
- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format)
|
||||
- [Настройка `date_time_output_format`](../../operations/settings/index.md)
|
||||
- [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
|
||||
- [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone)
|
||||
- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)
|
||||
|
@ -143,7 +143,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
|
||||
ParserCodec codec_parser;
|
||||
|
||||
std::string codecs_line = boost::algorithm::join(codecs, ",");
|
||||
auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
codec = CompressionCodecFactory::instance().get(ast, nullptr);
|
||||
}
|
||||
else
|
||||
|
@ -234,7 +234,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
||||
size_t approx_query_length = multiple ? find_first_symbols<';'>(pos, end) - pos : end - pos;
|
||||
|
||||
ASTPtr res = parseQueryAndMovePosition(
|
||||
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth);
|
||||
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth, cmd_settings.max_parser_backtracks);
|
||||
|
||||
std::unique_ptr<ReadBuffer> insert_query_payload = nullptr;
|
||||
/// If the query is INSERT ... VALUES, then we will try to parse the data.
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "Commands.h"
|
||||
#include <queue>
|
||||
#include "KeeperClient.h"
|
||||
#include "Parsers/CommonParsers.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -106,13 +107,13 @@ bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> &
|
||||
|
||||
int mode = zkutil::CreateMode::Persistent;
|
||||
|
||||
if (ParserKeyword{"PERSISTENT"}.ignore(pos, expected))
|
||||
if (ParserKeyword(Keyword::PERSISTENT).ignore(pos, expected))
|
||||
mode = zkutil::CreateMode::Persistent;
|
||||
else if (ParserKeyword{"EPHEMERAL"}.ignore(pos, expected))
|
||||
else if (ParserKeyword(Keyword::EPHEMERAL).ignore(pos, expected))
|
||||
mode = zkutil::CreateMode::Ephemeral;
|
||||
else if (ParserKeyword{"EPHEMERAL SEQUENTIAL"}.ignore(pos, expected))
|
||||
else if (ParserKeyword(Keyword::EPHEMERAL_SEQUENTIAL).ignore(pos, expected))
|
||||
mode = zkutil::CreateMode::EphemeralSequential;
|
||||
else if (ParserKeyword{"PERSISTENT SEQUENTIAL"}.ignore(pos, expected))
|
||||
else if (ParserKeyword(Keyword::PERSISTENT_SEQUENTIAL).ignore(pos, expected))
|
||||
mode = zkutil::CreateMode::PersistentSequential;
|
||||
|
||||
node->args.push_back(std::move(mode));
|
||||
@ -382,12 +383,16 @@ void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
|
||||
|
||||
bool ReconfigCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
|
||||
{
|
||||
ParserKeyword s_add(Keyword::ADD);
|
||||
ParserKeyword s_remove(Keyword::REMOVE);
|
||||
ParserKeyword s_set(Keyword::SET);
|
||||
|
||||
ReconfigCommand::Operation operation;
|
||||
if (ParserKeyword{"ADD"}.ignore(pos, expected))
|
||||
if (s_add.ignore(pos, expected))
|
||||
operation = ReconfigCommand::Operation::ADD;
|
||||
else if (ParserKeyword{"REMOVE"}.ignore(pos, expected))
|
||||
else if (s_remove.ignore(pos, expected))
|
||||
operation = ReconfigCommand::Operation::REMOVE;
|
||||
else if (ParserKeyword{"SET"}.ignore(pos, expected))
|
||||
else if (s_set.ignore(pos, expected))
|
||||
operation = ReconfigCommand::Operation::SET;
|
||||
else
|
||||
return false;
|
||||
|
@ -44,7 +44,7 @@ String KeeperClient::executeFourLetterCommand(const String & command)
|
||||
std::vector<String> KeeperClient::getCompletions(const String & prefix) const
|
||||
{
|
||||
Tokens tokens(prefix.data(), prefix.data() + prefix.size(), 0, false);
|
||||
IParser::Pos pos(tokens, 0);
|
||||
IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
|
||||
if (pos->type != TokenType::BareWord)
|
||||
return registered_commands_and_four_letter_words;
|
||||
@ -278,6 +278,7 @@ bool KeeperClient::processQueryText(const String & text)
|
||||
/* allow_multi_statements = */ true,
|
||||
/* max_query_size = */ 0,
|
||||
/* max_parser_depth = */ 0,
|
||||
/* max_parser_backtracks = */ 0,
|
||||
/* skip_insignificant = */ false);
|
||||
|
||||
if (!res)
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <IO/UseSSL.h>
|
||||
#include <Core/ServerUUID.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/CgroupsMemoryUsageObserver.h>
|
||||
#include <Common/ErrorHandlers.h>
|
||||
#include <Common/assertProcessUserMatchesDataOwner.h>
|
||||
#include <Common/makeSocketAddress.h>
|
||||
@ -623,6 +624,25 @@ try
|
||||
buildLoggers(config(), logger());
|
||||
main_config_reloader->start();
|
||||
|
||||
std::optional<CgroupsMemoryUsageObserver> cgroups_memory_usage_observer;
|
||||
try
|
||||
{
|
||||
auto wait_time = config().getUInt64("keeper_server.cgroups_memory_observer_wait_time", 15);
|
||||
if (wait_time != 0)
|
||||
{
|
||||
cgroups_memory_usage_observer.emplace(std::chrono::seconds(wait_time));
|
||||
/// Not calling cgroups_memory_usage_observer->setLimits() here (as for the normal ClickHouse server) because Keeper controls
|
||||
/// its memory usage by other means (via setting 'max_memory_usage_soft_limit').
|
||||
cgroups_memory_usage_observer->setOnMemoryAmountAvailableChangedFn([&]() { main_config_reloader->reload(); });
|
||||
cgroups_memory_usage_observer->startThread();
|
||||
}
|
||||
}
|
||||
catch (Exception &)
|
||||
{
|
||||
tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization");
|
||||
}
|
||||
|
||||
|
||||
LOG_INFO(log, "Ready for connections.");
|
||||
|
||||
waitForTerminationRequest();
|
||||
|
@ -11,6 +11,7 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
|
||||
LibraryBridgeHandlers.cpp
|
||||
SharedLibrary.cpp
|
||||
library-bridge.cpp
|
||||
createFunctionBaseCast.cpp
|
||||
)
|
||||
|
||||
clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Bridge/IBridge.h>
|
||||
#include "LibraryBridgeHandlerFactory.h"
|
||||
|
||||
|
23
programs/library-bridge/createFunctionBaseCast.cpp
Normal file
23
programs/library-bridge/createFunctionBaseCast.cpp
Normal file
@ -0,0 +1,23 @@
|
||||
#include <memory>
|
||||
#include <Functions/CastOverloadResolver.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
class IFunctionBase;
|
||||
using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
|
||||
|
||||
FunctionBasePtr createFunctionBaseCast(
|
||||
ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional<CastDiagnostic>, CastType)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge");
|
||||
}
|
||||
|
||||
}
|
@ -1000,12 +1000,6 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv)
|
||||
{
|
||||
std::vector<char *> argv(*pargv, *pargv + (*pargc + 1));
|
||||
|
||||
if (!isClickhouseApp("local", argv))
|
||||
{
|
||||
std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode, only clickhouse local is available." << "\033[0m" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/// As a user you can add flags to clickhouse binary in fuzzing mode as follows
|
||||
/// clickhouse local <set of clickhouse-local specific flag> -- <set of libfuzzer flags>
|
||||
|
||||
@ -1013,13 +1007,16 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv)
|
||||
|
||||
auto it = argv.begin() + 1;
|
||||
for (; *it; ++it)
|
||||
{
|
||||
if (strcmp(*it, "--") == 0)
|
||||
{
|
||||
++it;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
while (*it)
|
||||
{
|
||||
if (strncmp(*it, "--", 2) != 0)
|
||||
{
|
||||
*(p++) = *it;
|
||||
@ -1027,6 +1024,7 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv)
|
||||
}
|
||||
else
|
||||
++it;
|
||||
}
|
||||
|
||||
*pargc = static_cast<int>(p - &(*pargv)[0]);
|
||||
*p = nullptr;
|
||||
|
@ -68,7 +68,6 @@ namespace
|
||||
using MainFunc = int (*)(int, char**);
|
||||
|
||||
#if !defined(FUZZING_MODE)
|
||||
|
||||
/// Add an item here to register new application
|
||||
std::pair<std::string_view, MainFunc> clickhouse_applications[] =
|
||||
{
|
||||
@ -105,13 +104,6 @@ std::pair<std::string_view, MainFunc> clickhouse_applications[] =
|
||||
{"restart", mainEntryClickHouseRestart},
|
||||
};
|
||||
|
||||
/// Add an item here to register a new short name
|
||||
std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
|
||||
{
|
||||
{"chl", "local"},
|
||||
{"chc", "client"},
|
||||
};
|
||||
|
||||
int printHelp(int, char **)
|
||||
{
|
||||
std::cerr << "Use one of the following commands:" << std::endl;
|
||||
@ -121,6 +113,13 @@ int printHelp(int, char **)
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Add an item here to register a new short name
|
||||
std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
|
||||
{
|
||||
{"chl", "local"},
|
||||
{"chc", "client"},
|
||||
};
|
||||
|
||||
|
||||
enum class InstructionFail
|
||||
{
|
||||
|
@ -13,6 +13,7 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
|
||||
getIdentifierQuote.cpp
|
||||
odbc-bridge.cpp
|
||||
validateODBCConnectionString.cpp
|
||||
createFunctionBaseCast.cpp
|
||||
)
|
||||
|
||||
clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
|
||||
|
23
programs/odbc-bridge/createFunctionBaseCast.cpp
Normal file
23
programs/odbc-bridge/createFunctionBaseCast.cpp
Normal file
@ -0,0 +1,23 @@
|
||||
#include <memory>
|
||||
#include <Functions/CastOverloadResolver.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
class IFunctionBase;
|
||||
using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
|
||||
|
||||
FunctionBasePtr createFunctionBaseCast(
|
||||
ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional<CastDiagnostic>, CastType)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge");
|
||||
}
|
||||
|
||||
}
|
@ -733,8 +733,6 @@ try
|
||||
LOG_INFO(log, "Available CPU instruction sets: {}", cpu_info);
|
||||
#endif
|
||||
|
||||
sanityChecks(*this);
|
||||
|
||||
// Initialize global thread pool. Do it before we fetch configs from zookeeper
|
||||
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
|
||||
// ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well.
|
||||
@ -904,6 +902,7 @@ try
|
||||
config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH));
|
||||
config().removeConfiguration(old_configuration.get());
|
||||
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
|
||||
global_context->setConfig(loaded_config.configuration);
|
||||
}
|
||||
|
||||
Settings::checkNoSettingNamesAtTopLevel(config(), config_path);
|
||||
@ -911,6 +910,9 @@ try
|
||||
/// We need to reload server settings because config could be updated via zookeeper.
|
||||
server_settings.loadSettingsFromConfig(config());
|
||||
|
||||
/// NOTE: Do sanity checks after we loaded all possible substitutions (for the configuration) from ZK
|
||||
sanityChecks(*this);
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
std::string executable_path = getExecutablePath();
|
||||
|
||||
@ -1294,7 +1296,7 @@ try
|
||||
std::optional<CgroupsMemoryUsageObserver> cgroups_memory_usage_observer;
|
||||
try
|
||||
{
|
||||
UInt64 wait_time = server_settings.cgroups_memory_usage_observer_wait_time;
|
||||
auto wait_time = server_settings.cgroups_memory_usage_observer_wait_time;
|
||||
if (wait_time != 0)
|
||||
cgroups_memory_usage_observer.emplace(std::chrono::seconds(wait_time));
|
||||
}
|
||||
@ -1360,7 +1362,7 @@ try
|
||||
{
|
||||
double hard_limit_ratio = new_server_settings.cgroup_memory_watcher_hard_limit_ratio;
|
||||
double soft_limit_ratio = new_server_settings.cgroup_memory_watcher_soft_limit_ratio;
|
||||
cgroups_memory_usage_observer->setLimits(
|
||||
cgroups_memory_usage_observer->setMemoryUsageLimits(
|
||||
static_cast<uint64_t>(max_server_memory_usage * hard_limit_ratio),
|
||||
static_cast<uint64_t>(max_server_memory_usage * soft_limit_ratio));
|
||||
}
|
||||
@ -1718,6 +1720,12 @@ try
|
||||
throw;
|
||||
}
|
||||
|
||||
if (cgroups_memory_usage_observer)
|
||||
{
|
||||
cgroups_memory_usage_observer->setOnMemoryAmountAvailableChangedFn([&]() { main_config_reloader->reload(); });
|
||||
cgroups_memory_usage_observer->startThread();
|
||||
}
|
||||
|
||||
/// Reload config in SYSTEM RELOAD CONFIG query.
|
||||
global_context->setConfigReloadCallback([&]()
|
||||
{
|
||||
|
@ -297,7 +297,7 @@ namespace
|
||||
|
||||
|
||||
std::pair<String, BackupEntryPtr> makeBackupEntryForAccess(
|
||||
const std::vector<std::pair<UUID, AccessEntityPtr>> access_entities,
|
||||
const std::vector<std::pair<UUID, AccessEntityPtr>> & access_entities,
|
||||
const String & data_path_in_backup,
|
||||
size_t counter,
|
||||
const AccessControl & access_control)
|
||||
@ -326,7 +326,7 @@ void AccessRestorerFromBackup::addDataPath(const String & data_path)
|
||||
return;
|
||||
|
||||
fs::path data_path_in_backup_fs = data_path;
|
||||
Strings filenames = backup->listFiles(data_path);
|
||||
Strings filenames = backup->listFiles(data_path, /*recursive*/ false);
|
||||
if (filenames.empty())
|
||||
return;
|
||||
|
||||
|
@ -21,7 +21,7 @@ struct RestoreSettings;
|
||||
|
||||
/// Makes a backup of access entities of a specified type.
|
||||
std::pair<String, BackupEntryPtr> makeBackupEntryForAccess(
|
||||
const std::vector<std::pair<UUID, AccessEntityPtr>> access_entities,
|
||||
const std::vector<std::pair<UUID, AccessEntityPtr>> & access_entities,
|
||||
const String & data_path_in_backup,
|
||||
size_t counter,
|
||||
const AccessControl & access_control);
|
||||
|
@ -62,7 +62,7 @@ AccessEntityPtr deserializeAccessEntityImpl(const String & definition)
|
||||
const char * end = begin + definition.size();
|
||||
while (pos < end)
|
||||
{
|
||||
queries.emplace_back(parseQueryAndMovePosition(parser, pos, end, "", true, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH));
|
||||
queries.emplace_back(parseQueryAndMovePosition(parser, pos, end, "", true, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS));
|
||||
while (isWhitespaceASCII(*pos) || *pos == ';')
|
||||
++pos;
|
||||
}
|
||||
|
@ -13,63 +13,63 @@ namespace ErrorCodes
|
||||
|
||||
const AuthenticationTypeInfo & AuthenticationTypeInfo::get(AuthenticationType type_)
|
||||
{
|
||||
static constexpr auto make_info = [](const char * raw_name_, bool is_password_ = false)
|
||||
static constexpr auto make_info = [](Keyword keyword_, bool is_password_ = false)
|
||||
{
|
||||
String init_name = raw_name_;
|
||||
String init_name = String(toStringView(keyword_));
|
||||
boost::to_lower(init_name);
|
||||
return AuthenticationTypeInfo{raw_name_, std::move(init_name), is_password_};
|
||||
return AuthenticationTypeInfo{keyword_, std::move(init_name), is_password_};
|
||||
};
|
||||
|
||||
switch (type_)
|
||||
{
|
||||
case AuthenticationType::NO_PASSWORD:
|
||||
{
|
||||
static const auto info = make_info("NO_PASSWORD");
|
||||
static const auto info = make_info(Keyword::NO_PASSWORD);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::PLAINTEXT_PASSWORD:
|
||||
{
|
||||
static const auto info = make_info("PLAINTEXT_PASSWORD", true);
|
||||
static const auto info = make_info(Keyword::PLAINTEXT_PASSWORD, true);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::SHA256_PASSWORD:
|
||||
{
|
||||
static const auto info = make_info("SHA256_PASSWORD", true);
|
||||
static const auto info = make_info(Keyword::SHA256_PASSWORD, true);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::DOUBLE_SHA1_PASSWORD:
|
||||
{
|
||||
static const auto info = make_info("DOUBLE_SHA1_PASSWORD", true);
|
||||
static const auto info = make_info(Keyword::DOUBLE_SHA1_PASSWORD, true);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::LDAP:
|
||||
{
|
||||
static const auto info = make_info("LDAP");
|
||||
static const auto info = make_info(Keyword::LDAP);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::KERBEROS:
|
||||
{
|
||||
static const auto info = make_info("KERBEROS");
|
||||
static const auto info = make_info(Keyword::KERBEROS);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::SSL_CERTIFICATE:
|
||||
{
|
||||
static const auto info = make_info("SSL_CERTIFICATE");
|
||||
static const auto info = make_info(Keyword::SSL_CERTIFICATE);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::BCRYPT_PASSWORD:
|
||||
{
|
||||
static const auto info = make_info("BCRYPT_PASSWORD", true);
|
||||
static const auto info = make_info(Keyword::BCRYPT_PASSWORD, true);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::SSH_KEY:
|
||||
{
|
||||
static const auto info = make_info("SSH_KEY");
|
||||
static const auto info = make_info(Keyword::SSH_KEY);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::HTTP:
|
||||
{
|
||||
static const auto info = make_info("HTTP");
|
||||
static const auto info = make_info(Keyword::HTTP);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::MAX:
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
#include <Parsers/CommonParsers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -45,7 +46,7 @@ enum class AuthenticationType
|
||||
|
||||
struct AuthenticationTypeInfo
|
||||
{
|
||||
const char * const raw_name;
|
||||
Keyword keyword; // Keyword used in parser
|
||||
const String name; /// Lowercased with underscores, e.g. "sha256_password".
|
||||
bool is_password;
|
||||
static const AuthenticationTypeInfo & get(AuthenticationType type_);
|
||||
@ -53,7 +54,7 @@ struct AuthenticationTypeInfo
|
||||
|
||||
inline String toString(AuthenticationType type_)
|
||||
{
|
||||
return AuthenticationTypeInfo::get(type_).raw_name;
|
||||
return String(toStringView(AuthenticationTypeInfo::get(type_).keyword));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ namespace DB
|
||||
{
|
||||
|
||||
ContextAccessParams::ContextAccessParams(
|
||||
const std::optional<UUID> user_id_,
|
||||
std::optional<UUID> user_id_,
|
||||
bool full_access_,
|
||||
bool use_default_roles_,
|
||||
const std::shared_ptr<const std::vector<UUID>> & current_roles_,
|
||||
|
@ -15,7 +15,7 @@ class ContextAccessParams
|
||||
{
|
||||
public:
|
||||
ContextAccessParams(
|
||||
const std::optional<UUID> user_id_,
|
||||
std::optional<UUID> user_id_,
|
||||
bool full_access_,
|
||||
bool use_default_roles_,
|
||||
const std::shared_ptr<const std::vector<UUID>> & current_roles_,
|
||||
|
@ -34,7 +34,7 @@ public:
|
||||
};
|
||||
|
||||
explicit GSSAcceptorContext(const Params & params_);
|
||||
virtual ~GSSAcceptorContext() override;
|
||||
~GSSAcceptorContext() override;
|
||||
|
||||
GSSAcceptorContext(const GSSAcceptorContext &) = delete;
|
||||
GSSAcceptorContext(GSSAcceptorContext &&) = delete;
|
||||
|
@ -204,7 +204,7 @@ void LDAPAccessStorage::assignRolesNoLock(User & user, const LDAPClient::SearchR
|
||||
}
|
||||
|
||||
|
||||
void LDAPAccessStorage::assignRolesNoLock(User & user, const LDAPClient::SearchResultsList & external_roles, const std::size_t external_roles_hash) const
|
||||
void LDAPAccessStorage::assignRolesNoLock(User & user, const LDAPClient::SearchResultsList & external_roles, std::size_t external_roles_hash) const
|
||||
{
|
||||
const auto & user_name = user.getName();
|
||||
auto & granted_roles = user.granted_roles;
|
||||
|
@ -33,29 +33,29 @@ public:
|
||||
static constexpr char STORAGE_TYPE[] = "ldap";
|
||||
|
||||
explicit LDAPAccessStorage(const String & storage_name_, AccessControl & access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix);
|
||||
virtual ~LDAPAccessStorage() override = default;
|
||||
~LDAPAccessStorage() override = default;
|
||||
|
||||
String getLDAPServerName() const;
|
||||
|
||||
// IAccessStorage implementations.
|
||||
virtual const char * getStorageType() const override;
|
||||
virtual String getStorageParamsJSON() const override;
|
||||
virtual bool isReadOnly() const override { return true; }
|
||||
virtual bool exists(const UUID & id) const override;
|
||||
const char * getStorageType() const override;
|
||||
String getStorageParamsJSON() const override;
|
||||
bool isReadOnly() const override { return true; }
|
||||
bool exists(const UUID & id) const override;
|
||||
|
||||
private: // IAccessStorage implementations.
|
||||
virtual std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
|
||||
virtual std::vector<UUID> findAllImpl(AccessEntityType type) const override;
|
||||
virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
|
||||
virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
|
||||
virtual std::optional<AuthResult> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
|
||||
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
|
||||
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
|
||||
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
|
||||
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
|
||||
std::optional<AuthResult> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
|
||||
|
||||
void setConfiguration(const Poco::Util::AbstractConfiguration & config, const String & prefix);
|
||||
void processRoleChange(const UUID & id, const AccessEntityPtr & entity);
|
||||
|
||||
void applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name);
|
||||
void assignRolesNoLock(User & user, const LDAPClient::SearchResultsList & external_roles) const;
|
||||
void assignRolesNoLock(User & user, const LDAPClient::SearchResultsList & external_roles, const std::size_t external_roles_hash) const;
|
||||
void assignRolesNoLock(User & user, const LDAPClient::SearchResultsList & external_roles, std::size_t external_roles_hash) const;
|
||||
void updateAssignedRolesNoLock(const UUID & id, const String & user_name, const LDAPClient::SearchResultsList & external_roles) const;
|
||||
std::set<String> mapExternalRolesNoLock(const LDAPClient::SearchResultsList & external_roles) const;
|
||||
bool areLDAPCredentialsValidNoLock(const User & user, const Credentials & credentials,
|
||||
|
@ -86,7 +86,7 @@ void RowPolicyCache::PolicyInfo::setPolicy(const RowPolicyPtr & policy_)
|
||||
try
|
||||
{
|
||||
ParserExpression parser;
|
||||
parsed_filters[filter_type_i] = parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
parsed_filters[filter_type_i] = parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -66,7 +66,7 @@ namespace
|
||||
|
||||
String error_message;
|
||||
const char * pos = string_query.data();
|
||||
auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, 0);
|
||||
auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true);
|
||||
|
||||
if (!ast)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse grant query. Error: {}", error_message);
|
||||
|
@ -252,7 +252,6 @@ void dumpFlameGraph(
|
||||
fillColumn(chars, offsets, out.str());
|
||||
}
|
||||
|
||||
// NOLINTBEGIN(clang-analyzer-optin.performance.Padding)
|
||||
struct AggregateFunctionFlameGraphData
|
||||
{
|
||||
struct Entry
|
||||
@ -469,7 +468,6 @@ struct AggregateFunctionFlameGraphData
|
||||
DB::dumpFlameGraph(tree.dump(max_depth, min_bytes), chars, offsets);
|
||||
}
|
||||
};
|
||||
// NOLINTEND(clang-analyzer-optin.performance.Padding)
|
||||
|
||||
/// Aggregate function which builds a flamegraph using the list of stacktraces.
|
||||
/// The output is an array of strings which can be used by flamegraph.pl util.
|
||||
|
@ -157,7 +157,7 @@ public:
|
||||
|
||||
void update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient) override;
|
||||
|
||||
virtual void merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) override;
|
||||
void merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) override;
|
||||
|
||||
void write(WriteBuffer & buf) const override;
|
||||
|
||||
@ -189,7 +189,7 @@ public:
|
||||
|
||||
void update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient) override;
|
||||
|
||||
virtual void merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) override;
|
||||
void merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) override;
|
||||
|
||||
void write(WriteBuffer & buf) const override;
|
||||
|
||||
@ -226,7 +226,7 @@ public:
|
||||
|
||||
void update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient) override;
|
||||
|
||||
virtual void merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) override;
|
||||
void merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) override;
|
||||
|
||||
void write(WriteBuffer & buf) const override;
|
||||
|
||||
|
@ -234,9 +234,6 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <template <typename> typename FunctionTemplate, StatisticsFunctionKind kind>
|
||||
AggregateFunctionPtr createAggregateFunctionStatisticsUnary(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
@ -273,5 +270,3 @@ AggregateFunctionPtr createAggregateFunctionStatisticsBinary(
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -483,6 +483,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; }
|
||||
bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override
|
||||
{
|
||||
@ -576,6 +577,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; }
|
||||
bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override
|
||||
{
|
||||
|
@ -42,9 +42,6 @@ struct UniqCombinedHashTableGrower : public HashTableGrowerWithPrecalculation<>
|
||||
void increaseSize() { increaseSizeDegree(1); }
|
||||
};
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T, UInt8 K, typename HashValueType>
|
||||
struct AggregateFunctionUniqCombinedData
|
||||
{
|
||||
@ -268,8 +265,6 @@ AggregateFunctionPtr createAggregateFunctionWithK(const DataTypes & argument_typ
|
||||
return std::make_shared<typename WithK<K, HashValueType>::template AggregateFunctionVariadic<false, false>>(argument_types, params);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template <UInt8 K>
|
||||
AggregateFunctionPtr createAggregateFunctionWithHashType(bool use_64_bit_hash, const DataTypes & argument_types, const Array & params)
|
||||
{
|
||||
|
@ -142,6 +142,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -165,6 +165,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -111,6 +111,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -152,6 +152,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_function->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_function->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
@ -520,7 +521,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
chassert(nullable_filters.size() > 0);
|
||||
chassert(!nullable_filters.empty());
|
||||
bool found_one = false;
|
||||
if (nullable_filters.size() == 1)
|
||||
{
|
||||
|
@ -92,6 +92,7 @@ public:
|
||||
}
|
||||
|
||||
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
|
||||
bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
|
||||
{
|
||||
|
@ -38,7 +38,7 @@ public:
|
||||
offset = other->offset;
|
||||
}
|
||||
|
||||
int length()
|
||||
int length() const
|
||||
{
|
||||
return static_cast<int>(bins.size());
|
||||
}
|
||||
|
@ -162,6 +162,10 @@ public:
|
||||
/// Tells if merge() with thread pool parameter could be used.
|
||||
virtual bool isAbleToParallelizeMerge() const { return false; }
|
||||
|
||||
/// Return true if it is allowed to replace call of `addBatch`
|
||||
/// to `addBatchSinglePlace` for ranges of consecutive equal keys.
|
||||
virtual bool canOptimizeEqualKeysRanges() const { return true; }
|
||||
|
||||
/// Should be used only if isAbleToParallelizeMerge() returned true.
|
||||
virtual void
|
||||
merge(AggregateDataPtr __restrict /*place*/, ConstAggregateDataPtr /*rhs*/, ThreadPool & /*thread_pool*/, Arena * /*arena*/) const
|
||||
|
@ -150,7 +150,7 @@ struct QuantileExactExclusive : public QuantileExact<Value>
|
||||
return static_cast<Float64>(*std::min_element(array.begin(), array.end()));
|
||||
|
||||
::nth_element(array.begin(), array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
auto * nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_elem - array[n - 1]);
|
||||
}
|
||||
@ -179,7 +179,7 @@ struct QuantileExactExclusive : public QuantileExact<Value>
|
||||
else
|
||||
{
|
||||
::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
auto * nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_elem - array[n - 1]);
|
||||
prev_n = n - 1;
|
||||
@ -214,7 +214,7 @@ struct QuantileExactInclusive : public QuantileExact<Value>
|
||||
else if (n < 1)
|
||||
return static_cast<Float64>(*std::min_element(array.begin(), array.end()));
|
||||
::nth_element(array.begin(), array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
auto * nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_elem - array[n - 1]);
|
||||
}
|
||||
@ -241,7 +241,7 @@ struct QuantileExactInclusive : public QuantileExact<Value>
|
||||
else
|
||||
{
|
||||
::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
auto * nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * (static_cast<Float64>(*nth_elem) - array[n - 1]);
|
||||
prev_n = n - 1;
|
||||
|
@ -191,7 +191,7 @@ public:
|
||||
/// TODO: After implementation of "versioning aggregate function state",
|
||||
/// change the serialization format.
|
||||
Element elem;
|
||||
memset(&elem, 0, sizeof(elem));
|
||||
memset(&elem, 0, sizeof(elem)); /// NOLINT(bugprone-undefined-memory-manipulation)
|
||||
elem = samples[i];
|
||||
|
||||
DB::transformEndianness<std::endian::little>(elem);
|
||||
|
@ -23,7 +23,7 @@ struct SingleValueDataBase
|
||||
/// For example argMin holds 1 of these (for the result), while keeping a template for the value
|
||||
static constexpr UInt32 MAX_STORAGE_SIZE = 64;
|
||||
|
||||
virtual ~SingleValueDataBase() { }
|
||||
virtual ~SingleValueDataBase() = default;
|
||||
virtual bool has() const = 0;
|
||||
virtual void insertResultInto(IColumn &) const = 0;
|
||||
virtual void write(WriteBuffer &, const ISerialization &) const = 0;
|
||||
|
@ -39,8 +39,8 @@ public:
|
||||
/// This method will convert all the SingleLevelSet to TwoLevelSet in parallel if the hashsets are not all singlelevel or not all twolevel.
|
||||
static void parallelizeMergePrepare(const std::vector<UniqExactSet *> & data_vec, ThreadPool & thread_pool)
|
||||
{
|
||||
unsigned long single_level_set_num = 0;
|
||||
unsigned long all_single_hash_size = 0;
|
||||
UInt64 single_level_set_num = 0;
|
||||
UInt64 all_single_hash_size = 0;
|
||||
|
||||
for (auto ele : data_vec)
|
||||
{
|
||||
|
@ -27,6 +27,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
|
||||
|
||||
auto initialize = [&]() mutable
|
||||
{
|
||||
if (context)
|
||||
return true;
|
||||
|
||||
shared_context = Context::createShared();
|
||||
context = Context::createGlobal(shared_context.get());
|
||||
context->makeGlobalContext();
|
||||
|
@ -81,7 +81,8 @@ void getAggregateFunctionNameAndParametersArray(
|
||||
ParserExpressionList params_parser(false);
|
||||
ASTPtr args_ast = parseQuery(params_parser,
|
||||
parameters_str.data(), parameters_str.data() + parameters_str.size(),
|
||||
"parameters of aggregate function in " + error_context, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
"parameters of aggregate function in " + error_context,
|
||||
0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
|
||||
if (args_ast->children.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect list of parameters to aggregate function {}",
|
||||
|
@ -776,6 +776,7 @@ struct IdentifierResolveScope
|
||||
std::unordered_map<QueryTreeNodePtr, TableExpressionData> table_expression_node_to_data;
|
||||
|
||||
QueryTreeNodePtrWithHashSet nullable_group_by_keys;
|
||||
QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> nullable_join_columns;
|
||||
|
||||
/// Use identifier lookup to result cache
|
||||
bool use_identifier_lookup_to_result_cache = true;
|
||||
@ -1276,7 +1277,11 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
static void convertJoinedColumnTypeToNullIfNeeded(QueryTreeNodePtr & resolved_identifier, const JoinKind & join_kind, std::optional<JoinTableSide> resolved_side)
|
||||
static QueryTreeNodePtr convertJoinedColumnTypeToNullIfNeeded(
|
||||
const QueryTreeNodePtr & resolved_identifier,
|
||||
const JoinKind & join_kind,
|
||||
std::optional<JoinTableSide> resolved_side,
|
||||
IdentifierResolveScope & scope)
|
||||
{
|
||||
if (resolved_identifier->getNodeType() == QueryTreeNodeType::COLUMN &&
|
||||
JoinCommon::canBecomeNullable(resolved_identifier->getResultType()) &&
|
||||
@ -1284,9 +1289,20 @@ private:
|
||||
(isLeft(join_kind) && resolved_side && *resolved_side == JoinTableSide::Right) ||
|
||||
(isRight(join_kind) && resolved_side && *resolved_side == JoinTableSide::Left)))
|
||||
{
|
||||
auto & resolved_column = resolved_identifier->as<ColumnNode &>();
|
||||
resolved_column.setColumnType(makeNullableOrLowCardinalityNullable(resolved_column.getColumnType()));
|
||||
auto nullable_resolved_identifier = resolved_identifier->clone();
|
||||
auto & resolved_column = nullable_resolved_identifier->as<ColumnNode &>();
|
||||
auto new_result_type = makeNullableOrLowCardinalityNullable(resolved_column.getColumnType());
|
||||
resolved_column.setColumnType(new_result_type);
|
||||
if (resolved_column.hasExpression())
|
||||
{
|
||||
auto & resolved_expression = resolved_column.getExpression();
|
||||
if (!resolved_expression->getResultType()->equals(*new_result_type))
|
||||
resolved_expression = buildCastFunction(resolved_expression, new_result_type, scope.context, true);
|
||||
}
|
||||
scope.nullable_join_columns[nullable_resolved_identifier] = resolved_identifier;
|
||||
return nullable_resolved_identifier;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Resolve identifier functions
|
||||
@ -3258,6 +3274,32 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableExpression(const Id
|
||||
return {};
|
||||
}
|
||||
|
||||
QueryTreeNodePtr checkIsMissedObjectJSONSubcolumn(const QueryTreeNodePtr & left_resolved_identifier,
|
||||
const QueryTreeNodePtr & right_resolved_identifier)
|
||||
{
|
||||
if (left_resolved_identifier && right_resolved_identifier && left_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT
|
||||
&& right_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
|
||||
{
|
||||
auto & left_resolved_column = left_resolved_identifier->as<ConstantNode &>();
|
||||
auto & right_resolved_column = right_resolved_identifier->as<ConstantNode &>();
|
||||
if (left_resolved_column.getValueStringRepresentation() == "NULL" && right_resolved_column.getValueStringRepresentation() == "NULL")
|
||||
return left_resolved_identifier;
|
||||
}
|
||||
else if (left_resolved_identifier && left_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
|
||||
{
|
||||
auto & left_resolved_column = left_resolved_identifier->as<ConstantNode &>();
|
||||
if (left_resolved_column.getValueStringRepresentation() == "NULL")
|
||||
return left_resolved_identifier;
|
||||
}
|
||||
else if (right_resolved_identifier && right_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
|
||||
{
|
||||
auto & right_resolved_column = right_resolved_identifier->as<ConstantNode &>();
|
||||
if (right_resolved_column.getValueStringRepresentation() == "NULL")
|
||||
return right_resolved_identifier;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
IdentifierResolveScope & scope)
|
||||
@ -3358,28 +3400,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
|
||||
|
||||
/// If columns from left or right table were missed Object(Nullable('json')) subcolumns, they will be replaced
|
||||
/// to ConstantNode(NULL), which can't be cast to ColumnNode, so we resolve it here.
|
||||
if (left_resolved_identifier && right_resolved_identifier && left_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT
|
||||
&& right_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
|
||||
{
|
||||
auto & left_resolved_column = left_resolved_identifier->as<ConstantNode &>();
|
||||
auto & right_resolved_column = right_resolved_identifier->as<ConstantNode &>();
|
||||
if (left_resolved_column.getValueStringRepresentation() == "NULL" && right_resolved_column.getValueStringRepresentation() == "NULL")
|
||||
return left_resolved_identifier;
|
||||
}
|
||||
else if (left_resolved_identifier && left_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
|
||||
{
|
||||
resolved_side = JoinTableSide::Left;
|
||||
auto & left_resolved_column = left_resolved_identifier->as<ConstantNode &>();
|
||||
if (left_resolved_column.getValueStringRepresentation() == "NULL")
|
||||
return left_resolved_identifier;
|
||||
}
|
||||
else if (right_resolved_identifier && right_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
|
||||
{
|
||||
resolved_side = JoinTableSide::Right;
|
||||
auto & right_resolved_column = right_resolved_identifier->as<ConstantNode &>();
|
||||
if (right_resolved_column.getValueStringRepresentation() == "NULL")
|
||||
return right_resolved_identifier;
|
||||
}
|
||||
if (auto missed_subcolumn_identifier = checkIsMissedObjectJSONSubcolumn(left_resolved_identifier, right_resolved_identifier))
|
||||
return missed_subcolumn_identifier;
|
||||
|
||||
if (left_resolved_identifier && right_resolved_identifier)
|
||||
{
|
||||
@ -3521,8 +3543,9 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
|
||||
|
||||
if (scope.join_use_nulls)
|
||||
{
|
||||
resolved_identifier = resolved_identifier->clone();
|
||||
convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side);
|
||||
auto nullable_resolved_identifier = convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side, scope);
|
||||
if (nullable_resolved_identifier)
|
||||
resolved_identifier = nullable_resolved_identifier;
|
||||
}
|
||||
|
||||
return resolved_identifier;
|
||||
@ -4402,6 +4425,28 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher(
|
||||
const auto & join_using_column_nodes_list = join_using_column_node.getExpressionOrThrow()->as<ListNode &>();
|
||||
const auto & join_using_column_nodes = join_using_column_nodes_list.getNodes();
|
||||
|
||||
/** If column doesn't exists in the table, then do not match column from USING clause.
|
||||
* Example: SELECT a + 1 AS id, * FROM (SELECT 1 AS a) AS t1 JOIN (SELECT 2 AS id) AS t2 USING (id);
|
||||
* In this case `id` is not present in the left table expression,
|
||||
* so asterisk should return `id` from the right table expression.
|
||||
*/
|
||||
auto is_column_from_parent_scope = [&scope](const QueryTreeNodePtr & using_node_from_table)
|
||||
{
|
||||
const auto & using_column_from_table = using_node_from_table->as<ColumnNode &>();
|
||||
auto table_expression_data_it = scope.table_expression_node_to_data.find(using_column_from_table.getColumnSource());
|
||||
if (table_expression_data_it != scope.table_expression_node_to_data.end())
|
||||
{
|
||||
const auto & table_expression_data = table_expression_data_it->second;
|
||||
const auto & column_name = using_column_from_table.getColumnName();
|
||||
return !table_expression_data.column_name_to_column_node.contains(column_name);
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
if (is_column_from_parent_scope(join_using_column_nodes.at(0)) ||
|
||||
is_column_from_parent_scope(join_using_column_nodes.at(1)))
|
||||
continue;
|
||||
|
||||
QueryTreeNodePtr matched_column_node;
|
||||
|
||||
if (isRight(join_node->getKind()))
|
||||
@ -4523,7 +4568,15 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
|
||||
for (auto & [node, node_name] : matched_expression_nodes_with_names)
|
||||
{
|
||||
auto join_identifier_side = getColumnSideFromJoinTree(node, *nearest_scope_join_node);
|
||||
convertJoinedColumnTypeToNullIfNeeded(node, nearest_scope_join_node->getKind(), join_identifier_side);
|
||||
auto projection_name_it = node_to_projection_name.find(node);
|
||||
auto nullable_node = convertJoinedColumnTypeToNullIfNeeded(node, nearest_scope_join_node->getKind(), join_identifier_side, scope);
|
||||
if (nullable_node)
|
||||
{
|
||||
node = nullable_node;
|
||||
/// Set the same projection name for new nullable node
|
||||
if (projection_name_it != node_to_projection_name.end())
|
||||
node_to_projection_name.emplace(node, projection_name_it->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -7304,8 +7357,64 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS
|
||||
|
||||
join_using_identifiers.insert(identifier_full_name);
|
||||
|
||||
const auto & settings = scope.context->getSettingsRef();
|
||||
|
||||
/** While resolving JOIN USING identifier, try to resolve identifier from parent subquery projection.
|
||||
* Example: SELECT a + 1 AS b FROM (SELECT 1 AS a) t1 JOIN (SELECT 2 AS b) USING b
|
||||
* In this case `b` is not in the left table expression, but it is in the parent subquery projection.
|
||||
*/
|
||||
auto try_resolve_identifier_from_query_projection = [this](const String & identifier_full_name_,
|
||||
const QueryTreeNodePtr & left_table_expression,
|
||||
const IdentifierResolveScope & scope_) -> QueryTreeNodePtr
|
||||
{
|
||||
const QueryNode * query_node = scope_.scope_node ? scope_.scope_node->as<QueryNode>() : nullptr;
|
||||
if (!query_node)
|
||||
return nullptr;
|
||||
|
||||
const auto & projection_list = query_node->getProjection();
|
||||
for (const auto & projection_node : projection_list.getNodes())
|
||||
{
|
||||
if (projection_node->hasAlias() && identifier_full_name_ == projection_node->getAlias())
|
||||
{
|
||||
auto left_subquery = std::make_shared<QueryNode>(query_node->getMutableContext());
|
||||
left_subquery->getProjection().getNodes().push_back(projection_node->clone());
|
||||
left_subquery->getJoinTree() = left_table_expression;
|
||||
|
||||
IdentifierResolveScope left_subquery_scope(left_subquery, nullptr /*parent_scope*/);
|
||||
resolveQuery(left_subquery, left_subquery_scope);
|
||||
|
||||
const auto & resolved_nodes = left_subquery->getProjection().getNodes();
|
||||
if (resolved_nodes.size() == 1)
|
||||
{
|
||||
/// Create ColumnNode with expression from parent projection
|
||||
return std::make_shared<ColumnNode>(
|
||||
NameAndTypePair{identifier_full_name_, resolved_nodes.front()->getResultType()},
|
||||
resolved_nodes.front(), left_table_expression);
|
||||
}
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
};
|
||||
|
||||
QueryTreeNodePtr result_left_table_expression = nullptr;
|
||||
/** With `analyzer_compatibility_join_using_top_level_identifier` alias in projection has higher priority than column from left table.
|
||||
* But if aliased expression cannot be resolved from left table, we get UNKNOW_IDENTIFIER error,
|
||||
* despite the fact that column from USING could be resolved from left table.
|
||||
* It's compatibility with a default behavior for old analyzer.
|
||||
*/
|
||||
if (settings.analyzer_compatibility_join_using_top_level_identifier)
|
||||
result_left_table_expression = try_resolve_identifier_from_query_projection(identifier_full_name, join_node_typed.getLeftTableExpression(), scope);
|
||||
|
||||
IdentifierLookup identifier_lookup{identifier_node->getIdentifier(), IdentifierLookupContext::EXPRESSION};
|
||||
auto result_left_table_expression = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_node_typed.getLeftTableExpression(), scope);
|
||||
if (!result_left_table_expression)
|
||||
result_left_table_expression = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_node_typed.getLeftTableExpression(), scope);
|
||||
|
||||
/// Here we may try to resolve identifier from projection in case it's not resolved from left table expression
|
||||
/// and analyzer_compatibility_join_using_top_level_identifier is disabled.
|
||||
/// For now we do not do this, because not all corner cases are clear.
|
||||
/// if (!settings.analyzer_compatibility_join_using_top_level_identifier && !result_left_table_expression)
|
||||
/// result_left_table_expression = try_resolve_identifier_from_query_projection(identifier_full_name, join_node_typed.getLeftTableExpression(), scope);
|
||||
|
||||
if (!result_left_table_expression)
|
||||
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
|
||||
"JOIN {} using identifier '{}' cannot be resolved from left table expression. In scope {}",
|
||||
@ -7448,6 +7557,29 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
|
||||
scope.table_expressions_in_resolve_process.erase(join_tree_node.get());
|
||||
}
|
||||
|
||||
class ReplaceColumnsVisitor : public InDepthQueryTreeVisitor<ReplaceColumnsVisitor>
|
||||
{
|
||||
public:
|
||||
explicit ReplaceColumnsVisitor(const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_, const ContextPtr & context_)
|
||||
: replacement_map(replacement_map_)
|
||||
, context(context_)
|
||||
{}
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (auto it = replacement_map.find(node); it != replacement_map.end())
|
||||
node = it->second;
|
||||
if (auto * function_node = node->as<FunctionNode>())
|
||||
rerunFunctionResolve(function_node, context);
|
||||
}
|
||||
|
||||
bool shouldTraverseTopToBottom() const { return false; }
|
||||
|
||||
private:
|
||||
const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map;
|
||||
const ContextPtr & context;
|
||||
};
|
||||
|
||||
/** Resolve query.
|
||||
* This function modifies query node during resolve. It is caller responsibility to clone query node before resolve
|
||||
* if it is needed for later use.
|
||||
@ -7635,21 +7767,23 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
}
|
||||
|
||||
if (query_node_typed.getPrewhere())
|
||||
if (auto & prewhere_node = query_node_typed.getPrewhere())
|
||||
{
|
||||
/** Expression in PREWHERE with JOIN should not be modified by join_use_nulls.
|
||||
* Example: SELECT * FROM t1 JOIN t2 USING (id) PREWHERE a = 1
|
||||
* Column `a` should be resolved from table and should not change its type to Nullable.
|
||||
*/
|
||||
bool join_use_nulls = scope.join_use_nulls;
|
||||
bool use_identifier_lookup_to_result_cache = scope.use_identifier_lookup_to_result_cache;
|
||||
scope.join_use_nulls = false;
|
||||
scope.use_identifier_lookup_to_result_cache = false;
|
||||
resolveExpressionNode(prewhere_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
|
||||
|
||||
resolveExpressionNode(query_node_typed.getPrewhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
|
||||
|
||||
scope.join_use_nulls = join_use_nulls;
|
||||
scope.use_identifier_lookup_to_result_cache = use_identifier_lookup_to_result_cache;
|
||||
if (scope.join_use_nulls)
|
||||
{
|
||||
/** Expression in PREWHERE with JOIN should not be modified by join_use_nulls.
|
||||
* Example: SELECT * FROM t1 JOIN t2 USING (id) PREWHERE b = 1
|
||||
* Column `a` should be resolved from table and should not change its type to Nullable.
|
||||
* More complicated example when column is somewhere inside an expression:
|
||||
* SELECT a + 1 as b FROM t1 JOIN t2 USING (id) PREWHERE b = 1
|
||||
* expression `a + 1 as b` in projection and in PREWHERE should have different `a`.
|
||||
*/
|
||||
prewhere_node = prewhere_node->clone();
|
||||
ReplaceColumnsVisitor replace_visitor(scope.nullable_join_columns, scope.context);
|
||||
replace_visitor.visit(prewhere_node);
|
||||
}
|
||||
}
|
||||
|
||||
if (query_node_typed.getWhere())
|
||||
|
@ -77,7 +77,7 @@ public:
|
||||
* Available expression columns are extracted from table expression.
|
||||
* Table expression node must have query, union, table, table function type.
|
||||
*/
|
||||
QueryAnalysisPass(QueryTreeNodePtr table_expression_, bool only_analyze_ = false);
|
||||
explicit QueryAnalysisPass(QueryTreeNodePtr table_expression_, bool only_analyze_ = false);
|
||||
|
||||
String getName() override
|
||||
{
|
||||
|
@ -54,10 +54,10 @@ public:
|
||||
if (!constant_node)
|
||||
return;
|
||||
|
||||
const auto & constant_value_literal = constant_node->getValue();
|
||||
if (!isInt64OrUInt64FieldType(constant_value_literal.getType()))
|
||||
if (auto constant_type = constant_node->getResultType(); !isNativeInteger(constant_type))
|
||||
return;
|
||||
|
||||
const auto & constant_value_literal = constant_node->getValue();
|
||||
if (getSettings().aggregate_functions_null_for_empty)
|
||||
return;
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Backups/BackupCoordinationRemote.h>
|
||||
|
||||
#include <base/hex.h>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
|
||||
#include <Access/Common/AccessEntityType.h>
|
||||
#include <Backups/BackupCoordinationReplicatedAccess.h>
|
||||
|
@ -25,7 +25,7 @@ String BackupInfo::toString() const
|
||||
BackupInfo BackupInfo::fromString(const String & str)
|
||||
{
|
||||
ParserIdentifierWithOptionalParameters parser;
|
||||
ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
return fromAST(*ast);
|
||||
}
|
||||
|
||||
|
@ -26,7 +26,7 @@ struct BackupSettings
|
||||
String password;
|
||||
|
||||
/// S3 storage class.
|
||||
String s3_storage_class = "";
|
||||
String s3_storage_class;
|
||||
|
||||
/// If this is set to true then only create queries will be written to backup,
|
||||
/// without the data of tables.
|
||||
|
@ -327,7 +327,7 @@ public:
|
||||
metric_active_threads = CurrentMetrics::RestoreThreadsActive;
|
||||
metric_active_threads = CurrentMetrics::RestoreThreadsScheduled;
|
||||
max_threads = num_restore_threads;
|
||||
use_queue = (thread_pool_id != ThreadPoolId::RESTORE);
|
||||
use_queue = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -80,7 +80,7 @@ public:
|
||||
|
||||
/// Returns names of entries stored in a specified directory in the backup.
|
||||
/// If `directory` is empty or '/' the functions returns entries in the backup's root.
|
||||
virtual Strings listFiles(const String & directory, bool recursive = false) const = 0;
|
||||
virtual Strings listFiles(const String & directory, bool recursive) const = 0;
|
||||
|
||||
/// Checks if a specified directory contains any files.
|
||||
/// The function returns the same as `!listFiles(directory).empty()`.
|
||||
@ -108,11 +108,9 @@ public:
|
||||
virtual std::unique_ptr<SeekableReadBuffer> readFile(const SizeAndChecksum & size_and_checksum) const = 0;
|
||||
|
||||
/// Copies a file from the backup to a specified destination disk. Returns the number of bytes written.
|
||||
virtual size_t copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path,
|
||||
WriteMode write_mode = WriteMode::Rewrite) const = 0;
|
||||
virtual size_t copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) const = 0;
|
||||
|
||||
virtual size_t copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path,
|
||||
WriteMode write_mode = WriteMode::Rewrite) const = 0;
|
||||
virtual size_t copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) const = 0;
|
||||
|
||||
/// Puts a new entry to the backup.
|
||||
virtual void writeFile(const BackupFileInfo & file_info, BackupEntryPtr entry) = 0;
|
||||
|
@ -101,10 +101,12 @@ RestorerFromBackup::RestorerFromBackup(
|
||||
|
||||
RestorerFromBackup::~RestorerFromBackup()
|
||||
{
|
||||
if (!futures.empty())
|
||||
/// If an exception occurs we can come here to the destructor having some tasks still unfinished.
|
||||
/// We have to wait until they finish.
|
||||
if (getNumFutures() > 0)
|
||||
{
|
||||
LOG_ERROR(log, "RestorerFromBackup must not be destroyed while {} tasks are still running", futures.size());
|
||||
chassert(false && "RestorerFromBackup must not be destroyed while some tasks are still running");
|
||||
LOG_INFO(log, "Waiting for {} tasks to finish", getNumFutures());
|
||||
waitFutures();
|
||||
}
|
||||
}
|
||||
|
||||
@ -271,7 +273,7 @@ void RestorerFromBackup::findRootPathsInBackup()
|
||||
root_paths_in_backup.push_back(root_path);
|
||||
|
||||
/// Add shard-related part to the root path.
|
||||
Strings shards_in_backup = backup->listFiles(root_path / "shards");
|
||||
Strings shards_in_backup = backup->listFiles(root_path / "shards", /*recursive*/ false);
|
||||
if (shards_in_backup.empty())
|
||||
{
|
||||
if (restore_settings.shard_num_in_backup > 1)
|
||||
@ -293,7 +295,7 @@ void RestorerFromBackup::findRootPathsInBackup()
|
||||
}
|
||||
|
||||
/// Add replica-related part to the root path.
|
||||
Strings replicas_in_backup = backup->listFiles(root_path / "replicas");
|
||||
Strings replicas_in_backup = backup->listFiles(root_path / "replicas", /*recursive*/ false);
|
||||
if (replicas_in_backup.empty())
|
||||
{
|
||||
if (restore_settings.replica_num_in_backup > 1)
|
||||
@ -422,7 +424,7 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_
|
||||
readStringUntilEOF(create_query_str, *read_buffer);
|
||||
read_buffer.reset();
|
||||
ParserCreateQuery create_parser;
|
||||
ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
applyCustomStoragePolicy(create_table_query);
|
||||
renameDatabaseAndTableNameInCreateQuery(create_table_query, renaming_map, context->getGlobalContext());
|
||||
String create_table_query_str = serializeAST(*create_table_query);
|
||||
@ -512,7 +514,7 @@ void RestorerFromBackup::findDatabaseInBackupImpl(const String & database_name_i
|
||||
if (!metadata_path && !try_metadata_path.empty() && backup->fileExists(try_metadata_path))
|
||||
metadata_path = try_metadata_path;
|
||||
|
||||
Strings file_names = backup->listFiles(try_tables_metadata_path);
|
||||
Strings file_names = backup->listFiles(try_tables_metadata_path, /*recursive*/ false);
|
||||
for (const String & file_name : file_names)
|
||||
{
|
||||
if (!file_name.ends_with(".sql"))
|
||||
@ -532,7 +534,7 @@ void RestorerFromBackup::findDatabaseInBackupImpl(const String & database_name_i
|
||||
readStringUntilEOF(create_query_str, *read_buffer);
|
||||
read_buffer.reset();
|
||||
ParserCreateQuery create_parser;
|
||||
ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
|
||||
renameDatabaseAndTableNameInCreateQuery(create_database_query, renaming_map, context->getGlobalContext());
|
||||
String create_database_query_str = serializeAST(*create_database_query);
|
||||
|
||||
@ -573,7 +575,7 @@ void RestorerFromBackup::findEverythingInBackup(const std::set<String> & except_
|
||||
|
||||
for (const auto & root_path_in_backup : root_paths_in_backup)
|
||||
{
|
||||
Strings file_names = backup->listFiles(root_path_in_backup / "metadata");
|
||||
Strings file_names = backup->listFiles(root_path_in_backup / "metadata", /*recursive*/ false);
|
||||
for (String & file_name : file_names)
|
||||
{
|
||||
if (file_name.ends_with(".sql"))
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user