Merge remote-tracking branch 'upstream/master' into ncb/hostname-system-log-tables

This commit is contained in:
Bharat Nallan Chakravarthy 2023-11-19 14:06:40 -08:00
commit d08512d8c0
334 changed files with 9493 additions and 2719 deletions

View File

@ -187,9 +187,10 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
endif ()
endif()
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
if (NOT (SANITIZE_COVERAGE OR WITH_COVERAGE)
AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL"))
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT ON)
else()
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT OFF)
@ -291,9 +292,6 @@ set (CMAKE_C_STANDARD 11)
set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C
set (CMAKE_C_STANDARD_REQUIRED ON)
# Compiler-specific coverage flags e.g. -fcoverage-mapping
option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF)
if (COMPILER_CLANG)
# Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure.
# See https://reviews.llvm.org/D112921
@ -309,12 +307,6 @@ if (COMPILER_CLANG)
set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries")
set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
endif()
if (WITH_COVERAGE)
set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
# If we want to disable coverage for specific translation units
set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping")
endif()
endif ()
set (COMPILER_FLAGS "${COMPILER_FLAGS}")
@ -569,7 +561,6 @@ option(CHECK_LARGE_OBJECT_SIZES "Check that there are no large object files afte
add_subdirectory (base)
add_subdirectory (src)
add_subdirectory (programs)
add_subdirectory (tests)
add_subdirectory (utils)
if (FUZZER)

View File

@ -1,3 +1,5 @@
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
endif ()

View File

@ -1,11 +1,15 @@
#include "coverage.h"
#if WITH_COVERAGE
#pragma GCC diagnostic ignored "-Wreserved-identifier"
# include <mutex>
# include <unistd.h>
/// WITH_COVERAGE enables the default implementation of code coverage,
/// that dumps a map to the filesystem.
#if WITH_COVERAGE
#include <mutex>
#include <unistd.h>
# if defined(__clang__)
@ -31,3 +35,131 @@ void dumpCoverageReportIfPossible()
#endif
}
/// SANITIZE_COVERAGE enables code instrumentation,
/// but leaves the callbacks implementation to us,
/// which we use to calculate coverage on a per-test basis
/// and to write it to system tables.
#if defined(SANITIZE_COVERAGE)
namespace
{
bool pc_guards_initialized = false;
bool pc_table_initialized = false;
uint32_t * guards_start = nullptr;
uint32_t * guards_end = nullptr;
uintptr_t * coverage_array = nullptr;
size_t coverage_array_size = 0;
uintptr_t * all_addresses_array = nullptr;
size_t all_addresses_array_size = 0;
}
extern "C"
{
/// This is called at least once for every DSO for initialization.
/// But we will use it only for the main DSO.
void __sanitizer_cov_trace_pc_guard_init(uint32_t * start, uint32_t * stop)
{
if (pc_guards_initialized)
return;
pc_guards_initialized = true;
/// The function can be called multiple times, but we need to initialize only once.
if (start == stop || *start)
return;
guards_start = start;
guards_end = stop;
coverage_array_size = stop - start;
/// Note: we will leak this.
coverage_array = static_cast<uintptr_t*>(malloc(sizeof(uintptr_t) * coverage_array_size));
resetCoverage();
}
/// This is called at least once for every DSO for initialization
/// and provides information about all instrumented addresses.
void __sanitizer_cov_pcs_init(const uintptr_t * pcs_begin, const uintptr_t * pcs_end)
{
if (pc_table_initialized)
return;
pc_table_initialized = true;
all_addresses_array = static_cast<uintptr_t*>(malloc(sizeof(uintptr_t) * coverage_array_size));
all_addresses_array_size = pcs_end - pcs_begin;
/// They are not a real pointers, but also contain a flag in the most significant bit,
/// in which we are not interested for now. Reset it.
for (size_t i = 0; i < all_addresses_array_size; ++i)
all_addresses_array[i] = pcs_begin[i] & 0x7FFFFFFFFFFFFFFFULL;
}
/// This is called at every basic block / edge, etc.
void __sanitizer_cov_trace_pc_guard(uint32_t * guard)
{
/// Duplicate the guard check.
if (!*guard)
return;
*guard = 0;
/// If you set *guard to 0 this code will not be called again for this edge.
/// Now we can get the PC and do whatever you want:
/// - store it somewhere or symbolize it and print right away.
/// The values of `*guard` are as you set them in
/// __sanitizer_cov_trace_pc_guard_init and so you can make them consecutive
/// and use them to dereference an array or a bit vector.
void * pc = __builtin_return_address(0);
coverage_array[guard - guards_start] = reinterpret_cast<uintptr_t>(pc);
}
}
__attribute__((no_sanitize("coverage")))
std::span<const uintptr_t> getCoverage()
{
return {coverage_array, coverage_array_size};
}
__attribute__((no_sanitize("coverage")))
std::span<const uintptr_t> getAllInstrumentedAddresses()
{
return {all_addresses_array, all_addresses_array_size};
}
__attribute__((no_sanitize("coverage")))
void resetCoverage()
{
memset(coverage_array, 0, coverage_array_size * sizeof(*coverage_array));
/// The guard defines whether the __sanitizer_cov_trace_pc_guard should be called.
/// For example, you can unset it after first invocation to prevent excessive work.
/// Initially set all the guards to 1 to enable callbacks.
for (uint32_t * x = guards_start; x < guards_end; ++x)
*x = 1;
}
#else
std::span<const uintptr_t> getCoverage()
{
return {};
}
std::span<const uintptr_t> getAllInstrumentedAddresses()
{
return {};
}
void resetCoverage()
{
}
#endif

View File

@ -1,5 +1,8 @@
#pragma once
#include <span>
#include <cstdint>
/// Flush coverage report to file, depending on coverage system
/// proposed by compiler (llvm for clang and gcov for gcc).
///
@ -7,3 +10,16 @@
/// Thread safe (use exclusive lock).
/// Idempotent, may be called multiple times.
void dumpCoverageReportIfPossible();
/// This is effective if SANITIZE_COVERAGE is enabled at build time.
/// Get accumulated unique program addresses of the instrumented parts of the code,
/// seen so far after program startup or after previous reset.
/// The returned span will be represented as a sparse map, containing mostly zeros, which you should filter away.
std::span<const uintptr_t> getCoverage();
/// Get all instrumented addresses that could be in the coverage.
std::span<const uintptr_t> getAllInstrumentedAddresses();
/// Reset the accumulated coverage.
/// This is useful to compare coverage of different tests, including differential coverage.
void resetCoverage();

View File

@ -1,5 +1,6 @@
#include "memcpy.h"
__attribute__((no_sanitize("coverage")))
extern "C" void * memcpy(void * __restrict dst, const void * __restrict src, size_t size)
{
return inline_memcpy(dst, src, size);

View File

@ -93,7 +93,7 @@
* See https://habr.com/en/company/yandex/blog/457612/
*/
__attribute__((no_sanitize("coverage")))
static inline void * inline_memcpy(void * __restrict dst_, const void * __restrict src_, size_t size)
{
/// We will use pointer arithmetic, so char pointer will be used.

View File

@ -26,7 +26,6 @@ HTTPServerSession::HTTPServerSession(const StreamSocket& socket, HTTPServerParam
_maxKeepAliveRequests(pParams->getMaxKeepAliveRequests())
{
setTimeout(pParams->getTimeout());
this->socket().setReceiveTimeout(pParams->getTimeout());
}

View File

@ -93,9 +93,34 @@ void HTTPSession::setTimeout(const Poco::Timespan& timeout)
void HTTPSession::setTimeout(const Poco::Timespan& connectionTimeout, const Poco::Timespan& sendTimeout, const Poco::Timespan& receiveTimeout)
{
_connectionTimeout = connectionTimeout;
_sendTimeout = sendTimeout;
_receiveTimeout = receiveTimeout;
try
{
_connectionTimeout = connectionTimeout;
if (_sendTimeout.totalMicroseconds() != sendTimeout.totalMicroseconds()) {
_sendTimeout = sendTimeout;
if (connected())
_socket.setSendTimeout(_sendTimeout);
}
if (_receiveTimeout.totalMicroseconds() != receiveTimeout.totalMicroseconds()) {
_receiveTimeout = receiveTimeout;
if (connected())
_socket.setReceiveTimeout(_receiveTimeout);
}
}
catch (NetException &)
{
#ifndef NDEBUG
throw;
#else
// mute exceptions in release
// just in case when changing settings on socket is not allowed
// however it should be OK for timeouts
#endif
}
}

View File

@ -1,19 +0,0 @@
# Adding test output on failure
enable_testing ()
if (NOT TARGET check)
if (CMAKE_CONFIGURATION_TYPES)
add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
--force-new-ctest-process --output-on-failure --build-config "$<CONFIGURATION>"
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
else ()
add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
--force-new-ctest-process --output-on-failure
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
endif ()
endif ()
macro (add_check target)
add_test (NAME test_${target} COMMAND ${target} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_dependencies (check ${target})
endmacro (add_check)

View File

@ -134,60 +134,52 @@ elseif (ARCH_AMD64)
# ClickHouse can be cross-compiled (e.g. on an ARM host for x86) but it is also possible to build ClickHouse on x86 w/o AVX for x86 w/
# AVX. We only assume that the compiler can emit certain SIMD instructions, we don't care if the host system is able to run the binary.
SET (HAVE_SSSE3 1)
SET (HAVE_SSE41 1)
SET (HAVE_SSE42 1)
SET (HAVE_PCLMULQDQ 1)
SET (HAVE_POPCNT 1)
SET (HAVE_AVX 1)
SET (HAVE_AVX2 1)
SET (HAVE_AVX512 1)
SET (HAVE_AVX512_VBMI 1)
SET (HAVE_BMI 1)
SET (HAVE_BMI2 1)
if (HAVE_SSSE3 AND ENABLE_SSSE3)
if (ENABLE_SSSE3)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mssse3")
endif ()
if (HAVE_SSE41 AND ENABLE_SSE41)
if (ENABLE_SSE41)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -msse4.1")
endif ()
if (HAVE_SSE42 AND ENABLE_SSE42)
if (ENABLE_SSE42)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -msse4.2")
endif ()
if (HAVE_PCLMULQDQ AND ENABLE_PCLMULQDQ)
if (ENABLE_PCLMULQDQ)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mpclmul")
endif ()
if (HAVE_POPCNT AND ENABLE_POPCNT)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mpopcnt")
endif ()
if (HAVE_AVX AND ENABLE_AVX)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mavx")
endif ()
if (HAVE_AVX2 AND ENABLE_AVX2)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mavx2")
endif ()
if (HAVE_AVX512 AND ENABLE_AVX512)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mavx512f -mavx512bw -mavx512vl")
endif ()
if (HAVE_AVX512 AND ENABLE_AVX512 AND HAVE_AVX512_VBMI AND ENABLE_AVX512_VBMI)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mavx512vbmi")
endif ()
if (HAVE_BMI AND ENABLE_BMI)
if (ENABLE_BMI)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mbmi")
endif ()
if (HAVE_BMI2 AND HAVE_AVX2 AND ENABLE_AVX2 AND ENABLE_BMI2)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mbmi2")
if (ENABLE_POPCNT)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mpopcnt")
endif ()
if (ENABLE_AVX)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mavx")
endif ()
if (ENABLE_AVX2)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mavx2")
if (ENABLE_BMI2)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mbmi2")
endif ()
endif ()
if (ENABLE_AVX512)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mavx512f -mavx512bw -mavx512vl")
if (ENABLE_AVX512_VBMI)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mavx512vbmi")
endif ()
endif ()
if (ENABLE_AVX512_FOR_SPEC_OP)
set (X86_INTRINSICS_FLAGS "")
if (HAVE_BMI)
set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi")
endif ()
if (HAVE_AVX512)
set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx512f -mavx512bw -mavx512vl -mprefer-vector-width=256")
endif ()
set (X86_INTRINSICS_FLAGS "-mbmi -mavx512f -mavx512bw -mavx512vl -mprefer-vector-width=256")
endif ()
else ()
# RISC-V + exotic platforms
endif ()

View File

@ -9,9 +9,3 @@ set (CMAKE_ASM_COMPILER_TARGET "aarch64-apple-darwin")
set (CMAKE_OSX_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/darwin-aarch64")
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

View File

@ -9,9 +9,3 @@ set (CMAKE_ASM_COMPILER_TARGET "x86_64-apple-darwin")
set (CMAKE_OSX_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/darwin-x86_64")
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

View File

@ -9,13 +9,3 @@ set (CMAKE_ASM_COMPILER_TARGET "aarch64-unknown-freebsd12")
set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/freebsd-aarch64")
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake
# Will be changed later, but somehow needed to be set here.
set (CMAKE_AR "ar")
set (CMAKE_RANLIB "ranlib")
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

View File

@ -9,13 +9,3 @@ set (CMAKE_ASM_COMPILER_TARGET "powerpc64le-unknown-freebsd13")
set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/freebsd-ppc64le")
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake
# Will be changed later, but somehow needed to be set here.
set (CMAKE_AR "ar")
set (CMAKE_RANLIB "ranlib")
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

View File

@ -9,13 +9,3 @@ set (CMAKE_ASM_COMPILER_TARGET "x86_64-pc-freebsd11")
set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/freebsd-x86_64")
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake
# Will be changed later, but somehow needed to be set here.
set (CMAKE_AR "ar")
set (CMAKE_RANLIB "ranlib")
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

View File

@ -21,7 +21,7 @@ if (NOT PARALLEL_COMPILE_JOBS AND MAX_COMPILER_MEMORY)
set (PARALLEL_COMPILE_JOBS 1)
endif ()
if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
message(WARNING "The auto-calculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
message("The auto-calculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
endif()
endif ()
@ -32,7 +32,7 @@ if (NOT PARALLEL_LINK_JOBS AND MAX_LINKER_MEMORY)
set (PARALLEL_LINK_JOBS 1)
endif ()
if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
message(WARNING "The auto-calculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
message("The auto-calculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
endif()
endif ()

View File

@ -9,10 +9,6 @@ set (CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "aarch64-linux-gnu")
# Will be changed later, but somehow needed to be set here.
set (CMAKE_AR "ar")
set (CMAKE_RANLIB "ranlib")
set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-aarch64")
set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/aarch64-linux-gnu/libc")
@ -20,9 +16,3 @@ set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/aarch64-linux-gnu/libc")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

View File

@ -9,10 +9,6 @@ set (CMAKE_C_COMPILER_TARGET "powerpc64le-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "powerpc64le-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "powerpc64le-linux-gnu")
# Will be changed later, but somehow needed to be set here.
set (CMAKE_AR "ar")
set (CMAKE_RANLIB "ranlib")
set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-powerpc64le")
set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/powerpc64le-linux-gnu/libc")
@ -20,9 +16,3 @@ set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/powerpc64le-linux-gnu/libc")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

View File

@ -9,10 +9,6 @@ set (CMAKE_C_COMPILER_TARGET "riscv64-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "riscv64-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "riscv64-linux-gnu")
# Will be changed later, but somehow needed to be set here.
set (CMAKE_AR "ar")
set (CMAKE_RANLIB "ranlib")
set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-riscv64")
set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}")
@ -27,9 +23,3 @@ set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=bfd")
# ld.lld: error: section size decrease is too large
# But GNU BinUtils work.
set (LINKER_NAME "riscv64-linux-gnu-ld.bfd" CACHE STRING "Linker name" FORCE)
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

View File

@ -9,10 +9,6 @@ set (CMAKE_C_COMPILER_TARGET "s390x-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "s390x-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "s390x-linux-gnu")
# Will be changed later, but somehow needed to be set here.
set (CMAKE_AR "ar")
set (CMAKE_RANLIB "ranlib")
set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-s390x")
set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/s390x-linux-gnu/libc")
@ -23,9 +19,3 @@ set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64")
set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64")
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

View File

@ -9,10 +9,6 @@ set (CMAKE_C_COMPILER_TARGET "x86_64-linux-musl")
set (CMAKE_CXX_COMPILER_TARGET "x86_64-linux-musl")
set (CMAKE_ASM_COMPILER_TARGET "x86_64-linux-musl")
# Will be changed later, but somehow needed to be set here.
set (CMAKE_AR "ar")
set (CMAKE_RANLIB "ranlib")
set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-x86_64-musl")
set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}")
@ -21,11 +17,5 @@ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (USE_MUSL 1)
add_definitions(-DUSE_MUSL=1)

View File

@ -19,10 +19,6 @@ set (CMAKE_C_COMPILER_TARGET "x86_64-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "x86_64-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "x86_64-linux-gnu")
# Will be changed later, but somehow needed to be set here.
set (CMAKE_AR "ar")
set (CMAKE_RANLIB "ranlib")
set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-x86_64")
set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/x86_64-linux-gnu/libc")
@ -32,9 +28,3 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

View File

@ -58,3 +58,27 @@ if (SANITIZE)
message (FATAL_ERROR "Unknown sanitizer type: ${SANITIZE}")
endif ()
endif()
# Default coverage instrumentation (dumping the coverage map on exit)
option(WITH_COVERAGE "Instrumentation for code coverage with default implementation" OFF)
if (WITH_COVERAGE)
message (INFORMATION "Enabled instrumentation for code coverage")
set(COVERAGE_FLAGS "-fprofile-instr-generate -fcoverage-mapping")
endif()
option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF)
if (SANITIZE_COVERAGE)
message (INFORMATION "Enabled instrumentation for code coverage")
# We set this define for whole build to indicate that at least some parts are compiled with coverage.
# And to expose it in system.build_options.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSANITIZE_COVERAGE=1")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSANITIZE_COVERAGE=1")
# But the actual coverage will be enabled on per-library basis: for ClickHouse code, but not for 3rd-party.
set (COVERAGE_FLAGS "-fsanitize-coverage=trace-pc-guard,pc-table")
endif()
set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table")

View File

@ -3,15 +3,6 @@
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w -ffunction-sections -fdata-sections")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -ffunction-sections -fdata-sections")
if (WITH_COVERAGE)
set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE})
separate_arguments(WITHOUT_COVERAGE_LIST)
# disable coverage for contib files and build with optimisations
if (COMPILER_CLANG)
add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST})
endif()
endif()
if (SANITIZE STREQUAL "undefined")
# 3rd-party libraries usually not intended to work with UBSan.
add_compile_options(-fno-sanitize=undefined)

2
contrib/abseil-cpp vendored

@ -1 +1 @@
Subproject commit 5655528c41830f733160de4fb0b99073841bae9e
Subproject commit 3bd86026c93da5a40006fd53403dff9d5f5e30e3

File diff suppressed because it is too large Load Diff

View File

@ -77,16 +77,16 @@ set(FLATBUFFERS_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/flatbuffers")
set(FLATBUFFERS_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/flatbuffers")
set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_SRC_DIR}/include")
# set flatbuffers CMake options
set(FLATBUFFERS_BUILD_FLATLIB ON CACHE BOOL "Enable the build of the flatbuffers library")
set(FLATBUFFERS_BUILD_SHAREDLIB OFF CACHE BOOL "Disable the build of the flatbuffers shared library")
set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "Skip flatbuffers tests")
set(FLATBUFFERS_SRCS
${FLATBUFFERS_SRC_DIR}/src/idl_parser.cpp
${FLATBUFFERS_SRC_DIR}/src/idl_gen_text.cpp
${FLATBUFFERS_SRC_DIR}/src/reflection.cpp
${FLATBUFFERS_SRC_DIR}/src/util.cpp)
add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}")
add_library(_flatbuffers STATIC ${FLATBUFFERS_SRCS})
target_include_directories(_flatbuffers PUBLIC ${FLATBUFFERS_INCLUDE_DIR})
target_compile_definitions(_flatbuffers PRIVATE -DFLATBUFFERS_LOCALE_INDEPENDENT=0)
add_library(_flatbuffers INTERFACE)
target_link_libraries(_flatbuffers INTERFACE flatbuffers)
target_include_directories(_flatbuffers INTERFACE ${FLATBUFFERS_INCLUDE_DIR})
# === hdfs
# NOTE: cannot use ch_contrib::hdfs since it's INCLUDE_DIRECTORIES does not includes trailing "hdfs/"
@ -127,7 +127,6 @@ set(ORC_SRCS
"${ORC_SOURCE_SRC_DIR}/BpackingDefault.hh"
"${ORC_SOURCE_SRC_DIR}/ByteRLE.cc"
"${ORC_SOURCE_SRC_DIR}/ByteRLE.hh"
"${ORC_SOURCE_SRC_DIR}/CMakeLists.txt"
"${ORC_SOURCE_SRC_DIR}/ColumnPrinter.cc"
"${ORC_SOURCE_SRC_DIR}/ColumnReader.cc"
"${ORC_SOURCE_SRC_DIR}/ColumnReader.hh"

View File

@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0.
if (USE_CPU_EXTENSIONS)
if (HAVE_AVX2)
if (ENABLE_AVX2)
set (AVX2_CFLAGS "-mavx -mavx2")
set (HAVE_AVX2_INTRINSICS 1)
set (HAVE_MM256_EXTRACT_EPI64 1)

View File

@ -48,9 +48,8 @@ set(AZURE_SDK_INCLUDES
"${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/inc/"
)
include("${AZURE_DIR}/cmake-modules/AzureTransportAdapters.cmake")
add_library(_azure_sdk ${AZURE_SDK_UNIFIED_SRC})
target_compile_definitions(_azure_sdk PRIVATE BUILD_CURL_HTTP_TRANSPORT_ADAPTER)
# Originally, on Windows azure-core is built with bcrypt and crypt32 by default
if (TARGET OpenSSL::SSL)

View File

@ -68,8 +68,7 @@ list(APPEND INCLUDE_DIRS
${CASS_SRC_DIR}/third_party/hdr_histogram
${CASS_SRC_DIR}/third_party/http-parser
${CASS_SRC_DIR}/third_party/mt19937_64
${CASS_SRC_DIR}/third_party/rapidjson/rapidjson
${CASS_SRC_DIR}/third_party/sparsehash/src)
${CASS_SRC_DIR}/third_party/rapidjson/rapidjson)
list(APPEND INCLUDE_DIRS ${CASS_INCLUDE_DIR} ${CASS_SRC_DIR})
@ -83,10 +82,6 @@ set(HAVE_MEMCPY 1)
set(HAVE_LONG_LONG 1)
set(HAVE_UINT16_T 1)
configure_file("${CASS_SRC_DIR}/third_party/sparsehash/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/sparsehash/internal/sparseconfig.h")
# Determine random availability
if (OS_LINUX)
#set (HAVE_GETRANDOM 1) - not on every Linux kernel
@ -116,17 +111,17 @@ configure_file(
${CASS_ROOT_DIR}/driver_config.hpp.in
${CMAKE_CURRENT_BINARY_DIR}/driver_config.hpp)
add_library(_cassandra
${SOURCES}
$<TARGET_OBJECTS:_curl_hostcheck>
$<TARGET_OBJECTS:_hdr_histogram>
$<TARGET_OBJECTS:_http-parser>)
target_link_libraries(_cassandra ch_contrib::zlib ch_contrib::minizip)
target_link_libraries(_cassandra ch_contrib::zlib ch_contrib::minizip ch_contrib::sparsehash)
target_include_directories(_cassandra PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${INCLUDE_DIRS})
target_include_directories(_cassandra SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR})
target_compile_definitions(_cassandra PRIVATE CASS_BUILDING)
target_compile_definitions(_cassandra PRIVATE -DSPARSEHASH_HASH=std::hash -Dsparsehash=google)
target_link_libraries(_cassandra ch_contrib::uv)

View File

@ -13,12 +13,10 @@ set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/fastops")
set(SRCS "")
if(HAVE_AVX)
if(ARCH_AMD64)
set (SRCS ${SRCS} "${LIBRARY_DIR}/fastops/avx/ops_avx.cpp")
set_source_files_properties("${LIBRARY_DIR}/fastops/avx/ops_avx.cpp" PROPERTIES COMPILE_FLAGS "-mavx -DNO_AVX2")
endif()
if(HAVE_AVX2)
set (SRCS ${SRCS} "${LIBRARY_DIR}/fastops/avx2/ops_avx2.cpp")
set_source_files_properties("${LIBRARY_DIR}/fastops/avx2/ops_avx2.cpp" PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")
endif()

@ -1 +1 @@
Subproject commit 089b89c8d4140f0d49fe4222b047a8ea814bc752
Subproject commit 0862007f6ca1f5723c58f10f0ca34f3f25a63b2e

View File

@ -82,7 +82,6 @@ set(libprotobuf_lite_files
${protobuf_source_dir}/src/google/protobuf/any_lite.cc
${protobuf_source_dir}/src/google/protobuf/arena.cc
${protobuf_source_dir}/src/google/protobuf/arena_align.cc
${protobuf_source_dir}/src/google/protobuf/arena_config.cc
${protobuf_source_dir}/src/google/protobuf/arenastring.cc
${protobuf_source_dir}/src/google/protobuf/arenaz_sampler.cc
${protobuf_source_dir}/src/google/protobuf/extension_set.cc
@ -131,17 +130,18 @@ set(libprotobuf_files
${protobuf_source_dir}/src/google/protobuf/any_lite.cc
${protobuf_source_dir}/src/google/protobuf/arena.cc
${protobuf_source_dir}/src/google/protobuf/arena_align.cc
${protobuf_source_dir}/src/google/protobuf/arena_config.cc
${protobuf_source_dir}/src/google/protobuf/arenastring.cc
${protobuf_source_dir}/src/google/protobuf/arenaz_sampler.cc
${protobuf_source_dir}/src/google/protobuf/compiler/importer.cc
${protobuf_source_dir}/src/google/protobuf/compiler/parser.cc
${protobuf_source_dir}/src/google/protobuf/cpp_features.pb.cc
${protobuf_source_dir}/src/google/protobuf/descriptor.cc
${protobuf_source_dir}/src/google/protobuf/descriptor.pb.cc
${protobuf_source_dir}/src/google/protobuf/descriptor_database.cc
${protobuf_source_dir}/src/google/protobuf/dynamic_message.cc
${protobuf_source_dir}/src/google/protobuf/extension_set.cc
${protobuf_source_dir}/src/google/protobuf/extension_set_heavy.cc
${protobuf_source_dir}/src/google/protobuf/feature_resolver.cc
${protobuf_source_dir}/src/google/protobuf/generated_enum_util.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_bases.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_reflection.cc
@ -174,6 +174,9 @@ set(libprotobuf_files
${protobuf_source_dir}/src/google/protobuf/message.cc
${protobuf_source_dir}/src/google/protobuf/message_lite.cc
${protobuf_source_dir}/src/google/protobuf/parse_context.cc
${protobuf_source_dir}/src/google/protobuf/port.cc
${protobuf_source_dir}/src/google/protobuf/raw_ptr.cc
${protobuf_source_dir}/src/google/protobuf/reflection_mode.cc
${protobuf_source_dir}/src/google/protobuf/reflection_ops.cc
${protobuf_source_dir}/src/google/protobuf/repeated_field.cc
${protobuf_source_dir}/src/google/protobuf/repeated_ptr_field.cc
@ -208,11 +211,13 @@ add_library(protobuf::libprotobuf ALIAS _libprotobuf)
set(libprotoc_files
${protobuf_source_dir}/src/google/protobuf/compiler/allowlists/editions.cc
${protobuf_source_dir}/src/google/protobuf/compiler/code_generator.cc
${protobuf_source_dir}/src/google/protobuf/compiler/command_line_interface.cc
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/enum.cc
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/extension.cc
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field.cc
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field_generators/cord_field.cc
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field_generators/enum_field.cc
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field_generators/map_field.cc
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field_generators/message_field.cc
@ -299,6 +304,13 @@ set(libprotoc_files
${protobuf_source_dir}/src/google/protobuf/compiler/python/pyi_generator.cc
${protobuf_source_dir}/src/google/protobuf/compiler/retention.cc
${protobuf_source_dir}/src/google/protobuf/compiler/ruby/ruby_generator.cc
${protobuf_source_dir}/src/google/protobuf/compiler/rust/accessors/accessors.cc
${protobuf_source_dir}/src/google/protobuf/compiler/rust/accessors/singular_bytes.cc
${protobuf_source_dir}/src/google/protobuf/compiler/rust/accessors/singular_scalar.cc
${protobuf_source_dir}/src/google/protobuf/compiler/rust/context.cc
${protobuf_source_dir}/src/google/protobuf/compiler/rust/generator.cc
${protobuf_source_dir}/src/google/protobuf/compiler/rust/message.cc
${protobuf_source_dir}/src/google/protobuf/compiler/rust/naming.cc
${protobuf_source_dir}/src/google/protobuf/compiler/subprocess.cc
${protobuf_source_dir}/src/google/protobuf/compiler/zip_writer.cc
)
@ -357,7 +369,7 @@ else ()
"-Dprotobuf_BUILD_PROTOC_BINARIES=1"
"-DABSL_ROOT_DIR=${abseil_source_dir}"
"-DABSL_ENABLE_INSTALL=0"
"${protobuf_source_dir}/cmake"
"${protobuf_source_dir}"
WORKING_DIRECTORY "${PROTOC_BUILD_DIR}"
COMMAND_ECHO STDOUT)
@ -366,38 +378,6 @@ else ()
COMMAND_ECHO STDOUT)
endif ()
# add_custom_command (
# OUTPUT ${PROTOC_BUILD_DIR}
# COMMAND mkdir -p ${PROTOC_BUILD_DIR})
#
# add_custom_command (
# OUTPUT "${PROTOC_BUILD_DIR}/CMakeCache.txt"
#
# COMMAND ${CMAKE_COMMAND}
# -G"${CMAKE_GENERATOR}"
# -DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}"
# -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}"
# -DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}"
# -Dprotobuf_BUILD_TESTS=0
# -Dprotobuf_BUILD_CONFORMANCE=0
# -Dprotobuf_BUILD_EXAMPLES=0
# -Dprotobuf_BUILD_PROTOC_BINARIES=1
# "${protobuf_source_dir}/cmake"
#
# DEPENDS "${PROTOC_BUILD_DIR}"
# WORKING_DIRECTORY "${PROTOC_BUILD_DIR}"
# COMMENT "Configuring 'protoc' for host architecture."
# USES_TERMINAL)
#
# add_custom_command (
# OUTPUT "${PROTOC_BUILD_DIR}/protoc"
# COMMAND ${CMAKE_COMMAND} --build "${PROTOC_BUILD_DIR}"
# DEPENDS "${PROTOC_BUILD_DIR}/CMakeCache.txt"
# COMMENT "Building 'protoc' for host architecture."
# USES_TERMINAL)
#
# add_custom_target (protoc-host DEPENDS "${PROTOC_BUILD_DIR}/protoc")
add_executable(protoc IMPORTED GLOBAL)
set_target_properties (protoc PROPERTIES IMPORTED_LOCATION "${PROTOC_BUILD_DIR}/protoc")
add_dependencies(protoc "${PROTOC_BUILD_DIR}/protoc")

2
contrib/grpc vendored

@ -1 +1 @@
Subproject commit a08fe1a34075c93bb2d606dd608b9a3953288b81
Subproject commit 740e3dfd97301a52ad8165b65285bcc149d9e817

View File

@ -9,50 +9,14 @@ endif()
set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc")
set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc")
# Use re2 from ClickHouse contrib, not from gRPC third_party.
set(gRPC_RE2_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(_gRPC_RE2_INCLUDE_DIR "")
set(_gRPC_RE2_LIBRARIES ch_contrib::re2)
# Use zlib from ClickHouse contrib, not from gRPC third_party.
set(gRPC_ZLIB_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(_gRPC_ZLIB_INCLUDE_DIR "")
set(_gRPC_ZLIB_LIBRARIES ch_contrib::zlib)
# Use protobuf from ClickHouse contrib, not from gRPC third_party.
set(gRPC_PROTOBUF_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(_gRPC_PROTOBUF_LIBRARIES ch_contrib::protobuf)
set(_gRPC_PROTOBUF_PROTOC "protoc")
set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE $<TARGET_FILE:protoc>)
set(_gRPC_PROTOBUF_PROTOC_LIBRARIES ch_contrib::protoc)
if(TARGET OpenSSL::SSL)
set(gRPC_USE_UNSECURE_LIBRARIES FALSE)
else()
set(gRPC_USE_UNSECURE_LIBRARIES TRUE)
endif()
# Use OpenSSL from ClickHouse contrib, not from gRPC third_party.
set(gRPC_SSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(_gRPC_SSL_INCLUDE_DIR "")
set(_gRPC_SSL_LIBRARIES OpenSSL::Crypto OpenSSL::SSL)
# Use abseil-cpp from ClickHouse contrib, not from gRPC third_party.
set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
# We don't want to build C# extensions.
set(gRPC_BUILD_CSHARP_EXT OFF)
# TODO: Remove this. We generally like to compile with C++23 but grpc isn't ready yet.
set (CMAKE_CXX_STANDARD 20)
set(_gRPC_CARES_LIBRARIES ch_contrib::c-ares)
set(gRPC_CARES_PROVIDER "clickhouse" CACHE STRING "" FORCE)
add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}")
# The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes,
# so we need to redefine it back.
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")
include(grpc.cmake)
include(protobuf_generate_grpc.cmake)
set(gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
set(gRPC_PYTHON_PLUGIN $<TARGET_FILE:grpc_python_plugin>)

File diff suppressed because it is too large Load Diff

View File

@ -16,8 +16,7 @@ function(GetLibraryVersion _content _outputVar)
SET(${_outputVar} ${CMAKE_MATCH_1} PARENT_SCOPE)
endfunction()
FILE(READ "${QPL_PROJECT_DIR}/CMakeLists.txt" HEADER_CONTENT)
GetLibraryVersion("${HEADER_CONTENT}" QPL_VERSION)
set (QPL_VERSION 1.2.0)
message(STATUS "Intel QPL version: ${QPL_VERSION}")
@ -28,16 +27,422 @@ message(STATUS "Intel QPL version: ${QPL_VERSION}")
# The qpl submodule comes with its own version of isal. It contains code which does not exist in upstream isal. It would be nice to link
# only upstream isal (ch_contrib::isal) but at this point we can't.
include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake")
# ==========================================================================
# Copyright (C) 2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
# ==========================================================================
set(QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS "-fno-exceptions;-fno-rtti")
function(modify_standard_language_flag)
# Declaring function parameters
set(OPTIONS "")
set(ONE_VALUE_ARGS
LANGUAGE_NAME
FLAG_NAME
NEW_FLAG_VALUE)
set(MULTI_VALUE_ARGS "")
# Parsing function parameters
cmake_parse_arguments(MODIFY
"${OPTIONS}"
"${ONE_VALUE_ARGS}"
"${MULTI_VALUE_ARGS}"
${ARGN})
# Variables
set(FLAG_REGULAR_EXPRESSION "${MODIFY_FLAG_NAME}.*[ ]*")
set(NEW_VALUE "${MODIFY_FLAG_NAME}${MODIFY_NEW_FLAG_VALUE}")
# Replacing specified flag with new value
string(REGEX REPLACE
${FLAG_REGULAR_EXPRESSION} ${NEW_VALUE}
NEW_COMPILE_FLAGS
"${CMAKE_${MODIFY_LANGUAGE_NAME}_FLAGS}")
# Returning the value
set(CMAKE_${MODIFY_LANGUAGE_NAME}_FLAGS ${NEW_COMPILE_FLAGS} PARENT_SCOPE)
endfunction()
function(get_function_name_with_default_bit_width in_function_name bit_width out_function_name)
if(in_function_name MATCHES ".*_i")
string(REPLACE "_i" "" in_function_name ${in_function_name})
set(${out_function_name} "${in_function_name}_${bit_width}_i" PARENT_SCOPE)
else()
set(${out_function_name} "${in_function_name}_${bit_width}" PARENT_SCOPE)
endif()
endfunction()
macro(get_list_of_supported_optimizations PLATFORMS_LIST)
list(APPEND PLATFORMS_LIST "")
list(APPEND PLATFORMS_LIST "px")
list(APPEND PLATFORMS_LIST "avx512")
endmacro(get_list_of_supported_optimizations)
function(generate_unpack_kernel_arrays current_directory PLATFORMS_LIST)
list(APPEND UNPACK_POSTFIX_LIST "")
list(APPEND UNPACK_PRLE_POSTFIX_LIST "")
list(APPEND PACK_POSTFIX_LIST "")
list(APPEND PACK_INDEX_POSTFIX_LIST "")
list(APPEND SCAN_POSTFIX_LIST "")
list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "")
list(APPEND DEFAULT_BIT_WIDTH_LIST "")
#create list of functions that use only 8u 16u 32u postfixes
list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "unpack_prle")
list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "extract")
list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "extract_i")
list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "select")
list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "select_i")
list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "expand")
#create default bit width list
list(APPEND DEFAULT_BIT_WIDTH_LIST "8u")
list(APPEND DEFAULT_BIT_WIDTH_LIST "16u")
list(APPEND DEFAULT_BIT_WIDTH_LIST "32u")
#create scan kernel postfixes
list(APPEND SCAN_COMPARATOR_LIST "")
list(APPEND SCAN_COMPARATOR_LIST "eq")
list(APPEND SCAN_COMPARATOR_LIST "ne")
list(APPEND SCAN_COMPARATOR_LIST "lt")
list(APPEND SCAN_COMPARATOR_LIST "le")
list(APPEND SCAN_COMPARATOR_LIST "gt")
list(APPEND SCAN_COMPARATOR_LIST "ge")
list(APPEND SCAN_COMPARATOR_LIST "range")
list(APPEND SCAN_COMPARATOR_LIST "not_range")
foreach(SCAN_COMPARATOR IN LISTS SCAN_COMPARATOR_LIST)
list(APPEND SCAN_POSTFIX_LIST "_${SCAN_COMPARATOR}_8u")
list(APPEND SCAN_POSTFIX_LIST "_${SCAN_COMPARATOR}_16u8u")
list(APPEND SCAN_POSTFIX_LIST "_${SCAN_COMPARATOR}_32u8u")
endforeach()
# create unpack kernel postfixes
foreach(input_width RANGE 1 32 1)
if(input_width LESS 8 OR input_width EQUAL 8)
list(APPEND UNPACK_POSTFIX_LIST "_${input_width}u8u")
elseif(input_width LESS 16 OR input_width EQUAL 16)
list(APPEND UNPACK_POSTFIX_LIST "_${input_width}u16u")
else()
list(APPEND UNPACK_POSTFIX_LIST "_${input_width}u32u")
endif()
endforeach()
# create pack kernel postfixes
foreach(output_width RANGE 1 8 1)
list(APPEND PACK_POSTFIX_LIST "_8u${output_width}u")
endforeach()
foreach(output_width RANGE 9 16 1)
list(APPEND PACK_POSTFIX_LIST "_16u${output_width}u")
endforeach()
foreach(output_width RANGE 17 32 1)
list(APPEND PACK_POSTFIX_LIST "_32u${output_width}u")
endforeach()
list(APPEND PACK_POSTFIX_LIST "_8u16u")
list(APPEND PACK_POSTFIX_LIST "_8u32u")
list(APPEND PACK_POSTFIX_LIST "_16u32u")
# create pack index kernel postfixes
list(APPEND PACK_INDEX_POSTFIX_LIST "_nu")
list(APPEND PACK_INDEX_POSTFIX_LIST "_8u")
list(APPEND PACK_INDEX_POSTFIX_LIST "_8u16u")
list(APPEND PACK_INDEX_POSTFIX_LIST "_8u32u")
# write to file
file(MAKE_DIRECTORY ${current_directory}/generated)
foreach(PLATFORM_VALUE IN LISTS PLATFORMS_LIST)
set(directory "${current_directory}/generated")
set(PLATFORM_PREFIX "${PLATFORM_VALUE}_")
#
# Write unpack table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}unpack.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "unpack_table_t ${PLATFORM_PREFIX}unpack_table = {\n")
#write LE kernels
foreach(UNPACK_POSTFIX IN LISTS UNPACK_POSTFIX_LIST)
file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "\t${PLATFORM_PREFIX}qplc_unpack${UNPACK_POSTFIX},\n")
endforeach()
#write BE kernels
#get last element of the list
set(LAST_ELEMENT "")
list(GET UNPACK_POSTFIX_LIST -1 LAST_ELEMENT)
foreach(UNPACK_POSTFIX IN LISTS UNPACK_POSTFIX_LIST)
if(UNPACK_POSTFIX STREQUAL LAST_ELEMENT)
file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "\t${PLATFORM_PREFIX}qplc_unpack_be${UNPACK_POSTFIX}};\n")
else()
file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "\t${PLATFORM_PREFIX}qplc_unpack_be${UNPACK_POSTFIX},\n")
endif()
endforeach()
file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "}\n")
#
# Write pack table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}pack.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "pack_table_t ${PLATFORM_PREFIX}pack_table = {\n")
#write LE kernels
foreach(PACK_POSTFIX IN LISTS PACK_POSTFIX_LIST)
file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "\t${PLATFORM_PREFIX}qplc_pack${PACK_POSTFIX},\n")
endforeach()
#write BE kernels
#get last element of the list
set(LAST_ELEMENT "")
list(GET PACK_POSTFIX_LIST -1 LAST_ELEMENT)
foreach(PACK_POSTFIX IN LISTS PACK_POSTFIX_LIST)
if(PACK_POSTFIX STREQUAL LAST_ELEMENT)
file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "\t${PLATFORM_PREFIX}qplc_pack_be${PACK_POSTFIX}};\n")
else()
file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "\t${PLATFORM_PREFIX}qplc_pack_be${PACK_POSTFIX},\n")
endif()
endforeach()
file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "}\n")
#
# Write scan table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}scan.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "scan_table_t ${PLATFORM_PREFIX}scan_table = {\n")
#get last element of the list
set(LAST_ELEMENT "")
list(GET SCAN_POSTFIX_LIST -1 LAST_ELEMENT)
foreach(SCAN_POSTFIX IN LISTS SCAN_POSTFIX_LIST)
if(SCAN_POSTFIX STREQUAL LAST_ELEMENT)
file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX}};\n")
else()
file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX},\n")
endif()
endforeach()
file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "}\n")
#
# Write scan_i table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}scan_i.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "scan_i_table_t ${PLATFORM_PREFIX}scan_i_table = {\n")
#get last element of the list
set(LAST_ELEMENT "")
list(GET SCAN_POSTFIX_LIST -1 LAST_ELEMENT)
foreach(SCAN_POSTFIX IN LISTS SCAN_POSTFIX_LIST)
if(SCAN_POSTFIX STREQUAL LAST_ELEMENT)
file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX}_i};\n")
else()
file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX}_i,\n")
endif()
endforeach()
file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "}\n")
#
# Write pack_index table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}pack_index.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "pack_index_table_t ${PLATFORM_PREFIX}pack_index_table = {\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_bits_nu,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u16u,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u32u,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_bits_be_nu,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_be_8u16u,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_be_8u32u};\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "}\n")
#
# Write default bit width functions
#
foreach(DEAULT_BIT_WIDTH_FUNCTION IN LISTS DEFAULT_BIT_WIDTH_FUNCTIONS_LIST)
file(WRITE ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "${DEAULT_BIT_WIDTH_FUNCTION}_table_t ${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}_table = {\n")
#get last element of the list
set(LAST_ELEMENT "")
list(GET DEFAULT_BIT_WIDTH_LIST -1 LAST_ELEMENT)
foreach(BIT_WIDTH IN LISTS DEFAULT_BIT_WIDTH_LIST)
set(FUNCTION_NAME "")
get_function_name_with_default_bit_width(${DEAULT_BIT_WIDTH_FUNCTION} ${BIT_WIDTH} FUNCTION_NAME)
if(BIT_WIDTH STREQUAL LAST_ELEMENT)
file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "\t${PLATFORM_PREFIX}qplc_${FUNCTION_NAME}};\n")
else()
file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "\t${PLATFORM_PREFIX}qplc_${FUNCTION_NAME},\n")
endif()
endforeach()
file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "}\n")
endforeach()
#
# Write aggregates table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}aggregates.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "aggregates_table_t ${PLATFORM_PREFIX}aggregates_table = {\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_bit_aggregates_8u,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_aggregates_8u,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_aggregates_16u,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_aggregates_32u};\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "}\n")
#
# Write mem_copy functions table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "memory_copy_table_t ${PLATFORM_PREFIX}memory_copy_table = {\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "\t${PLATFORM_PREFIX}qplc_copy_8u,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "\t${PLATFORM_PREFIX}qplc_copy_16u,\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "\t${PLATFORM_PREFIX}qplc_copy_32u};\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "}\n")
#
# Write mem_copy functions table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}zero.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "zero_table_t ${PLATFORM_PREFIX}zero_table = {\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "\t${PLATFORM_PREFIX}qplc_zero_8u};\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "}\n")
#
# Write move functions table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}move.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "move_table_t ${PLATFORM_PREFIX}move_table = {\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "\t${PLATFORM_PREFIX}qplc_move_8u};\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "}\n")
#
# Write crc64 function table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}crc64.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "crc64_table_t ${PLATFORM_PREFIX}crc64_table = {\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "\t${PLATFORM_PREFIX}qplc_crc64};\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "}\n")
#
# Write xor_checksum function table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "#include \"qplc_api.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "xor_checksum_table_t ${PLATFORM_PREFIX}xor_checksum_table = {\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "\t${PLATFORM_PREFIX}qplc_xor_checksum_8u};\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "}\n")
#
# Write deflate functions table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include \"deflate_slow_icf.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include \"deflate_hash_table.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include \"deflate_histogram.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "deflate_table_t ${PLATFORM_PREFIX}deflate_table = {\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "\t reinterpret_cast<void *>(&${PLATFORM_PREFIX}slow_deflate_icf_body),\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "\t reinterpret_cast<void *>(&${PLATFORM_PREFIX}deflate_histogram_reset),\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "\t reinterpret_cast<void *>(&${PLATFORM_PREFIX}deflate_hash_table_reset)};\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "}\n")
#
# Write deflate fix functions table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "#include \"deflate_slow.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "deflate_fix_table_t ${PLATFORM_PREFIX}deflate_fix_table = {\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "\t reinterpret_cast<void *>(&${PLATFORM_PREFIX}slow_deflate_body)};\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "}\n")
#
# Write setup_dictionary functions table
#
file(WRITE ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "#include \"deflate_slow_utils.h\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "#include \"dispatcher/dispatcher.hpp\"\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "namespace qpl::core_sw::dispatcher\n{\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "setup_dictionary_table_t ${PLATFORM_PREFIX}setup_dictionary_table = {\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "\t reinterpret_cast<void *>(&${PLATFORM_PREFIX}setup_dictionary)};\n")
file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "}\n")
endforeach()
endfunction()
# check nasm compiler
include(CheckLanguage)
check_language(ASM_NASM)
if(NOT CMAKE_ASM_NASM_COMPILER)
message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!")
endif()
# [SUBDIR]isal
enable_language(ASM_NASM)
set(ISAL_C_SRC ${QPL_SRC_DIR}/isal/igzip/adler32_base.c
@ -107,11 +512,6 @@ set_target_properties(isal PROPERTIES
CXX_STANDARD 11
C_STANDARD 99)
target_compile_options(isal PRIVATE
"$<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}>"
"$<$<CONFIG:Debug>:>"
"$<$<CONFIG:Release>:>")
# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available".
# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system.
target_compile_options(isal_asm PRIVATE "-I${QPL_SRC_DIR}/isal/include/"
@ -164,15 +564,7 @@ foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST)
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/compression/include>
PRIVATE $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(qplcore_${PLATFORM_ID} PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)
target_compile_options(qplcore_${PLATFORM_ID}
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
PRIVATE "$<$<CONFIG:Debug>:>"
PRIVATE "$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>")
# Set specific compiler options and/or definitions based on a platform
# Set specific compiler options and/or definitions based on a platform
if (${PLATFORM_ID} MATCHES "avx512")
target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=2)
target_compile_options(qplcore_${PLATFORM_ID} PRIVATE -march=skylake-avx512)
@ -221,10 +613,7 @@ set_target_properties(qplcore_sw_dispatcher PROPERTIES CXX_STANDARD 17)
target_compile_definitions(qplcore_sw_dispatcher PUBLIC -DQPL_LIB)
target_compile_options(qplcore_sw_dispatcher
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>
PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>)
PRIVATE ${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS})
# [SUBDIR]core-iaa
file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c
@ -249,14 +638,6 @@ target_include_directories(core_iaa
PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/sources/c_api> # own_checkers.h
PRIVATE $<TARGET_PROPERTY:qplcore_sw_dispatcher,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(core_iaa PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>
CXX_STANDARD 17)
target_compile_options(core_iaa
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>)
target_compile_features(core_iaa PRIVATE c_std_11)
target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK
@ -286,10 +667,7 @@ set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:middle_layer_lib>)
target_compile_options(middle_layer_lib
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>
PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>)
PRIVATE ${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS})
target_compile_definitions(middle_layer_lib
PUBLIC QPL_VERSION="${QPL_VERSION}"
@ -324,15 +702,8 @@ target_include_directories(_qpl
PRIVATE $<TARGET_PROPERTY:middle_layer_lib,INTERFACE_INCLUDE_DIRECTORIES>
PRIVATE $<BUILD_INTERFACE:${QPL_SRC_DIR}/c_api>)
set_target_properties(_qpl PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>
CXX_STANDARD 17)
target_compile_options(_qpl
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>
PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>)
PRIVATE ${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS})
target_compile_definitions(_qpl
PRIVATE -DQPL_LIB

View File

@ -1,530 +0,0 @@
#!/bin/bash
ckhost="localhost"
ckport=("9000" "9001" "9002" "9003")
WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
OUTPUT_DIR="${WORKING_DIR}/output"
LOG_DIR="${OUTPUT_DIR}/log"
RAWDATA_DIR="${WORKING_DIR}/rawdata_dir"
database_dir="${WORKING_DIR}/database_dir"
CLIENT_SCRIPTS_DIR="${WORKING_DIR}/client_scripts"
LOG_PACK_FILE="$(date +%Y-%m-%d-%H-%M-%S)"
QUERY_FILE="queries_ssb.sql"
SERVER_BIND_CMD[0]="numactl -m 0 -N 0"
SERVER_BIND_CMD[1]="numactl -m 0 -N 0"
SERVER_BIND_CMD[2]="numactl -m 1 -N 1"
SERVER_BIND_CMD[3]="numactl -m 1 -N 1"
CLIENT_BIND_CMD=""
SSB_GEN_FACTOR=20
TABLE_NAME="lineorder_flat"
TALBE_ROWS="119994608"
CODEC_CONFIG="lz4 deflate zstd"
# define instance number
inst_num=$1
if [ ! -n "$1" ]; then
echo "Please clarify instance number from 1,2,3 or 4"
exit 1
else
echo "Benchmarking with instance number:$1"
fi
if [ ! -d "$OUTPUT_DIR" ]; then
mkdir $OUTPUT_DIR
fi
if [ ! -d "$LOG_DIR" ]; then
mkdir $LOG_DIR
fi
if [ ! -d "$RAWDATA_DIR" ]; then
mkdir $RAWDATA_DIR
fi
# define different directories
dir_server=("" "_s2" "_s3" "_s4")
ckreadSql="
CREATE TABLE customer
(
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY LowCardinality(String),
C_NATION LowCardinality(String),
C_REGION LowCardinality(String),
C_PHONE String,
C_MKTSEGMENT LowCardinality(String)
)
ENGINE = MergeTree ORDER BY (C_CUSTKEY);
CREATE TABLE lineorder
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
CREATE TABLE part
(
P_PARTKEY UInt32,
P_NAME String,
P_MFGR LowCardinality(String),
P_CATEGORY LowCardinality(String),
P_BRAND LowCardinality(String),
P_COLOR LowCardinality(String),
P_TYPE LowCardinality(String),
P_SIZE UInt8,
P_CONTAINER LowCardinality(String)
)
ENGINE = MergeTree ORDER BY P_PARTKEY;
CREATE TABLE supplier
(
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY LowCardinality(String),
S_NATION LowCardinality(String),
S_REGION LowCardinality(String),
S_PHONE String
)
ENGINE = MergeTree ORDER BY S_SUPPKEY;
"
supplier_table="
CREATE TABLE supplier
(
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY LowCardinality(String),
S_NATION LowCardinality(String),
S_REGION LowCardinality(String),
S_PHONE String
)
ENGINE = MergeTree ORDER BY S_SUPPKEY;
"
part_table="
CREATE TABLE part
(
P_PARTKEY UInt32,
P_NAME String,
P_MFGR LowCardinality(String),
P_CATEGORY LowCardinality(String),
P_BRAND LowCardinality(String),
P_COLOR LowCardinality(String),
P_TYPE LowCardinality(String),
P_SIZE UInt8,
P_CONTAINER LowCardinality(String)
)
ENGINE = MergeTree ORDER BY P_PARTKEY;
"
lineorder_table="
CREATE TABLE lineorder
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
"
customer_table="
CREATE TABLE customer
(
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY LowCardinality(String),
C_NATION LowCardinality(String),
C_REGION LowCardinality(String),
C_PHONE String,
C_MKTSEGMENT LowCardinality(String)
)
ENGINE = MergeTree ORDER BY (C_CUSTKEY);
"
lineorder_flat_table="
SET max_memory_usage = 20000000000;
CREATE TABLE lineorder_flat
ENGINE = MergeTree
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,
l.LO_PARTKEY AS LO_PARTKEY,
l.LO_SUPPKEY AS LO_SUPPKEY,
l.LO_ORDERDATE AS LO_ORDERDATE,
l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
l.LO_QUANTITY AS LO_QUANTITY,
l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
l.LO_DISCOUNT AS LO_DISCOUNT,
l.LO_REVENUE AS LO_REVENUE,
l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
l.LO_TAX AS LO_TAX,
l.LO_COMMITDATE AS LO_COMMITDATE,
l.LO_SHIPMODE AS LO_SHIPMODE,
c.C_NAME AS C_NAME,
c.C_ADDRESS AS C_ADDRESS,
c.C_CITY AS C_CITY,
c.C_NATION AS C_NATION,
c.C_REGION AS C_REGION,
c.C_PHONE AS C_PHONE,
c.C_MKTSEGMENT AS C_MKTSEGMENT,
s.S_NAME AS S_NAME,
s.S_ADDRESS AS S_ADDRESS,
s.S_CITY AS S_CITY,
s.S_NATION AS S_NATION,
s.S_REGION AS S_REGION,
s.S_PHONE AS S_PHONE,
p.P_NAME AS P_NAME,
p.P_MFGR AS P_MFGR,
p.P_CATEGORY AS P_CATEGORY,
p.P_BRAND AS P_BRAND,
p.P_COLOR AS P_COLOR,
p.P_TYPE AS P_TYPE,
p.P_SIZE AS P_SIZE,
p.P_CONTAINER AS P_CONTAINER
FROM lineorder AS l
INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;
show settings ilike 'max_memory_usage';
"
function insert_data(){
echo "insert_data:$1"
create_table_prefix="clickhouse client --host ${ckhost} --port $2 --multiquery -q"
insert_data_prefix="clickhouse client --query "
case $1 in
all)
clickhouse client --host ${ckhost} --port $2 --multiquery -q"$ckreadSql" && {
${insert_data_prefix} "INSERT INTO customer FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/customer.tbl --port=$2
${insert_data_prefix} "INSERT INTO part FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/part.tbl --port=$2
${insert_data_prefix} "INSERT INTO supplier FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl --port=$2
${insert_data_prefix} "INSERT INTO lineorder FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl --port=$2
}
${create_table_prefix}"${lineorder_flat_table}"
;;
customer)
echo ${create_table_prefix}\"${customer_table}\"
${create_table_prefix}"${customer_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
part)
echo ${create_table_prefix}\"${part_table}\"
${create_table_prefix}"${part_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
supplier)
echo ${create_table_prefix}"${supplier_table}"
${create_table_prefix}"${supplier_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
lineorder)
echo ${create_table_prefix}"${lineorder_table}"
${create_table_prefix}"${lineorder_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
lineorder_flat)
echo ${create_table_prefix}"${lineorder_flat_table}"
${create_table_prefix}"${lineorder_flat_table}"
return 0
;;
*)
exit 0
;;
esac
}
function check_sql(){
select_sql="select * from "$1" limit 1"
clickhouse client --host ${ckhost} --port $2 --multiquery -q"${select_sql}"
}
function check_table(){
checknum=0
source_tables="customer part supplier lineorder lineorder_flat"
test_tables=${1:-${source_tables}}
echo "Checking table data required in server..."
for i in $(seq 0 $[inst_num-1])
do
for j in `echo ${test_tables}`
do
check_sql $j ${ckport[i]} &> /dev/null || {
let checknum+=1 && insert_data "$j" ${ckport[i]}
}
done
done
for i in $(seq 0 $[inst_num-1])
do
echo "clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q\"select count() from ${TABLE_NAME};\""
var=$(clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"select count() from ${TABLE_NAME};")
if [ $var -eq $TALBE_ROWS ];then
echo "Instance_${i} Table data integrity check OK -> Rows:$var"
else
echo "Instance_${i} Table data integrity check Failed -> Rows:$var"
exit 1
fi
done
if [ $checknum -gt 0 ];then
echo "Need sleep 10s after first table data insertion...$checknum"
sleep 10
fi
}
function check_instance(){
instance_alive=0
for i in {1..10}
do
sleep 1
netstat -nltp | grep ${1} > /dev/null
if [ $? -ne 1 ];then
instance_alive=1
break
fi
done
if [ $instance_alive -eq 0 ];then
echo "check_instance -> clickhouse server instance faild to launch due to 10s timeout!"
exit 1
else
echo "check_instance -> clickhouse server instance launch successfully!"
fi
}
function start_clickhouse_for_insertion(){
echo "start_clickhouse_for_insertion"
for i in $(seq 0 $[inst_num-1])
do
echo "cd ${database_dir}/$1${dir_server[i]}"
echo "${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&${LOG_DIR}/${1}_${i}_server_log& > /dev/null"
cd ${database_dir}/$1${dir_server[i]}
${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&${LOG_DIR}/${1}_${i}_server_log& > /dev/null
check_instance ${ckport[i]}
done
}
function start_clickhouse_for_stressing(){
echo "start_clickhouse_for_stressing"
for i in $(seq 0 $[inst_num-1])
do
echo "cd ${database_dir}/$1${dir_server[i]}"
echo "${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&/dev/null&"
cd ${database_dir}/$1${dir_server[i]}
${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&/dev/null&
check_instance ${ckport[i]}
done
}
yum -y install git make gcc sudo net-tools &> /dev/null
pip3 install clickhouse_driver numpy &> /dev/null
test -d ${RAWDATA_DIR}/ssb-dbgen || git clone https://github.com/vadimtk/ssb-dbgen.git ${RAWDATA_DIR}/ssb-dbgen && cd ${RAWDATA_DIR}/ssb-dbgen
if [ ! -f ${RAWDATA_DIR}/ssb-dbgen/dbgen ];then
make && {
test -f ${RAWDATA_DIR}/ssb-dbgen/customer.tbl || echo y |./dbgen -s ${SSB_GEN_FACTOR} -T c
test -f ${RAWDATA_DIR}/ssb-dbgen/part.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T p
test -f ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T s
test -f ${RAWDATA_DIR}/ssb-dbgen/date.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T d
test -f ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T l
}
else
test -f ${RAWDATA_DIR}/ssb-dbgen/customer.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T c
test -f ${RAWDATA_DIR}/ssb-dbgen/part.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T p
test -f ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T s
test -f ${RAWDATA_DIR}/ssb-dbgen/date.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T d
test -f ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T l
fi
filenum=`find ${RAWDATA_DIR}/ssb-dbgen/ -name "*.tbl" | wc -l`
if [ $filenum -ne 5 ];then
echo "generate ssb data file *.tbl faild"
exit 1
fi
function kill_instance(){
instance_alive=1
for i in {1..2}
do
pkill clickhouse && sleep 5
instance_alive=0
for i in $(seq 0 $[inst_num-1])
do
netstat -nltp | grep ${ckport[i]} > /dev/null
if [ $? -ne 1 ];then
instance_alive=1
break;
fi
done
if [ $instance_alive -eq 0 ];then
break;
fi
done
if [ $instance_alive -eq 0 ];then
echo "kill_instance OK!"
else
echo "kill_instance Failed -> clickhouse server instance still alive due to 10s timeout"
exit 1
fi
}
function run_test(){
is_xml=0
for i in $(seq 0 $[inst_num-1])
do
if [ -f ${database_dir}/${1}${dir_server[i]}/config_${1}${dir_server[i]}.xml ]; then
is_xml=$[is_xml+1]
fi
done
if [ $is_xml -eq $inst_num ];then
echo "Benchmark with $inst_num instance"
start_clickhouse_for_insertion ${1}
for i in $(seq 0 $[inst_num-1])
do
clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"show databases;" >/dev/null
done
if [ $? -eq 0 ];then
check_table
fi
kill_instance
if [ $1 == "deflate" ];then
test -f ${LOG_DIR}/${1}_server_log && deflatemsg=`cat ${LOG_DIR}/${1}_server_log | grep DeflateJobHWPool`
if [ -n "$deflatemsg" ];then
echo ------------------------------------------------------
echo $deflatemsg
echo ------------------------------------------------------
fi
fi
echo "Check table data required in server_${1} -> Done! "
start_clickhouse_for_stressing ${1}
for i in $(seq 0 $[inst_num-1])
do
clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"show databases;" >/dev/null
done
if [ $? -eq 0 ];then
test -d ${CLIENT_SCRIPTS_DIR} && cd ${CLIENT_SCRIPTS_DIR}
echo "Client stressing... "
echo "${CLIENT_BIND_CMD} python3 client_stressing_test.py ${QUERY_FILE} $inst_num &> ${LOG_DIR}/${1}.log"
${CLIENT_BIND_CMD} python3 client_stressing_test.py ${QUERY_FILE} $inst_num &> ${LOG_DIR}/${1}.log
echo "Completed client stressing, checking log... "
finish_log=`grep "Finished" ${LOG_DIR}/${1}.log | wc -l`
if [ $finish_log -eq 1 ] ;then
kill_instance
test -f ${LOG_DIR}/${1}.log && echo "${1}.log ===> ${LOG_DIR}/${1}.log"
else
kill_instance
echo "No find 'Finished' in client log -> Performance test may fail"
exit 1
fi
else
echo "${1} clickhouse server start fail"
exit 1
fi
else
echo "clickhouse server start fail -> Please check xml files required in ${database_dir} for each instance"
exit 1
fi
}
function clear_log(){
if [ -d "$LOG_DIR" ]; then
cd ${LOG_DIR} && rm -rf *
fi
}
function gather_log_for_codec(){
cd ${OUTPUT_DIR} && mkdir -p ${LOG_PACK_FILE}/${1}
cp -rf ${LOG_DIR} ${OUTPUT_DIR}/${LOG_PACK_FILE}/${1}
}
function pack_log(){
if [ -e "${OUTPUT_DIR}/run.log" ]; then
cp ${OUTPUT_DIR}/run.log ${OUTPUT_DIR}/${LOG_PACK_FILE}/
fi
echo "Please check all log information in ${OUTPUT_DIR}/${LOG_PACK_FILE}"
}
function setup_check(){
iax_dev_num=`accel-config list | grep iax | wc -l`
if [ $iax_dev_num -eq 0 ] ;then
iax_dev_num=`accel-config list | grep iax | wc -l`
if [ $iax_dev_num -eq 0 ] ;then
echo "No IAA devices available -> Please check IAA hardware setup manually!"
exit 1
else
echo "IAA enabled devices number:$iax_dev_num"
fi
else
echo "IAA enabled devices number:$iax_dev_num"
fi
libaccel_version=`accel-config -v`
clickhouser_version=`clickhouse server --version`
kernel_dxd_log=`dmesg | grep dxd`
echo "libaccel_version:$libaccel_version"
echo "clickhouser_version:$clickhouser_version"
echo -e "idxd section in kernel log:\n$kernel_dxd_log"
}
setup_check
export CLICKHOUSE_WATCHDOG_ENABLE=0
for i in ${CODEC_CONFIG[@]}
do
clear_log
codec=${i}
echo "run test------------$codec"
run_test $codec
gather_log_for_codec $codec
done
pack_log
echo "Done."

View File

@ -1,278 +0,0 @@
from operator import eq
import os
import random
import time
import sys
from clickhouse_driver import Client
import numpy as np
import subprocess
import multiprocessing
from multiprocessing import Manager
warmup_runs = 10
calculated_runs = 10
seconds = 30
max_instances_number = 8
retest_number = 3
retest_tolerance = 10
def checkInt(str):
try:
int(str)
return True
except ValueError:
return False
def setup_client(index):
if index < 4:
port_idx = index
else:
port_idx = index + 4
client = Client(
host="localhost",
database="default",
user="default",
password="",
port="900%d" % port_idx,
)
union_mode_query = "SET union_default_mode='DISTINCT'"
client.execute(union_mode_query)
return client
def warm_client(clientN, clientL, query, loop):
for c_idx in range(clientN):
for _ in range(loop):
clientL[c_idx].execute(query)
def read_queries(queries_list):
queries = list()
queries_id = list()
with open(queries_list, "r") as f:
for line in f:
line = line.rstrip()
line = line.split("$")
queries_id.append(line[0])
queries.append(line[1])
return queries_id, queries
def run_task(client, cname, query, loop, query_latency):
start_time = time.time()
for i in range(loop):
client.execute(query)
query_latency.append(client.last_query.elapsed)
end_time = time.time()
p95 = np.percentile(query_latency, 95)
print(
"CLIENT: {0} end. -> P95: %f, qps: %f".format(cname)
% (p95, loop / (end_time - start_time))
)
def run_multi_clients(clientN, clientList, query, loop):
client_pids = {}
start_time = time.time()
manager = multiprocessing.Manager()
query_latency_list0 = manager.list()
query_latency_list1 = manager.list()
query_latency_list2 = manager.list()
query_latency_list3 = manager.list()
query_latency_list4 = manager.list()
query_latency_list5 = manager.list()
query_latency_list6 = manager.list()
query_latency_list7 = manager.list()
for c_idx in range(clientN):
client_name = "Role_%d" % c_idx
if c_idx == 0:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list0),
)
elif c_idx == 1:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list1),
)
elif c_idx == 2:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list2),
)
elif c_idx == 3:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list3),
)
elif c_idx == 4:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list4),
)
elif c_idx == 5:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list5),
)
elif c_idx == 6:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list6),
)
elif c_idx == 7:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list7),
)
else:
print("ERROR: CLIENT number dismatch!!")
exit()
print("CLIENT: %s start" % client_name)
client_pids[c_idx].start()
for c_idx in range(clientN):
client_pids[c_idx].join()
end_time = time.time()
totalT = end_time - start_time
query_latencyTotal = list()
for item in query_latency_list0:
query_latencyTotal.append(item)
for item in query_latency_list1:
query_latencyTotal.append(item)
for item in query_latency_list2:
query_latencyTotal.append(item)
for item in query_latency_list3:
query_latencyTotal.append(item)
for item in query_latency_list4:
query_latencyTotal.append(item)
for item in query_latency_list5:
query_latencyTotal.append(item)
for item in query_latency_list6:
query_latencyTotal.append(item)
for item in query_latency_list7:
query_latencyTotal.append(item)
totalP95 = np.percentile(query_latencyTotal, 95) * 1000
return totalT, totalP95
def run_task_caculated(client, cname, query, loop):
query_latency = list()
start_time = time.time()
for i in range(loop):
client.execute(query)
query_latency.append(client.last_query.elapsed)
end_time = time.time()
p95 = np.percentile(query_latency, 95)
def run_multi_clients_caculated(clientN, clientList, query, loop):
client_pids = {}
start_time = time.time()
for c_idx in range(clientN):
client_name = "Role_%d" % c_idx
client_pids[c_idx] = multiprocessing.Process(
target=run_task_caculated,
args=(clientList[c_idx], client_name, query, loop),
)
client_pids[c_idx].start()
for c_idx in range(clientN):
client_pids[c_idx].join()
end_time = time.time()
totalT = end_time - start_time
return totalT
if __name__ == "__main__":
client_number = 1
queries = list()
queries_id = list()
if len(sys.argv) != 3:
print(
"usage: python3 client_stressing_test.py [queries_file_path] [client_number]"
)
sys.exit()
else:
queries_list = sys.argv[1]
client_number = int(sys.argv[2])
print(
"queries_file_path: %s, client_number: %d" % (queries_list, client_number)
)
if not os.path.isfile(queries_list) or not os.access(queries_list, os.R_OK):
print("please check the right path for queries file")
sys.exit()
if (
not checkInt(sys.argv[2])
or int(sys.argv[2]) > max_instances_number
or int(sys.argv[2]) < 1
):
print("client_number should be in [1~%d]" % max_instances_number)
sys.exit()
client_list = {}
queries_id, queries = read_queries(queries_list)
for c_idx in range(client_number):
client_list[c_idx] = setup_client(c_idx)
# clear cache
os.system("sync; echo 3 > /proc/sys/vm/drop_caches")
print("###Polit Run Begin")
for i in queries:
warm_client(client_number, client_list, i, 1)
print("###Polit Run End -> Start stressing....")
query_index = 0
for q in queries:
print(
"\n###START -> Index: %d, ID: %s, Query: %s"
% (query_index, queries_id[query_index], q)
)
warm_client(client_number, client_list, q, warmup_runs)
print("###Warm Done!")
for j in range(0, retest_number):
totalT = run_multi_clients_caculated(
client_number, client_list, q, calculated_runs
)
curr_loop = int(seconds * calculated_runs / totalT) + 1
print(
"###Calculation Done! -> loopN: %d, expected seconds:%d"
% (curr_loop, seconds)
)
print("###Stress Running! -> %d iterations......" % curr_loop)
totalT, totalP95 = run_multi_clients(
client_number, client_list, q, curr_loop
)
if totalT > (seconds - retest_tolerance) and totalT < (
seconds + retest_tolerance
):
break
else:
print(
"###totalT:%d is far way from expected seconds:%d. Run again ->j:%d!"
% (totalT, seconds, j)
)
print(
"###Completed! -> ID: %s, clientN: %d, totalT: %.2f s, latencyAVG: %.2f ms, P95: %.2f ms, QPS_Final: %.2f"
% (
queries_id[query_index],
client_number,
totalT,
totalT * 1000 / (curr_loop * client_number),
totalP95,
((curr_loop * client_number) / totalT),
)
)
query_index += 1
print("###Finished!")

View File

@ -1,10 +0,0 @@
Q1.1$SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
Q2.1$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
Q2.2$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_BRAND >= 'MFGR#2221' AND P_BRAND <= 'MFGR#2228' AND S_REGION = 'ASIA' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
Q2.3$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
Q3.1$SELECT C_NATION,S_NATION,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997 GROUP BY C_NATION,S_NATION,year ORDER BY year ASC,revenue DESC;
Q3.2$SELECT C_CITY,S_CITY,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND year >= 1992 AND year <= 1997 GROUP BY C_CITY,S_CITY,year ORDER BY year ASC,revenue DESC;
Q3.3$SELECT C_CITY,S_CITY,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997 GROUP BY C_CITY,S_CITY,year ORDER BY year ASC,revenue DESC;
Q4.1$SELECT toYear(LO_ORDERDATE) AS year,C_NATION,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year,C_NATION ORDER BY year ASC,C_NATION ASC;
Q4.2$SELECT toYear(LO_ORDERDATE) AS year,S_NATION,P_CATEGORY,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (year = 1997 OR year = 1998) AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year,S_NATION,P_CATEGORY ORDER BY year ASC,S_NATION ASC,P_CATEGORY ASC;
Q4.3$SELECT toYear(LO_ORDERDATE) AS year,S_CITY,P_BRAND,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE S_NATION = 'UNITED STATES' AND (year = 1997 OR year = 1998) AND P_CATEGORY = 'MFGR#14' GROUP BY year,S_CITY,P_BRAND ORDER BY year ASC,S_CITY ASC,P_BRAND ASC;

View File

@ -1,6 +0,0 @@
WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
if [ ! -d "${WORKING_DIR}/output" ]; then
mkdir ${WORKING_DIR}/output
fi
bash allin1_ssb.sh 2 > ${WORKING_DIR}/output/run.log
echo "Please check log in: ${WORKING_DIR}/output/run.log"

View File

@ -1,49 +0,0 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>deflate_qpl</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -1,49 +0,0 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8124</http_port>
<tcp_port>9001</tcp_port>
<mysql_port>9005</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>deflate_qpl</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -1,49 +0,0 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>lz4</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -1,49 +0,0 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8124</http_port>
<tcp_port>9001</tcp_port>
<mysql_port>9005</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>lz4</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -1,49 +0,0 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>zstd</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -1,49 +0,0 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8124</http_port>
<tcp_port>9001</tcp_port>
<mysql_port>9005</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>zstd</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -27,6 +27,17 @@ set(RE2_SOURCES
add_library(_re2 ${RE2_SOURCES})
target_include_directories(_re2 PUBLIC "${SRC_DIR}")
target_link_libraries(_re2 ch_contrib::abseil_str_format)
target_link_libraries(_re2 PRIVATE
absl::base
absl::core_headers
absl::fixed_array
absl::flat_hash_map
absl::flat_hash_set
absl::inlined_vector
absl::strings
absl::str_format
absl::synchronization
absl::optional
absl::span)
add_library(ch_contrib::re2 ALIAS _re2)

View File

@ -93,11 +93,9 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64")
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64")
if(HAVE_SSE42)
if(ENABLE_AVX2 AND ENABLE_PCLMULQDQ)
add_definitions(-DHAVE_SSE42)
add_definitions(-DHAVE_PCLMUL)
elseif(FORCE_SSE42)
message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled")
endif()
set (HAVE_THREAD_LOCAL 1)
@ -429,7 +427,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc
rocksdb_build_version.cc)
if(HAVE_SSE42)
if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ)
set_source_files_properties(
"${ROCKSDB_SOURCE_DIR}/util/crc32c.cc"
PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")

View File

@ -47,8 +47,6 @@ set(thriftcpp_threads_SOURCES
"${LIBRARY_DIR}/src/thrift/concurrency/Mutex.cpp"
)
include("${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake") # makes config.h
set (HAVE_ARPA_INET_H 1)
set (HAVE_FCNTL_H 1)
set (HAVE_GETOPT_H 1)
@ -81,10 +79,6 @@ if (OS_LINUX AND NOT USE_MUSL)
set (STRERROR_R_CHAR_P 1)
endif ()
#set(PACKAGE ${PACKAGE_NAME})
#set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
#set(VERSION ${thrift_VERSION})
# generate a config.h file
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build/cmake/config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/thrift/config.h")

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.10.3.5"
ARG VERSION="23.10.4.25"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -6,29 +6,27 @@ FROM clickhouse/test-util:latest AS cctools
ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# DO NOT PUT ANYTHING BEFORE THREE NEXT `RUN` DIRECTIVES
# DO NOT PUT ANYTHING BEFORE THE NEXT TWO `RUN` DIRECTIVES
# THE MOST HEAVY OPERATION MUST BE THE FIRST IN THE CACHE
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# libtapi is required to support .tbh format from recent MacOS SDKs
RUN git clone --depth 1 https://github.com/tpoechtrager/apple-libtapi.git \
RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
&& cd apple-libtapi \
&& git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 \
&& INSTALLPREFIX=/cctools ./build.sh \
&& ./install.sh \
&& cd .. \
&& rm -rf apple-libtapi
# Build and install tools for cross-linking to Darwin (x86-64)
RUN git clone --depth 1 https://github.com/tpoechtrager/cctools-port.git \
# Build and install tools for cross-linking to Darwin (aarch64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7 \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=x86_64-apple-darwin \
&& make install -j$(nproc) \
&& cd ../.. \
&& rm -rf cctools-port
# Build and install tools for cross-linking to Darwin (aarch64)
RUN git clone --depth 1 https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& make clean \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=aarch64-apple-darwin \
&& make install -j$(nproc) \
@ -62,19 +60,12 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \
rustup target add aarch64-unknown-linux-musl && \
rustup target add riscv64gc-unknown-linux-gnu
# NOTE: Seems like gcc-11 is too new for ubuntu20 repository
# A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work):
RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
&& apt-get update \
&& apt-get install --yes \
binutils-riscv64-linux-gnu \
build-essential \
g++-11 \
gcc-11 \
gcc-aarch64-linux-gnu \
libc6 \
libc6-dev \
libc6-dev-arm64-cross \
python3-boto3 \
yasm \
zstd \

View File

@ -22,6 +22,7 @@ if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then
fi
fi
# Uncomment to debug ccache. Don't put ccache log in /output right away, or it
# will be confusingly packed into the "performance" package.
# export CCACHE_LOGFILE=/build/ccache.log
@ -32,6 +33,16 @@ mkdir -p /build/build_docker
cd /build/build_docker
rm -f CMakeCache.txt
# We don't want to depend on any third-party CMake files.
# To check it, find and delete them.
grep -o -P '"contrib/[^"]+"' ../.gitmodules |
grep -v -P 'llvm-project|google-protobuf|grpc|abseil-cpp|corrosion' |
xargs -I@ find ../@ -'(' -name 'CMakeLists.txt' -or -name '*.cmake' -')' -and -not -name '*.h.cmake' |
xargs rm
if [ -n "$MAKE_DEB" ]; then
rm -rf /build/packages/root
# NOTE: this is for backward compatibility with previous releases,

View File

@ -236,16 +236,14 @@ def parse_env_variables(
cc = compiler
result.append("DEB_ARCH=amd64")
cxx = cc.replace("gcc", "g++").replace("clang", "clang++")
cxx = cc.replace("clang", "clang++")
if package_type == "deb":
# NOTE: This are the env for packages/build script
# NOTE: This is the env for packages/build script
result.append("MAKE_DEB=true")
cmake_flags.append("-DENABLE_TESTS=0")
cmake_flags.append("-DENABLE_UTILS=0")
cmake_flags.append("-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON")
cmake_flags.append("-DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON")
cmake_flags.append("-DCMAKE_AUTOGEN_VERBOSE=ON")
cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
@ -265,12 +263,7 @@ def parse_env_variables(
elif package_type == "fuzzers":
cmake_flags.append("-DENABLE_FUZZING=1")
cmake_flags.append("-DENABLE_PROTOBUF=1")
cmake_flags.append("-DUSE_INTERNAL_PROTOBUF_LIBRARY=1")
cmake_flags.append("-DWITH_COVERAGE=1")
cmake_flags.append("-DCMAKE_AUTOGEN_VERBOSE=ON")
# cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
# cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
# cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
# Reduce linking and building time by avoid *install/all dependencies
cmake_flags.append("-DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=ON")

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.10.3.5"
ARG VERSION="23.10.4.25"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.10.3.5"
ARG VERSION="23.10.4.25"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -206,7 +206,7 @@ function build
(
cd "$FASTTEST_BUILD"
TIMEFORMAT=$'\nreal\t%3R\nuser\t%3U\nsys\t%3S'
( time ninja clickhouse-bundle) |& ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
( time ninja clickhouse-bundle clickhouse-stripped) |& ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
BUILD_SECONDS_ELAPSED=$(awk '/^....-..-.. ..:..:.. real\t[0-9]/ {print $4}' < "$FASTTEST_OUTPUT/build_log.txt")
echo "build_clickhouse_fasttest_binary: [ OK ] $BUILD_SECONDS_ELAPSED sec." \
| ts '%Y-%m-%d %H:%M:%S' \
@ -215,7 +215,6 @@ function build
mkdir -p "$FASTTEST_OUTPUT/binaries/"
cp programs/clickhouse "$FASTTEST_OUTPUT/binaries/clickhouse"
strip programs/clickhouse -o programs/clickhouse-stripped
zstd --threads=0 programs/clickhouse-stripped -o "$FASTTEST_OUTPUT/binaries/clickhouse-stripped.zst"
fi
ccache_status

View File

@ -189,6 +189,8 @@ function run_tests
test_prefix=right/performance
fi
run_only_changed_tests=0
# Determine which tests to run.
if [ -v CHPC_TEST_GREP ]
then
@ -203,6 +205,7 @@ function run_tests
# tests. The lists of changed files are prepared in entrypoint.sh because
# it has the repository.
test_files=($(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-test-definitions.txt))
run_only_changed_tests=1
else
# The default -- run all tests found in the test dir.
test_files=($(ls "$test_prefix"/*.xml))
@ -226,6 +229,13 @@ function run_tests
test_files=("${test_files[@]}")
fi
if [ "$run_only_changed_tests" -ne 0 ]; then
if [ ${#test_files[@]} -eq 0 ]; then
time "$script_dir/report.py" --no-tests-run > report.html
exit 0
fi
fi
# For PRs w/o changes in test definitons, test only a subset of queries,
# and run them less times. If the corresponding environment variables are
# already set, keep those values.

View File

@ -34,9 +34,4 @@
<memory_profiler_step>0</memory_profiler_step>
</default>
</profiles>
<users>
<default>
<access_management>1</access_management>
</default>
</users>
</clickhouse>

View File

@ -19,6 +19,7 @@ parser.add_argument(
choices=["main", "all-queries"],
help="Which report to build",
)
parser.add_argument("--no-tests-run", action="store_true", default=False)
args = parser.parse_args()
tables = []
@ -354,6 +355,36 @@ if args.report == "main":
add_tested_commits()
def print_status(status, message):
print(
(
"""
<!--status: {status}-->
<!--message: {message}-->
""".format(
status=status, message=message
)
)
)
if args.no_tests_run:
for t in tables:
print(t)
print(
"<h2>No tests to run. Only changed tests were run, but all changed tests are from another batch.</h2>"
)
print(
f"""
</div>
{os.getenv("CHPC_ADD_REPORT_LINKS") or ''}
</body>
</html>
"""
)
# Why failure? Because otherwise we will not notice if we have a bug that leads to 0 tests being run
print_status("failure", "No tests changed, nothing to run")
exit(0)
run_error_rows = tsvRows("run-errors.tsv")
error_tests += len(run_error_rows)
addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
@ -646,16 +677,7 @@ if args.report == "main":
status = "failure"
message = "Errors while building the report."
print(
(
"""
<!--status: {status}-->
<!--message: {message}-->
""".format(
status=status, message=message
)
)
)
print_status(status, message)
elif args.report == "all-queries":
print((header_template.format()))

View File

@ -0,0 +1,28 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.10.4.25-stable (330fd687d41) FIXME as compared to v23.10.3.5-stable (b2ba7637a41)
#### Build/Testing/Packaging Improvement
* Backported in [#56633](https://github.com/ClickHouse/ClickHouse/issues/56633): In [#54043](https://github.com/ClickHouse/ClickHouse/issues/54043) the setup plan started to appear in the logs. It should be only in the `runner_get_all_tests.log` only. As well, send the failed infrastructure event to CI db. [#56214](https://github.com/ClickHouse/ClickHouse/pull/56214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#56737](https://github.com/ClickHouse/ClickHouse/issues/56737): Do not fetch changed submodules in the builder container. [#56689](https://github.com/ClickHouse/ClickHouse/pull/56689) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Select from system tables when table based on table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix restore from backup with `flatten_nested` and `data_type_default_nullable` [#56306](https://github.com/ClickHouse/ClickHouse/pull/56306) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix crash in GCD codec in case when zeros present in data [#56704](https://github.com/ClickHouse/ClickHouse/pull/56704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Rewrite jobs to use callable workflow [#56385](https://github.com/ClickHouse/ClickHouse/pull/56385) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Continue rewriting workflows to reusable tests [#56501](https://github.com/ClickHouse/ClickHouse/pull/56501) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Better exception messages [#56854](https://github.com/ClickHouse/ClickHouse/pull/56854) ([Antonio Andelic](https://github.com/antonio2368)).

View File

@ -0,0 +1,23 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.3.17.13-lts (e867d59020f) FIXME as compared to v23.3.16.7-lts (fb4125cc92a)
#### Build/Testing/Packaging Improvement
* Backported in [#56731](https://github.com/ClickHouse/ClickHouse/issues/56731): Do not fetch changed submodules in the builder container. [#56689](https://github.com/ClickHouse/ClickHouse/pull/56689) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Rewrite jobs to use callable workflow [#56385](https://github.com/ClickHouse/ClickHouse/pull/56385) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Continue rewriting workflows to reusable tests [#56501](https://github.com/ClickHouse/ClickHouse/pull/56501) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Better exception messages [#56854](https://github.com/ClickHouse/ClickHouse/pull/56854) ([Antonio Andelic](https://github.com/antonio2368)).

View File

@ -0,0 +1,31 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.8.7.24-lts (812b95e14ba) FIXME as compared to v23.8.6.16-lts (077df679bed)
#### Build/Testing/Packaging Improvement
* Backported in [#56733](https://github.com/ClickHouse/ClickHouse/issues/56733): Do not fetch changed submodules in the builder container. [#56689](https://github.com/ClickHouse/ClickHouse/pull/56689) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Select from system tables when table based on table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NO CL CATEGORY
* Backported in [#56601](https://github.com/ClickHouse/ClickHouse/issues/56601):. [#56598](https://github.com/ClickHouse/ClickHouse/pull/56598) ([Maksim Kita](https://github.com/kitaisreal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Rewrite jobs to use callable workflow [#56385](https://github.com/ClickHouse/ClickHouse/pull/56385) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Continue rewriting workflows to reusable tests [#56501](https://github.com/ClickHouse/ClickHouse/pull/56501) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Better exception messages [#56854](https://github.com/ClickHouse/ClickHouse/pull/56854) ([Antonio Andelic](https://github.com/antonio2368)).

View File

@ -0,0 +1,34 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.9.5.29-stable (f8554c1a1ff) FIXME as compared to v23.9.4.11-stable (74c1f49dd6a)
#### Build/Testing/Packaging Improvement
* Backported in [#56631](https://github.com/ClickHouse/ClickHouse/issues/56631): In [#54043](https://github.com/ClickHouse/ClickHouse/issues/54043) the setup plan started to appear in the logs. It should be only in the `runner_get_all_tests.log` only. As well, send the failed infrastructure event to CI db. [#56214](https://github.com/ClickHouse/ClickHouse/pull/56214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#56735](https://github.com/ClickHouse/ClickHouse/issues/56735): Do not fetch changed submodules in the builder container. [#56689](https://github.com/ClickHouse/ClickHouse/pull/56689) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Select from system tables when table based on table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix crash in GCD codec in case when zeros present in data [#56704](https://github.com/ClickHouse/ClickHouse/pull/56704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NO CL CATEGORY
* Backported in [#56603](https://github.com/ClickHouse/ClickHouse/issues/56603):. [#56598](https://github.com/ClickHouse/ClickHouse/pull/56598) ([Maksim Kita](https://github.com/kitaisreal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Improve enrich image [#55793](https://github.com/ClickHouse/ClickHouse/pull/55793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Rewrite jobs to use callable workflow [#56385](https://github.com/ClickHouse/ClickHouse/pull/56385) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Continue rewriting workflows to reusable tests [#56501](https://github.com/ClickHouse/ClickHouse/pull/56501) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Better exception messages [#56854](https://github.com/ClickHouse/ClickHouse/pull/56854) ([Antonio Andelic](https://github.com/antonio2368)).

View File

@ -14,7 +14,7 @@ You should never use too granular of partitioning. Don't partition your data by
Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well.
A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible.
A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible. Partitions improve performance for queries containing a partitioning key because ClickHouse will filter for that partition before selecting the parts and granules within the partition.
The partition is specified in the `PARTITION BY expr` clause when [creating a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:

View File

@ -6,7 +6,7 @@ sidebar_label: MergeTree
# MergeTree
The `MergeTree` engine and other engines of this family (`*MergeTree`) are the most robust ClickHouse table engines.
The `MergeTree` engine and other engines of this family (`*MergeTree`) are the most commonly used and most robust ClickHouse table engines.
Engines in the `MergeTree` family are designed for inserting a very large amount of data into a table. The data is quickly written to the table part by part, then rules are applied for merging the parts in the background. This method is much more efficient than continually rewriting the data in storage during insert.
@ -32,6 +32,8 @@ Main features:
The [Merge](/docs/en/engines/table-engines/special/merge.md/#merge) engine does not belong to the `*MergeTree` family.
:::
If you need to update rows frequently, we recommend using the [`ReplacingMergeTree`](/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md) table engine. Using `ALTER TABLE my_table UPDATE` to update rows triggers a mutation, which causes parts to be re-written and uses IO/resources. With `ReplacingMergeTree`, you can simply insert the updated rows and the old rows will be replaced according to the table sorting key.
## Creating a Table {#table_engine-mergetree-creating-a-table}
``` sql

View File

@ -731,11 +731,13 @@ Default value: LZ4.
## max_block_size {#setting-max_block_size}
In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of the block (in a count of rows) to load from tables. The block size shouldnt be too small, so that the expenditures on each block are still noticeable, but not too large so that the query with LIMIT that is completed after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality.
In ClickHouse, data is processed by blocks, which are sets of column parts. The internal processing cycles for a single block are efficient but there are noticeable costs when processing each block.
Default value: 65,536.
The `max_block_size` setting indicates the recommended maximum number of rows to include in a single block when loading data from tables. Blocks the size of `max_block_size` are not always loaded from the table: if ClickHouse determines that less data needs to be retrieved, a smaller block is processed.
Blocks the size of `max_block_size` are not always loaded from the table. If it is obvious that less data needs to be retrieved, a smaller block is processed.
The block size should not be too small to avoid noticeable costs when processing each block. It should also not be too large to ensure that queries with a LIMIT clause execute quickly after processing the first block. When setting `max_block_size`, the goal should be to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality.
Default value: `65,409`
## preferred_block_size_bytes {#preferred-block-size-bytes}
@ -2714,6 +2716,10 @@ Default value: `0`.
- [Distributed Table Engine](../../engines/table-engines/special/distributed.md/#distributed)
- [Managing Distributed Tables](../../sql-reference/statements/system.md/#query-language-system-distributed)
## insert_distributed_sync {#insert_distributed_sync}
Alias for [`distributed_foreground_insert`](#distributed_foreground_insert).
## insert_shard_id {#insert_shard_id}
If not `0`, specifies the shard of [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table into which the data will be inserted synchronously.
@ -4820,3 +4826,10 @@ When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY`
When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section.
Default value: `false`.
## s3_use_adaptive_timeouts {#s3_use_adaptive_timeouts}
When set to `true` than for all s3 requests first two attempts are made with low send and receive timeouts.
When set to `false` than all attempts are made with identical timeouts.
Default value: `true`.

View File

@ -34,6 +34,10 @@ The `SELECT count() FROM table` query is optimized by default using metadata fro
However `SELECT count(nullable_column) FROM table` query can be optimized by enabling the [optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [null](../../../sql-reference/data-types/nullable.md#finding-null) subcolumn instead of reading and processing the whole column data. The query `SELECT count(n) FROM table` transforms to `SELECT sum(NOT n.null) FROM table`.
**Improving COUNT(DISTINCT expr) performance**
If your `COUNT(DISTINCT expr)` query is slow, consider adding a [`GROUP BY`](../../../sql-reference/statements/select/group-by.md) clause as this improves parallelization. You can also use a [projection](../../../sql-reference/statements/alter/projection.md) to create an index on the target column used with `COUNT(DISTINCT target_col)`.
**Examples**
Example 1:

View File

@ -6,9 +6,9 @@ sidebar_label: Random Numbers
# Functions for Generating Random Numbers
All functions in this section accept zero or one arguments. The only use of the argument (if provided) is to prevent prevent [common subexpression
elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) such that two different execution of the same random
function in a query return different random values.
All functions in this section accept zero or one arguments. The only use of the argument (if provided) is to prevent [common subexpression
elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) such that two different executions within a row of the same random
function return different random values.
Related content
- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse)

View File

@ -429,7 +429,7 @@ SELECT format('{} {}', 'Hello', 'World')
## concat
Concatenates the strings listed in the arguments without separator.
Concatenates the given arguments.
**Syntax**
@ -439,7 +439,9 @@ concat(s1, s2, ...)
**Arguments**
Values of type String or FixedString.
At least two values of arbitrary type.
Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments.
**Returned values**
@ -449,6 +451,8 @@ If any of arguments is `NULL`, the function returns `NULL`.
**Example**
Query:
``` sql
SELECT concat('Hello, ', 'World!');
```
@ -461,6 +465,20 @@ Result:
└─────────────────────────────┘
```
Query:
```sql
SELECT concat(42, 144);
```
Result:
```result
┌─concat(42, 144)─┐
│ 42144 │
└─────────────────┘
```
## concatAssumeInjective
Like [concat](#concat) but assumes that `concat(s1, s2, ...) → sn` is injective. Can be used for optimization of GROUP BY.
@ -526,6 +544,8 @@ Concatenates the given strings with a given separator.
concatWithSeparator(sep, expr1, expr2, expr3...)
```
Alias: `concat_ws`
**Arguments**
- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).

View File

@ -5,7 +5,7 @@ sidebar_label: OPTIMIZE
title: "OPTIMIZE Statement"
---
This query tries to initialize an unscheduled merge of data parts for tables.
This query tries to initialize an unscheduled merge of data parts for tables. Note that we generally recommend against using `OPTIMIZE TABLE ... FINAL` (see these [docs](/docs/en/optimize/avoidoptimizefinal)) as its use case is meant for administration, not for daily operations.
:::note
`OPTIMIZE` cant fix the `Too many parts` error.

View File

@ -2,6 +2,3 @@ position: 1
label: 'Введение'
collapsible: true
collapsed: true
link:
type: generated-index
title: Введение

View File

@ -0,0 +1,13 @@
---
slug: /ru/introduction/
sidebar_label: "Введение"
sidebar_position: 8
---
# Введение
В этом разделе содержится информация о том, как начать работу с ClickHouse.
- [Отличительные возможности ClickHouse](./distinctive-features.md)
- [Производительность](./performance.md)
- [История ClickHouse](./history.md)

View File

@ -1,3 +1,5 @@
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
endif ()

View File

@ -46,6 +46,7 @@ namespace CurrentMetrics
{
extern const Metric LocalThread;
extern const Metric LocalThreadActive;
extern const Metric LocalThreadScheduled;
}
namespace DB
@ -107,7 +108,7 @@ public:
settings(settings_),
shared_context(Context::createShared()),
global_context(Context::createGlobal(shared_context.get())),
pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency)
pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency)
{
const auto secure = secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable;
size_t connections_cnt = std::max(ports_.size(), hosts_.size());

View File

@ -25,6 +25,7 @@ namespace CurrentMetrics
{
extern const Metric LocalThread;
extern const Metric LocalThreadActive;
extern const Metric LocalThreadScheduled;
}
namespace DB
@ -200,7 +201,7 @@ void ClusterCopier::discoverTablePartitions(const ConnectionTimeouts & timeouts,
{
/// Fetch partitions list from a shard
{
ThreadPool thread_pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, num_threads ? num_threads : 2 * getNumberOfPhysicalCPUCores());
ThreadPool thread_pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads ? num_threads : 2 * getNumberOfPhysicalCPUCores());
for (const TaskShardPtr & task_shard : task_table.all_shards)
thread_pool.scheduleOrThrowOnError([this, timeouts, task_shard]()

View File

@ -676,6 +676,10 @@ try
global_context->addWarningMessage("Server was built with sanitizer. It will work slowly.");
#endif
#if defined(SANITIZE_COVERAGE) || WITH_COVERAGE
global_context->addWarningMessage("Server was built with code coverage. It will work slowly.");
#endif
const size_t physical_server_memory = getMemoryAmount();
LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.",

View File

@ -23,7 +23,9 @@
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
<named_collection_control>1</named_collection_control>
</default>
</users>

View File

@ -85,7 +85,10 @@
<quota>default</quota>
<!-- User can create other users and grant rights to them. -->
<!-- <access_management>1</access_management> -->
<access_management>1</access_management>
<!-- User can manipulate named collections. -->
<named_collection_control>1</named_collection_control>
<!-- User permissions can be granted here -->
<!--

View File

@ -50,6 +50,21 @@ BackupFileInfo BackupCoordinationFileInfos::getFileInfoByDataFileIndex(size_t da
return *(file_infos_for_all_hosts[data_file_index]);
}
namespace
{
/// copy all the file infos that are shared between reference target and source
void copyFileInfoToReference(const BackupFileInfo & target, BackupFileInfo & reference)
{
reference.size = target.size;
reference.checksum = target.checksum;
reference.base_size = target.base_size;
reference.base_checksum = target.base_checksum;
reference.encrypted_by_disk = target.encrypted_by_disk;
}
}
void BackupCoordinationFileInfos::prepare() const
{
if (prepared)
@ -78,8 +93,38 @@ void BackupCoordinationFileInfos::prepare() const
num_files = 0;
total_size_of_files = 0;
std::vector<BackupFileInfo *> unresolved_references;
std::unordered_map<std::string_view, BackupFileInfo *> file_name_to_info;
const auto handle_unresolved_references = [&](const auto & try_resolve_reference)
{
for (auto * reference : unresolved_references)
{
if (!try_resolve_reference(*reference))
throw DB::Exception(
ErrorCodes::LOGICAL_ERROR,
"Couldn't resolve reference {} with target {}",
reference->file_name,
reference->reference_target);
}
};
if (plain_backup)
{
const auto try_resolve_reference = [&](BackupFileInfo & reference)
{
auto it = file_name_to_info.find(reference.reference_target);
if (it == file_name_to_info.end())
return false;
auto & target_info = it->second;
target_info->data_file_copies.push_back(reference.file_name);
copyFileInfoToReference(*target_info, reference);
total_size_of_files += reference.size;
return true;
};
/// For plain backup all file infos are stored as is, without checking for duplicates or skipping empty files.
for (size_t i = 0; i != file_infos_for_all_hosts.size(); ++i)
{
@ -88,12 +133,38 @@ void BackupCoordinationFileInfos::prepare() const
info.data_file_index = i;
info.base_size = 0; /// Base backup must not be used while creating a plain backup.
info.base_checksum = 0;
total_size_of_files += info.size;
if (info.reference_target.empty())
{
file_name_to_info.emplace(info.file_name, &info);
total_size_of_files += info.size;
}
else if (!try_resolve_reference(info))
{
unresolved_references.push_back(&info);
}
}
handle_unresolved_references(try_resolve_reference);
num_files = file_infos_for_all_hosts.size();
}
else
{
const auto try_resolve_reference = [&](BackupFileInfo & reference)
{
auto it = file_name_to_info.find(reference.reference_target);
if (it == file_name_to_info.end())
return false;
auto & target_info = it->second;
copyFileInfoToReference(*target_info, reference);
reference.data_file_name = target_info->data_file_name;
reference.data_file_index = target_info->data_file_index;
return true;
};
/// For non-plain backups files with the same size and checksum are stored only once,
/// in order to find those files we'll use this map.
std::map<SizeAndChecksum, size_t> data_file_index_by_checksum;
@ -101,6 +172,15 @@ void BackupCoordinationFileInfos::prepare() const
for (size_t i = 0; i != file_infos_for_all_hosts.size(); ++i)
{
auto & info = *(file_infos_for_all_hosts[i]);
if (!info.reference_target.empty())
{
if (!try_resolve_reference(info))
unresolved_references.push_back(&info);
continue;
}
if (info.size == info.base_size)
{
/// A file is either empty or can be get from the base backup as a whole.
@ -126,7 +206,13 @@ void BackupCoordinationFileInfos::prepare() const
info.data_file_name = file_infos_for_all_hosts[it->second]->data_file_name;
}
}
file_name_to_info.emplace(info.file_name, &info);
}
handle_unresolved_references(try_resolve_reference);
num_files = file_infos_for_all_hosts.size();
}
prepared = true;

View File

@ -0,0 +1,23 @@
#include <Backups/BackupCoordinationKeeperMapTables.h>
namespace DB
{
void BackupCoordinationKeeperMapTables::addTable(const std::string & table_zookeeper_root_path, const std::string & table_id, const std::string & data_path_in_backup)
{
if (auto it = tables_with_info.find(table_zookeeper_root_path); it != tables_with_info.end())
{
if (table_id > it->second.table_id)
it->second = KeeperMapTableInfo{table_id, data_path_in_backup};
return;
}
tables_with_info.emplace(table_zookeeper_root_path, KeeperMapTableInfo{table_id, data_path_in_backup});
}
std::string BackupCoordinationKeeperMapTables::getDataPath(const std::string & table_zookeeper_root_path) const
{
return tables_with_info.at(table_zookeeper_root_path).data_path_in_backup;
}
}

View File

@ -0,0 +1,23 @@
#pragma once
#include <unordered_map>
#include <string>
namespace DB
{
struct BackupCoordinationKeeperMapTables
{
void addTable(const std::string & table_zookeeper_root_path, const std::string & table_id, const std::string & data_path_in_backup);
std::string getDataPath(const std::string & table_zookeeper_root_path) const;
struct KeeperMapTableInfo
{
std::string table_id;
std::string data_path_in_backup;
};
private:
std::unordered_map<std::string /* root zookeeper path */, KeeperMapTableInfo> tables_with_info;
};
}

View File

@ -97,6 +97,18 @@ Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & load
return replicated_sql_objects.getDirectories(loader_zk_path, object_type, "");
}
void BackupCoordinationLocal::addKeeperMapTable(const String & table_zookeeper_root_path, const String & table_id, const String & data_path_in_backup)
{
std::lock_guard lock(keeper_map_tables_mutex);
keeper_map_tables.addTable(table_zookeeper_root_path, table_id, data_path_in_backup);
}
String BackupCoordinationLocal::getKeeperMapDataPath(const String & table_zookeeper_root_path) const
{
std::lock_guard lock(keeper_map_tables_mutex);
return keeper_map_tables.getDataPath(table_zookeeper_root_path);
}
void BackupCoordinationLocal::addFileInfos(BackupFileInfos && file_infos_)
{

View File

@ -5,7 +5,9 @@
#include <Backups/BackupCoordinationReplicatedAccess.h>
#include <Backups/BackupCoordinationReplicatedSQLObjects.h>
#include <Backups/BackupCoordinationReplicatedTables.h>
#include <Backups/BackupCoordinationKeeperMapTables.h>
#include <base/defines.h>
#include <cstddef>
#include <mutex>
#include <unordered_set>
@ -44,6 +46,9 @@ public:
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const override;
void addKeeperMapTable(const String & table_zookeeper_root_path, const String & table_id, const String & data_path_in_backup) override;
String getKeeperMapDataPath(const String & table_zookeeper_root_path) const override;
void addFileInfos(BackupFileInfos && file_infos) override;
BackupFileInfos getFileInfos() const override;
BackupFileInfos getFileInfosForAllHosts() const override;
@ -58,6 +63,7 @@ private:
BackupCoordinationReplicatedAccess TSA_GUARDED_BY(replicated_access_mutex) replicated_access;
BackupCoordinationReplicatedSQLObjects TSA_GUARDED_BY(replicated_sql_objects_mutex) replicated_sql_objects;
BackupCoordinationFileInfos TSA_GUARDED_BY(file_infos_mutex) file_infos;
BackupCoordinationKeeperMapTables keeper_map_tables TSA_GUARDED_BY(keeper_map_tables_mutex);
std::unordered_set<size_t> TSA_GUARDED_BY(writing_files_mutex) writing_files;
mutable std::mutex replicated_tables_mutex;
@ -65,6 +71,7 @@ private:
mutable std::mutex replicated_sql_objects_mutex;
mutable std::mutex file_infos_mutex;
mutable std::mutex writing_files_mutex;
mutable std::mutex keeper_map_tables_mutex;
};
}

View File

@ -116,6 +116,7 @@ namespace
writeBinary(info.base_size, out);
writeBinary(info.base_checksum, out);
writeBinary(info.encrypted_by_disk, out);
writeBinary(info.reference_target, out);
/// We don't store `info.data_file_name` and `info.data_file_index` because they're determined automalically
/// after reading file infos for all the hosts (see the class BackupCoordinationFileInfos).
}
@ -138,6 +139,7 @@ namespace
readBinary(info.base_size, in);
readBinary(info.base_checksum, in);
readBinary(info.encrypted_by_disk, in);
readBinary(info.reference_target, in);
}
return res;
}
@ -230,6 +232,7 @@ void BackupCoordinationRemote::createRootNodes()
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_data_paths", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_access", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_sql_objects", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/keeper_map_tables", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/file_infos", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/writing_files", "", zkutil::CreateMode::Persistent));
zk->tryMulti(ops, responses);
@ -666,6 +669,80 @@ void BackupCoordinationRemote::prepareReplicatedSQLObjects() const
replicated_sql_objects->addDirectory(std::move(directory));
}
void BackupCoordinationRemote::addKeeperMapTable(const String & table_zookeeper_root_path, const String & table_id, const String & data_path_in_backup)
{
{
std::lock_guard lock{keeper_map_tables_mutex};
if (keeper_map_tables)
throw Exception(ErrorCodes::LOGICAL_ERROR, "addKeeperMapTable() must not be called after preparing");
}
auto holder = with_retries.createRetriesControlHolder("addKeeperMapTable");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/keeper_map_tables/" + escapeForFileName(table_id);
if (auto res
= zk->tryCreate(path, fmt::format("{}\n{}", table_zookeeper_root_path, data_path_in_backup), zkutil::CreateMode::Persistent);
res != Coordination::Error::ZOK && res != Coordination::Error::ZNODEEXISTS)
throw zkutil::KeeperException(res);
});
}
void BackupCoordinationRemote::prepareKeeperMapTables() const
{
if (keeper_map_tables)
return;
std::vector<std::pair<std::string, BackupCoordinationKeeperMapTables::KeeperMapTableInfo>> keeper_map_table_infos;
auto holder = with_retries.createRetriesControlHolder("prepareKeeperMapTables");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
keeper_map_table_infos.clear();
with_retries.renewZooKeeper(zk);
fs::path tables_path = fs::path(zookeeper_path) / "keeper_map_tables";
auto tables = zk->getChildren(tables_path);
keeper_map_table_infos.reserve(tables.size());
for (auto & table : tables)
table = tables_path / table;
auto tables_info = zk->get(tables);
for (size_t i = 0; i < tables_info.size(); ++i)
{
const auto & table_info = tables_info[i];
if (table_info.error != Coordination::Error::ZOK)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Path in Keeper {} is unexpectedly missing", tables[i]);
std::vector<std::string> data;
boost::split(data, table_info.data, [](char c) { return c == '\n'; });
keeper_map_table_infos.emplace_back(
std::move(data[0]),
BackupCoordinationKeeperMapTables::KeeperMapTableInfo{
.table_id = fs::path(tables[i]).filename(), .data_path_in_backup = std::move(data[1])});
}
});
keeper_map_tables.emplace();
for (const auto & [zk_root_path, table_info] : keeper_map_table_infos)
keeper_map_tables->addTable(zk_root_path, table_info.table_id, table_info.data_path_in_backup);
}
String BackupCoordinationRemote::getKeeperMapDataPath(const String & table_zookeeper_root_path) const
{
std::lock_guard lock(keeper_map_tables_mutex);
prepareKeeperMapTables();
return keeper_map_tables->getDataPath(table_zookeeper_root_path);
}
void BackupCoordinationRemote::addFileInfos(BackupFileInfos && file_infos_)
{
{

View File

@ -5,6 +5,7 @@
#include <Backups/BackupCoordinationReplicatedAccess.h>
#include <Backups/BackupCoordinationReplicatedSQLObjects.h>
#include <Backups/BackupCoordinationReplicatedTables.h>
#include <Backups/BackupCoordinationKeeperMapTables.h>
#include <Backups/BackupCoordinationStageSync.h>
#include <Backups/WithRetries.h>
@ -63,6 +64,9 @@ public:
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const override;
void addKeeperMapTable(const String & table_zookeeper_root_path, const String & table_id, const String & data_path_in_backup) override;
String getKeeperMapDataPath(const String & table_zookeeper_root_path) const override;
void addFileInfos(BackupFileInfos && file_infos) override;
BackupFileInfos getFileInfos() const override;
BackupFileInfos getFileInfosForAllHosts() const override;
@ -85,6 +89,7 @@ private:
void prepareReplicatedTables() const TSA_REQUIRES(replicated_tables_mutex);
void prepareReplicatedAccess() const TSA_REQUIRES(replicated_access_mutex);
void prepareReplicatedSQLObjects() const TSA_REQUIRES(replicated_sql_objects_mutex);
void prepareKeeperMapTables() const TSA_REQUIRES(keeper_map_tables_mutex);
void prepareFileInfos() const TSA_REQUIRES(file_infos_mutex);
const String root_zookeeper_path;
@ -106,6 +111,7 @@ private:
mutable std::optional<BackupCoordinationReplicatedAccess> TSA_GUARDED_BY(replicated_access_mutex) replicated_access;
mutable std::optional<BackupCoordinationReplicatedSQLObjects> TSA_GUARDED_BY(replicated_sql_objects_mutex) replicated_sql_objects;
mutable std::optional<BackupCoordinationFileInfos> TSA_GUARDED_BY(file_infos_mutex) file_infos;
mutable std::optional<BackupCoordinationKeeperMapTables> keeper_map_tables TSA_GUARDED_BY(keeper_map_tables_mutex);
std::unordered_set<size_t> TSA_GUARDED_BY(writing_files_mutex) writing_files;
mutable std::mutex zookeeper_mutex;
@ -114,6 +120,7 @@ private:
mutable std::mutex replicated_sql_objects_mutex;
mutable std::mutex file_infos_mutex;
mutable std::mutex writing_files_mutex;
mutable std::mutex keeper_map_tables_mutex;
};
}

View File

@ -0,0 +1,47 @@
#include <Backups/BackupEntryReference.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
BackupEntryReference::BackupEntryReference(std::string reference_target_)
: reference_target(std::move(reference_target_))
{}
bool BackupEntryReference::isReference() const
{
return true;
}
String BackupEntryReference::getReferenceTarget() const
{
return reference_target;
}
UInt64 BackupEntryReference::getSize() const
{
return 0;
}
UInt128 BackupEntryReference::getChecksum(const ReadSettings & /*read_settings*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Checksum not implemented for reference backup entries");
}
std::unique_ptr<SeekableReadBuffer> BackupEntryReference::getReadBuffer(const ReadSettings & /*read_settings*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading not implemented for reference backup entries");
}
DataSourceDescription BackupEntryReference::getDataSourceDescription() const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Data source description not implemented for reference backup entries");
}
}

View File

@ -0,0 +1,25 @@
#pragma once
#include <Backups/IBackupEntry.h>
namespace DB
{
/// Represents a reference to another backup entry.
class BackupEntryReference : public IBackupEntry
{
public:
explicit BackupEntryReference(std::string reference_target_);
UInt64 getSize() const override;
UInt128 getChecksum(const ReadSettings & read_settings) const override;
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override;
DataSourceDescription getDataSourceDescription() const override;
bool isReference() const override;
String getReferenceTarget() const override;
private:
String reference_target;
};
}

View File

@ -89,6 +89,8 @@ String BackupFileInfo::describe() const
result += fmt::format("data_file_name: {};\n", data_file_name);
result += fmt::format("data_file_index: {};\n", data_file_index);
result += fmt::format("encrypted_by_disk: {};\n", encrypted_by_disk);
if (!reference_target.empty())
result += fmt::format("reference_target: {};\n", reference_target);
return result;
}
@ -104,6 +106,14 @@ BackupFileInfo buildFileInfoForBackupEntry(
BackupFileInfo info;
info.file_name = adjusted_path;
/// If it's a "reference" just set the target to a concrete file
if (backup_entry->isReference())
{
info.reference_target = removeLeadingSlash(backup_entry->getReferenceTarget());
return info;
}
info.size = backup_entry->getSize();
info.encrypted_by_disk = backup_entry->isEncryptedByDisk();

View File

@ -39,6 +39,14 @@ struct BackupFileInfo
/// Whether this file is encrypted by an encrypted disk.
bool encrypted_by_disk = false;
/// Set if this file is just a reference to another file
String reference_target;
/// (While writing a backup) if this list is not empty then after writing
/// `data_file_name` it should be copied to this list of destinations too.
/// This is used for plain backups.
Strings data_file_copies;
struct LessByFileName
{
bool operator()(const BackupFileInfo & lhs, const BackupFileInfo & rhs) const { return (lhs.file_name < rhs.file_name); }

View File

@ -61,6 +61,8 @@ public:
virtual void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) = 0;
virtual void copyFile(const String & destination, const String & source, size_t size) = 0;
virtual void removeFile(const String & file_name) = 0;
virtual void removeFiles(const Strings & file_names) = 0;

View File

@ -128,4 +128,13 @@ void BackupWriterDisk::copyFileFromDisk(const String & path_in_backup, DiskPtr s
BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
}
void BackupWriterDisk::copyFile(const String & destination, const String & source, size_t /*size*/)
{
LOG_TRACE(log, "Copying file inside backup from {} to {} ", source, destination);
auto dest_file_path = root_path / destination;
auto src_file_path = root_path / source;
disk->createDirectories(dest_file_path.parent_path());
disk->copyFile(src_file_path, *disk, dest_file_path, read_settings, write_settings);
}
}

View File

@ -44,6 +44,8 @@ public:
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
void copyFile(const String & destination, const String & source, size_t size) override;
void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;

View File

@ -152,4 +152,14 @@ void BackupWriterFile::copyFileFromDisk(const String & path_in_backup, DiskPtr s
BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
}
void BackupWriterFile::copyFile(const String & destination, const String & source, size_t /*size*/)
{
LOG_TRACE(log, "Copying file inside backup from {} to {} ", source, destination);
auto abs_source_path = root_path / source;
auto abs_dest_path = root_path / destination;
fs::create_directories(abs_dest_path.parent_path());
fs::copy(abs_source_path, abs_dest_path, fs::copy_options::overwrite_existing);
}
}

View File

@ -38,6 +38,8 @@ public:
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
void copyFile(const String & destination, const String & source, size_t size) override;
void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;

View File

@ -55,7 +55,9 @@ namespace
static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_max_redirects),
static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_retry_attempts),
context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
/* for_disk_s3 = */ false, request_settings.get_request_throttler, request_settings.put_request_throttler,
/* for_disk_s3 = */ false,
request_settings.get_request_throttler,
request_settings.put_request_throttler,
s3_uri.uri.getScheme());
client_configuration.endpointOverride = s3_uri.endpoint;
@ -167,7 +169,6 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s
blob_path.size(), mode);
copyS3File(
client,
client,
s3_uri.bucket,
fs::path(s3_uri.key) / path_in_backup,
@ -229,7 +230,6 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src
{
LOG_TRACE(log, "Copying file {} from disk {} to S3", src_path, src_disk->getName());
copyS3File(
client,
client,
/* src_bucket */ blob_path[1],
/* src_key= */ blob_path[0],
@ -249,9 +249,26 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src
BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
}
void BackupWriterS3::copyFile(const String & destination, const String & source, size_t size)
{
LOG_TRACE(log, "Copying file inside backup from {} to {} ", source, destination);
copyS3File(
client,
/* src_bucket */ s3_uri.bucket,
/* src_key= */ fs::path(s3_uri.key) / source,
0,
size,
s3_uri.bucket,
fs::path(s3_uri.key) / destination,
s3_settings.request_settings,
read_settings,
{},
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWriterS3"));
}
void BackupWriterS3::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
{
copyDataToS3File(create_read_buffer, start_pos, length, client, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, s3_settings.request_settings, {},
copyDataToS3File(create_read_buffer, start_pos, length, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, s3_settings.request_settings, {},
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWriterS3"));
}
@ -281,7 +298,6 @@ std::unique_ptr<WriteBuffer> BackupWriterS3::writeFile(const String & file_name)
{
return std::make_unique<WriteBufferFromS3>(
client,
client, // already has long timeout
s3_uri.bucket,
fs::path(s3_uri.key) / file_name,
DBMS_DEFAULT_BUFFER_SIZE,

View File

@ -49,6 +49,8 @@ public:
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
void copyFile(const String & destination, const String & source, size_t size) override;
void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;

View File

@ -24,6 +24,8 @@
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/DOM/DOMParser.h>
#include <ranges>
namespace ProfileEvents
{
@ -459,6 +461,7 @@ void BackupImpl::readBackupMetadata()
const Poco::XML::Node * file_config = child;
BackupFileInfo info;
info.file_name = getString(file_config, "name");
info.size = getUInt64(file_config, "size");
if (info.size)
{
@ -873,6 +876,10 @@ size_t BackupImpl::copyFileToDisk(const SizeAndChecksum & size_and_checksum,
void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
{
/// we don't write anything for reference files
if (entry->isReference())
return;
if (open_mode != OpenMode::WRITE)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is not opened for writing");
@ -907,15 +914,20 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
/// NOTE: `mutex` must be unlocked during copying otherwise writing will be in one thread maximum and hence slow.
if (use_archive)
const auto write_info_to_archive = [&](const auto & file_name)
{
LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}, adding to archive", info.data_file_name, src_file_desc, info.data_file_index);
auto out = archive_writer->writeFile(info.data_file_name);
auto out = archive_writer->writeFile(file_name);
auto read_buffer = entry->getReadBuffer(writer->getReadSettings());
if (info.base_size != 0)
read_buffer->seek(info.base_size, SEEK_SET);
copyData(*read_buffer, *out);
out->finalize();
};
if (use_archive)
{
LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}, adding to archive", info.data_file_name, src_file_desc, info.data_file_index);
write_info_to_archive(info.data_file_name);
}
else if (src_disk && from_immutable_file)
{
@ -929,6 +941,21 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
writer->copyDataToFile(info.data_file_name, create_read_buffer, info.base_size, info.size - info.base_size);
}
std::function<void(const String &)> copy_file_inside_backup;
if (use_archive)
{
copy_file_inside_backup = write_info_to_archive;
}
else
{
copy_file_inside_backup = [&](const auto & data_file_copy)
{
writer->copyFile(data_file_copy, info.data_file_name, info.size - info.base_size);
};
}
std::ranges::for_each(info.data_file_copies, copy_file_inside_backup);
{
std::lock_guard lock{mutex};
++num_entries;

View File

@ -31,8 +31,10 @@ namespace CurrentMetrics
{
extern const Metric BackupsThreads;
extern const Metric BackupsThreadsActive;
extern const Metric BackupsThreadsScheduled;
extern const Metric RestoreThreads;
extern const Metric RestoreThreadsActive;
extern const Metric RestoreThreadsScheduled;
}
namespace DB
@ -58,16 +60,7 @@ namespace
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
BackupCoordinationRemote::BackupKeeperSettings keeper_settings
{
.keeper_max_retries = context->getSettingsRef().backup_restore_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_restore_batch_size_for_keeper_multiread,
.keeper_fault_injection_probability = context->getSettingsRef().backup_restore_keeper_fault_injection_probability,
.keeper_fault_injection_seed = context->getSettingsRef().backup_restore_keeper_fault_injection_seed,
.keeper_value_max_size = context->getSettingsRef().backup_restore_keeper_value_max_size,
};
BackupCoordinationRemote::BackupKeeperSettings keeper_settings = WithRetries::KeeperSettings::fromContext(context);
auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
@ -264,6 +257,7 @@ public:
CurrentMetrics::Metric metric_threads;
CurrentMetrics::Metric metric_active_threads;
CurrentMetrics::Metric metric_scheduled_threads;
size_t max_threads = 0;
/// What to do with a new job if a corresponding thread pool is already running `max_threads` jobs:
@ -279,6 +273,7 @@ public:
{
metric_threads = CurrentMetrics::BackupsThreads;
metric_active_threads = CurrentMetrics::BackupsThreadsActive;
metric_active_threads = CurrentMetrics::BackupsThreadsScheduled;
max_threads = num_backup_threads;
/// We don't use thread pool queues for thread pools with a lot of tasks otherwise that queue could be memory-wasting.
use_queue = (thread_pool_id != ThreadPoolId::BACKUP_COPY_FILES);
@ -291,6 +286,7 @@ public:
{
metric_threads = CurrentMetrics::RestoreThreads;
metric_active_threads = CurrentMetrics::RestoreThreadsActive;
metric_active_threads = CurrentMetrics::RestoreThreadsScheduled;
max_threads = num_restore_threads;
use_queue = (thread_pool_id != ThreadPoolId::RESTORE_TABLES_DATA);
break;
@ -301,7 +297,7 @@ public:
chassert(max_threads != 0);
size_t max_free_threads = 0;
size_t queue_size = use_queue ? 0 : max_threads;
auto thread_pool = std::make_unique<ThreadPool>(metric_threads, metric_active_threads, max_threads, max_free_threads, queue_size);
auto thread_pool = std::make_unique<ThreadPool>(metric_threads, metric_active_threads, metric_scheduled_threads, max_threads, max_free_threads, queue_size);
auto * thread_pool_ptr = thread_pool.get();
thread_pools.emplace(thread_pool_id, std::move(thread_pool));
return *thread_pool_ptr;

View File

@ -56,6 +56,12 @@ public:
/// Returns all mutations of a replicated table which are not finished for some data parts added by addReplicatedPartNames().
virtual std::vector<MutationInfo> getReplicatedMutations(const String & table_shared_id, const String & replica_name) const = 0;
/// Adds information about KeeperMap tables
virtual void addKeeperMapTable(const String & table_zookeeper_root_path, const String & table_id, const String & data_path_in_backup) = 0;
/// KeeperMap tables use shared storage without local data so only one table should backup the data
virtual String getKeeperMapDataPath(const String & table_zookeeper_root_path) const = 0;
/// Adds a data path in backup for a replicated table.
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
/// getReplicatedDataPaths().

Some files were not shown because too many files have changed in this diff Show More