mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge branch 'master' into planner-support-transactions
This commit is contained in:
commit
b102c8e4b7
@ -164,7 +164,7 @@ if (OS_LINUX)
|
||||
# and whatever is poisoning it by LD_PRELOAD should not link to our symbols.
|
||||
# - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries
|
||||
# should not expose their symbols to ODBC drivers and libraries.
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic -Wl,--gc-sections")
|
||||
endif ()
|
||||
|
||||
if (OS_DARWIN)
|
||||
@ -187,9 +187,10 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
|
||||
if (NOT (SANITIZE_COVERAGE OR WITH_COVERAGE)
|
||||
AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL"))
|
||||
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT ON)
|
||||
else()
|
||||
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT OFF)
|
||||
@ -291,9 +292,6 @@ set (CMAKE_C_STANDARD 11)
|
||||
set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C
|
||||
set (CMAKE_C_STANDARD_REQUIRED ON)
|
||||
|
||||
# Compiler-specific coverage flags e.g. -fcoverage-mapping
|
||||
option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF)
|
||||
|
||||
if (COMPILER_CLANG)
|
||||
# Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure.
|
||||
# See https://reviews.llvm.org/D112921
|
||||
@ -309,18 +307,12 @@ if (COMPILER_CLANG)
|
||||
set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries")
|
||||
set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
|
||||
endif()
|
||||
|
||||
if (WITH_COVERAGE)
|
||||
set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
|
||||
# If we want to disable coverage for specific translation units
|
||||
set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping")
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS}")
|
||||
|
||||
# Our built-in unwinder only supports DWARF version up to 4.
|
||||
set (DEBUG_INFO_FLAGS "-g -gdwarf-4")
|
||||
set (DEBUG_INFO_FLAGS "-g")
|
||||
|
||||
# Disable omit frame pointer compiler optimization using -fno-omit-frame-pointer
|
||||
option(DISABLE_OMIT_FRAME_POINTER "Disable omit frame pointer compiler optimization" OFF)
|
||||
@ -569,7 +561,6 @@ option(CHECK_LARGE_OBJECT_SIZES "Check that there are no large object files afte
|
||||
add_subdirectory (base)
|
||||
add_subdirectory (src)
|
||||
add_subdirectory (programs)
|
||||
add_subdirectory (tests)
|
||||
add_subdirectory (utils)
|
||||
|
||||
if (FUZZER)
|
||||
|
@ -1,3 +1,5 @@
|
||||
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
|
||||
|
||||
if (USE_CLANG_TIDY)
|
||||
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
|
||||
endif ()
|
||||
|
@ -1,11 +1,15 @@
|
||||
#include "coverage.h"
|
||||
|
||||
#if WITH_COVERAGE
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wreserved-identifier"
|
||||
|
||||
# include <mutex>
|
||||
# include <unistd.h>
|
||||
|
||||
/// WITH_COVERAGE enables the default implementation of code coverage,
|
||||
/// that dumps a map to the filesystem.
|
||||
|
||||
#if WITH_COVERAGE
|
||||
|
||||
#include <mutex>
|
||||
#include <unistd.h>
|
||||
|
||||
|
||||
# if defined(__clang__)
|
||||
@ -31,3 +35,131 @@ void dumpCoverageReportIfPossible()
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/// SANITIZE_COVERAGE enables code instrumentation,
|
||||
/// but leaves the callbacks implementation to us,
|
||||
/// which we use to calculate coverage on a per-test basis
|
||||
/// and to write it to system tables.
|
||||
|
||||
#if defined(SANITIZE_COVERAGE)
|
||||
|
||||
namespace
{

/// One-shot flags: each sanitizer initialization callback acts only on its first invocation.
bool pc_guards_initialized{false};
bool pc_table_initialized{false};

/// Half-open range [guards_start, guards_end) of the guard variables handed to us by the runtime.
uint32_t * guards_start{nullptr};
uint32_t * guards_end{nullptr};

/// One slot per guard; a slot receives the return address observed when its guard fires.
uintptr_t * coverage_array{nullptr};
size_t coverage_array_size{0};

/// Copy of the table of instrumented addresses reported by the runtime.
uintptr_t * all_addresses_array{nullptr};
size_t all_addresses_array_size{0};

}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
/// This is called at least once for every DSO for initialization.
|
||||
/// But we will use it only for the main DSO.
|
||||
void __sanitizer_cov_trace_pc_guard_init(uint32_t * start, uint32_t * stop)
|
||||
{
|
||||
if (pc_guards_initialized)
|
||||
return;
|
||||
pc_guards_initialized = true;
|
||||
|
||||
/// The function can be called multiple times, but we need to initialize only once.
|
||||
if (start == stop || *start)
|
||||
return;
|
||||
|
||||
guards_start = start;
|
||||
guards_end = stop;
|
||||
coverage_array_size = stop - start;
|
||||
|
||||
/// Note: we will leak this.
|
||||
coverage_array = static_cast<uintptr_t*>(malloc(sizeof(uintptr_t) * coverage_array_size));
|
||||
|
||||
resetCoverage();
|
||||
}
|
||||
|
||||
/// This is called at least once for every DSO for initialization
|
||||
/// and provides information about all instrumented addresses.
|
||||
void __sanitizer_cov_pcs_init(const uintptr_t * pcs_begin, const uintptr_t * pcs_end)
|
||||
{
|
||||
if (pc_table_initialized)
|
||||
return;
|
||||
pc_table_initialized = true;
|
||||
|
||||
all_addresses_array = static_cast<uintptr_t*>(malloc(sizeof(uintptr_t) * coverage_array_size));
|
||||
all_addresses_array_size = pcs_end - pcs_begin;
|
||||
|
||||
/// They are not a real pointers, but also contain a flag in the most significant bit,
|
||||
/// in which we are not interested for now. Reset it.
|
||||
for (size_t i = 0; i < all_addresses_array_size; ++i)
|
||||
all_addresses_array[i] = pcs_begin[i] & 0x7FFFFFFFFFFFFFFFULL;
|
||||
}
|
||||
|
||||
/// This is called at every basic block / edge, etc.
|
||||
void __sanitizer_cov_trace_pc_guard(uint32_t * guard)
|
||||
{
|
||||
/// Duplicate the guard check.
|
||||
if (!*guard)
|
||||
return;
|
||||
*guard = 0;
|
||||
|
||||
/// If you set *guard to 0 this code will not be called again for this edge.
|
||||
/// Now we can get the PC and do whatever you want:
|
||||
/// - store it somewhere or symbolize it and print right away.
|
||||
/// The values of `*guard` are as you set them in
|
||||
/// __sanitizer_cov_trace_pc_guard_init and so you can make them consecutive
|
||||
/// and use them to dereference an array or a bit vector.
|
||||
void * pc = __builtin_return_address(0);
|
||||
|
||||
coverage_array[guard - guards_start] = reinterpret_cast<uintptr_t>(pc);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
__attribute__((no_sanitize("coverage")))
|
||||
std::span<const uintptr_t> getCoverage()
|
||||
{
|
||||
return {coverage_array, coverage_array_size};
|
||||
}
|
||||
|
||||
__attribute__((no_sanitize("coverage")))
|
||||
std::span<const uintptr_t> getAllInstrumentedAddresses()
|
||||
{
|
||||
return {all_addresses_array, all_addresses_array_size};
|
||||
}
|
||||
|
||||
__attribute__((no_sanitize("coverage")))
|
||||
void resetCoverage()
|
||||
{
|
||||
memset(coverage_array, 0, coverage_array_size * sizeof(*coverage_array));
|
||||
|
||||
/// The guard defines whether the __sanitizer_cov_trace_pc_guard should be called.
|
||||
/// For example, you can unset it after first invocation to prevent excessive work.
|
||||
/// Initially set all the guards to 1 to enable callbacks.
|
||||
for (uint32_t * x = guards_start; x < guards_end; ++x)
|
||||
*x = 1;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/// Build without SANITIZE_COVERAGE: there is nothing to report.
std::span<const uintptr_t> getCoverage()
{
    return std::span<const uintptr_t>{};
}
|
||||
|
||||
/// Build without SANITIZE_COVERAGE: no addresses are instrumented.
std::span<const uintptr_t> getAllInstrumentedAddresses()
{
    return std::span<const uintptr_t>{};
}
|
||||
|
||||
/// Build without SANITIZE_COVERAGE: there is no state to reset.
void resetCoverage()
{
    /// Intentionally a no-op.
}
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <cstdint>
|
||||
|
||||
/// Flush coverage report to file, depending on coverage system
|
||||
/// proposed by compiler (llvm for clang and gcov for gcc).
|
||||
///
|
||||
@ -7,3 +10,16 @@
|
||||
/// Thread safe (use exclusive lock).
|
||||
/// Idempotent, may be called multiple times.
|
||||
void dumpCoverageReportIfPossible();
|
||||
|
||||
/// This is effective if SANITIZE_COVERAGE is enabled at build time.
|
||||
/// Get accumulated unique program addresses of the instrumented parts of the code,
|
||||
/// seen so far after program startup or after previous reset.
|
||||
/// The returned span will be represented as a sparse map, containing mostly zeros, which you should filter away.
|
||||
std::span<const uintptr_t> getCoverage();
|
||||
|
||||
/// Get all instrumented addresses that could be in the coverage.
|
||||
std::span<const uintptr_t> getAllInstrumentedAddresses();
|
||||
|
||||
/// Reset the accumulated coverage.
|
||||
/// This is useful to compare coverage of different tests, including differential coverage.
|
||||
void resetCoverage();
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "memcpy.h"
|
||||
|
||||
__attribute__((no_sanitize("coverage")))
|
||||
extern "C" void * memcpy(void * __restrict dst, const void * __restrict src, size_t size)
|
||||
{
|
||||
return inline_memcpy(dst, src, size);
|
||||
|
@ -93,7 +93,7 @@
|
||||
* See https://habr.com/en/company/yandex/blog/457612/
|
||||
*/
|
||||
|
||||
|
||||
__attribute__((no_sanitize("coverage")))
|
||||
static inline void * inline_memcpy(void * __restrict dst_, const void * __restrict src_, size_t size)
|
||||
{
|
||||
/// We will use pointer arithmetic, so char pointer will be used.
|
||||
|
@ -1,19 +0,0 @@
|
||||
# Adding test output on failure
|
||||
enable_testing ()
|
||||
|
||||
if (NOT TARGET check)
|
||||
if (CMAKE_CONFIGURATION_TYPES)
|
||||
add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
|
||||
--force-new-ctest-process --output-on-failure --build-config "$<CONFIGURATION>"
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
else ()
|
||||
add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
|
||||
--force-new-ctest-process --output-on-failure
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
macro (add_check target)
|
||||
add_test (NAME test_${target} COMMAND ${target} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
add_dependencies (check ${target})
|
||||
endmacro (add_check)
|
@ -58,3 +58,27 @@ if (SANITIZE)
|
||||
message (FATAL_ERROR "Unknown sanitizer type: ${SANITIZE}")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
# Default coverage instrumentation (dumping the coverage map on exit)
|
||||
option(WITH_COVERAGE "Instrumentation for code coverage with default implementation" OFF)
|
||||
|
||||
if (WITH_COVERAGE)
    # Use a real message() mode: an unrecognized first word is not a mode
    # and would be printed as part of the message text.
    message (STATUS "Enabled instrumentation for code coverage")
    # Flags for the default coverage implementation.
    set(COVERAGE_FLAGS "-fprofile-instr-generate -fcoverage-mapping")
endif()
|
||||
|
||||
option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF)
|
||||
|
||||
if (SANITIZE_COVERAGE)
    # Use a real message() mode: an unrecognized first word is not a mode
    # and would be printed as part of the message text.
    message (STATUS "Enabled instrumentation for code coverage")

    # We set this define for whole build to indicate that at least some parts are compiled with coverage.
    # And to expose it in system.build_options.
    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSANITIZE_COVERAGE=1")
    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSANITIZE_COVERAGE=1")

    # But the actual coverage will be enabled on per-library basis: for ClickHouse code, but not for 3rd-party.
    set (COVERAGE_FLAGS "-fsanitize-coverage=trace-pc-guard,pc-table")
endif()
|
||||
|
||||
set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table")
|
||||
|
13
contrib/CMakeLists.txt
vendored
13
contrib/CMakeLists.txt
vendored
@ -1,16 +1,7 @@
|
||||
#"${folder}/CMakeLists.txt" Third-party libraries may have substandard code.
|
||||
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")
|
||||
|
||||
if (WITH_COVERAGE)
|
||||
set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE})
|
||||
separate_arguments(WITHOUT_COVERAGE_LIST)
|
||||
# disable coverage for contib files and build with optimisations
|
||||
if (COMPILER_CLANG)
|
||||
add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST})
|
||||
endif()
|
||||
endif()
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w -ffunction-sections -fdata-sections")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -ffunction-sections -fdata-sections")
|
||||
|
||||
if (SANITIZE STREQUAL "undefined")
|
||||
# 3rd-party libraries usually not intended to work with UBSan.
|
||||
|
2
contrib/abseil-cpp
vendored
2
contrib/abseil-cpp
vendored
@ -1 +1 @@
|
||||
Subproject commit 5655528c41830f733160de4fb0b99073841bae9e
|
||||
Subproject commit 3bd86026c93da5a40006fd53403dff9d5f5e30e3
|
2
contrib/google-protobuf
vendored
2
contrib/google-protobuf
vendored
@ -1 +1 @@
|
||||
Subproject commit 2a4fa1a4e95012d754ac55d43c8bc462dd1c78a8
|
||||
Subproject commit 0862007f6ca1f5723c58f10f0ca34f3f25a63b2e
|
@ -20,7 +20,6 @@ endif()
|
||||
set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf")
|
||||
set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/google-protobuf")
|
||||
|
||||
|
||||
add_definitions(-DGOOGLE_PROTOBUF_CMAKE_BUILD)
|
||||
|
||||
add_definitions(-DHAVE_PTHREAD)
|
||||
@ -30,17 +29,69 @@ include_directories(
|
||||
${protobuf_binary_dir}
|
||||
${protobuf_source_dir}/src)
|
||||
|
||||
add_library(utf8_range
|
||||
${protobuf_source_dir}/third_party/utf8_range/naive.c
|
||||
${protobuf_source_dir}/third_party/utf8_range/range2-neon.c
|
||||
${protobuf_source_dir}/third_party/utf8_range/range2-sse.c
|
||||
)
|
||||
include_directories(${protobuf_source_dir}/third_party/utf8_range)
|
||||
|
||||
add_library(utf8_validity
|
||||
${protobuf_source_dir}/third_party/utf8_range/utf8_validity.cc
|
||||
)
|
||||
target_link_libraries(utf8_validity PUBLIC absl::strings)
|
||||
|
||||
set(protobuf_absl_used_targets
|
||||
absl::absl_check
|
||||
absl::absl_log
|
||||
absl::algorithm
|
||||
absl::base
|
||||
absl::bind_front
|
||||
absl::bits
|
||||
absl::btree
|
||||
absl::cleanup
|
||||
absl::cord
|
||||
absl::core_headers
|
||||
absl::debugging
|
||||
absl::die_if_null
|
||||
absl::dynamic_annotations
|
||||
absl::flags
|
||||
absl::flat_hash_map
|
||||
absl::flat_hash_set
|
||||
absl::function_ref
|
||||
absl::hash
|
||||
absl::layout
|
||||
absl::log_initialize
|
||||
absl::log_severity
|
||||
absl::memory
|
||||
absl::node_hash_map
|
||||
absl::node_hash_set
|
||||
absl::optional
|
||||
absl::span
|
||||
absl::status
|
||||
absl::statusor
|
||||
absl::strings
|
||||
absl::synchronization
|
||||
absl::time
|
||||
absl::type_traits
|
||||
absl::utility
|
||||
absl::variant
|
||||
)
|
||||
|
||||
set(libprotobuf_lite_files
|
||||
${protobuf_source_dir}/src/google/protobuf/any_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/arena.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/arena_align.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/arenastring.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/arenaz_sampler.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/extension_set.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_enum_util.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_tctable_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_util.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/implicit_weak_message.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/inlined_string_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/coded_stream.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/io_win32.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/strtod.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/zero_copy_stream.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/zero_copy_stream_impl.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
|
||||
@ -48,21 +99,15 @@ set(libprotobuf_lite_files
|
||||
${protobuf_source_dir}/src/google/protobuf/message_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/parse_context.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/repeated_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/bytestream.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/repeated_ptr_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/common.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/int128.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/status.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/statusor.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/stringpiece.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/stringprintf.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/structurally_valid.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/strutil.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/time.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/wire_format_lite.cc
|
||||
)
|
||||
|
||||
add_library(_libprotobuf-lite ${libprotobuf_lite_files})
|
||||
target_link_libraries(_libprotobuf-lite pthread)
|
||||
target_link_libraries(_libprotobuf-lite
|
||||
pthread
|
||||
utf8_validity)
|
||||
if(${CMAKE_SYSTEM_NAME} STREQUAL "Android")
|
||||
target_link_libraries(_libprotobuf-lite log)
|
||||
endif()
|
||||
@ -71,67 +116,93 @@ add_library(protobuf::libprotobuf-lite ALIAS _libprotobuf-lite)
|
||||
|
||||
|
||||
set(libprotobuf_files
|
||||
${protobuf_source_dir}/src/google/protobuf/any.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/any.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/api.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/duration.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/empty.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/field_mask.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/source_context.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/struct.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/timestamp.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/type.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/wrappers.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/any.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/any_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/arena.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/arena_align.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/arenastring.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/arenaz_sampler.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/importer.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/parser.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/cpp_features.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/descriptor.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/descriptor.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/descriptor_database.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/duration.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/dynamic_message.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/empty.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/extension_set.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/extension_set_heavy.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/field_mask.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/feature_resolver.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_enum_util.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_bases.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_reflection.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_tctable_full.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_tctable_gen.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_tctable_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_util.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/implicit_weak_message.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/inlined_string_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/coded_stream.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/gzip_stream.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/io_win32.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/printer.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/strtod.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/tokenizer.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/zero_copy_sink.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/zero_copy_stream.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/zero_copy_stream_impl.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/json/internal/lexer.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/json/internal/message_path.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/json/internal/parser.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/json/internal/unparser.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/json/internal/untyped_message.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/json/internal/writer.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/json/internal/zero_copy_buffered_stream.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/json/json.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/map.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/map_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/message.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/message_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/parse_context.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/port.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/raw_ptr.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/reflection_mode.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/reflection_ops.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/repeated_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/repeated_ptr_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/service.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/source_context.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/struct.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/substitute.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/stubs/common.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/text_format.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/timestamp.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/type.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/unknown_field_set.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/delimited_message_util.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/field_comparator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/field_mask_util.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/datapiece.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/default_value_objectwriter.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/error_listener.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/field_mask_utility.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/json_escaping.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/json_objectwriter.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/json_stream_parser.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/object_writer.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/proto_writer.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/protostream_objectsource.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/protostream_objectwriter.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/type_info.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/type_info_test_helper.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/internal/utility.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/json_util.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/message_differencer.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/time_util.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/util/type_resolver_util.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/wire_format.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/wrappers.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/wire_format_lite.cc
|
||||
)
|
||||
|
||||
add_library(_libprotobuf ${libprotobuf_lite_files} ${libprotobuf_files})
|
||||
if (ENABLE_FUZZING)
|
||||
target_compile_options(_libprotobuf PRIVATE "-fsanitize-recover=all")
|
||||
endif()
|
||||
target_link_libraries(_libprotobuf pthread)
|
||||
target_link_libraries(_libprotobuf ch_contrib::zlib)
|
||||
target_link_libraries(_libprotobuf
|
||||
pthread
|
||||
ch_contrib::zlib
|
||||
utf8_validity
|
||||
${protobuf_absl_used_targets})
|
||||
if(${CMAKE_SYSTEM_NAME} STREQUAL "Android")
|
||||
target_link_libraries(_libprotobuf log)
|
||||
endif()
|
||||
@ -140,23 +211,26 @@ add_library(protobuf::libprotobuf ALIAS _libprotobuf)
|
||||
|
||||
|
||||
set(libprotoc_files
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/allowlists/editions.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/code_generator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/command_line_interface.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/enum.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/enum_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/extension.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field_generators/cord_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field_generators/enum_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field_generators/map_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field_generators/message_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field_generators/primitive_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field_generators/string_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/file.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/generator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/helpers.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/map_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/message.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/message_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/padding_optimizer.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/parse_function_generator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/primitive_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/service.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/string_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/tracker.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_doc_comment.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_enum.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_enum_field.cc
|
||||
@ -173,6 +247,7 @@ set(libprotoc_files
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_repeated_primitive_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_source_generator_base.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_wrapper_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/csharp/names.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/context.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/doc_comment.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/enum.cc
|
||||
@ -195,38 +270,55 @@ set(libprotoc_files
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/message_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/message_field_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/message_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/message_serialization.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/name_resolver.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/names.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/primitive_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/primitive_field_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/service.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/shared_code_generator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/string_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/java/string_field_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_enum.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_enum_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_extension.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_file.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_generator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_helpers.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_map_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_message.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_message_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_oneof.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_primitive_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/enum.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/enum_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/extension.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/file.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/generator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/helpers.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/import_writer.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/line_consumer.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/map_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/message.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/message_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/names.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/oneof.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/primitive_field.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/text_format_decode_data.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/php/names.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/php/php_generator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/plugin.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/plugin.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/python/generator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/python/helpers.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/python/pyi_generator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/retention.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/ruby/ruby_generator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/rust/accessors/accessors.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/rust/accessors/singular_bytes.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/rust/accessors/singular_scalar.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/rust/context.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/rust/generator.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/rust/message.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/rust/naming.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/subprocess.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/compiler/zip_writer.cc
|
||||
)
|
||||
|
||||
add_library(_libprotoc ${libprotoc_files})
|
||||
target_link_libraries(_libprotoc _libprotobuf)
|
||||
target_link_libraries(_libprotoc
|
||||
_libprotobuf
|
||||
${protobuf_absl_used_targets})
|
||||
add_library(protobuf::libprotoc ALIAS _libprotoc)
|
||||
|
||||
set(protoc_files ${protobuf_source_dir}/src/google/protobuf/compiler/main.cc)
|
||||
@ -235,7 +327,11 @@ if (CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME
|
||||
AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR)
|
||||
|
||||
add_executable(protoc ${protoc_files})
|
||||
target_link_libraries(protoc _libprotoc _libprotobuf pthread)
|
||||
target_link_libraries(protoc _libprotoc
|
||||
_libprotobuf
|
||||
pthread
|
||||
utf8_validity
|
||||
${protobuf_absl_used_targets})
|
||||
add_executable(protobuf::protoc ALIAS protoc)
|
||||
|
||||
if (ENABLE_FUZZING)
|
||||
@ -255,6 +351,8 @@ else ()
|
||||
|
||||
# This is quite ugly but I cannot make dependencies work properly.
|
||||
|
||||
set(abseil_source_dir "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
|
||||
|
||||
execute_process(
|
||||
COMMAND mkdir -p ${PROTOC_BUILD_DIR}
|
||||
COMMAND_ECHO STDOUT)
|
||||
@ -269,7 +367,9 @@ else ()
|
||||
"-Dprotobuf_BUILD_CONFORMANCE=0"
|
||||
"-Dprotobuf_BUILD_EXAMPLES=0"
|
||||
"-Dprotobuf_BUILD_PROTOC_BINARIES=1"
|
||||
"${protobuf_source_dir}/cmake"
|
||||
"-DABSL_ROOT_DIR=${abseil_source_dir}"
|
||||
"-DABSL_ENABLE_INSTALL=0"
|
||||
"${protobuf_source_dir}"
|
||||
WORKING_DIRECTORY "${PROTOC_BUILD_DIR}"
|
||||
COMMAND_ECHO STDOUT)
|
||||
|
||||
@ -278,38 +378,6 @@ else ()
|
||||
COMMAND_ECHO STDOUT)
|
||||
endif ()
|
||||
|
||||
# add_custom_command (
|
||||
# OUTPUT ${PROTOC_BUILD_DIR}
|
||||
# COMMAND mkdir -p ${PROTOC_BUILD_DIR})
|
||||
#
|
||||
# add_custom_command (
|
||||
# OUTPUT "${PROTOC_BUILD_DIR}/CMakeCache.txt"
|
||||
#
|
||||
# COMMAND ${CMAKE_COMMAND}
|
||||
# -G"${CMAKE_GENERATOR}"
|
||||
# -DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}"
|
||||
# -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}"
|
||||
# -DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}"
|
||||
# -Dprotobuf_BUILD_TESTS=0
|
||||
# -Dprotobuf_BUILD_CONFORMANCE=0
|
||||
# -Dprotobuf_BUILD_EXAMPLES=0
|
||||
# -Dprotobuf_BUILD_PROTOC_BINARIES=1
|
||||
# "${protobuf_source_dir}/cmake"
|
||||
#
|
||||
# DEPENDS "${PROTOC_BUILD_DIR}"
|
||||
# WORKING_DIRECTORY "${PROTOC_BUILD_DIR}"
|
||||
# COMMENT "Configuring 'protoc' for host architecture."
|
||||
# USES_TERMINAL)
|
||||
#
|
||||
# add_custom_command (
|
||||
# OUTPUT "${PROTOC_BUILD_DIR}/protoc"
|
||||
# COMMAND ${CMAKE_COMMAND} --build "${PROTOC_BUILD_DIR}"
|
||||
# DEPENDS "${PROTOC_BUILD_DIR}/CMakeCache.txt"
|
||||
# COMMENT "Building 'protoc' for host architecture."
|
||||
# USES_TERMINAL)
|
||||
#
|
||||
# add_custom_target (protoc-host DEPENDS "${PROTOC_BUILD_DIR}/protoc")
|
||||
|
||||
add_executable(protoc IMPORTED GLOBAL)
|
||||
set_target_properties (protoc PROPERTIES IMPORTED_LOCATION "${PROTOC_BUILD_DIR}/protoc")
|
||||
add_dependencies(protoc "${PROTOC_BUILD_DIR}/protoc")
|
||||
|
2
contrib/grpc
vendored
2
contrib/grpc
vendored
@ -1 +1 @@
|
||||
Subproject commit 6e5e645de7cb0604e3ad4ba55abff2eca38c1803
|
||||
Subproject commit 740e3dfd97301a52ad8165b65285bcc149d9e817
|
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="23.10.3.5"
|
||||
ARG VERSION="23.10.4.25"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="23.10.3.5"
|
||||
ARG VERSION="23.10.4.25"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="23.10.3.5"
|
||||
ARG VERSION="23.10.4.25"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
|
@ -206,7 +206,7 @@ function build
|
||||
(
|
||||
cd "$FASTTEST_BUILD"
|
||||
TIMEFORMAT=$'\nreal\t%3R\nuser\t%3U\nsys\t%3S'
|
||||
( time ninja clickhouse-bundle) |& ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
|
||||
( time ninja clickhouse-bundle clickhouse-stripped) |& ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
|
||||
BUILD_SECONDS_ELAPSED=$(awk '/^....-..-.. ..:..:.. real\t[0-9]/ {print $4}' < "$FASTTEST_OUTPUT/build_log.txt")
|
||||
echo "build_clickhouse_fasttest_binary: [ OK ] $BUILD_SECONDS_ELAPSED sec." \
|
||||
| ts '%Y-%m-%d %H:%M:%S' \
|
||||
@ -215,7 +215,6 @@ function build
|
||||
mkdir -p "$FASTTEST_OUTPUT/binaries/"
|
||||
cp programs/clickhouse "$FASTTEST_OUTPUT/binaries/clickhouse"
|
||||
|
||||
strip programs/clickhouse -o programs/clickhouse-stripped
|
||||
zstd --threads=0 programs/clickhouse-stripped -o "$FASTTEST_OUTPUT/binaries/clickhouse-stripped.zst"
|
||||
fi
|
||||
ccache_status
|
||||
|
@ -39,8 +39,7 @@ public class MySQLJavaClientTest {
|
||||
|
||||
// useServerPrepStmts=true -> COM_STMT_PREPARE + COM_STMT_EXECUTE -> binary
|
||||
// useServerPrepStmts=false -> COM_QUERY -> text
|
||||
String jdbcUrl = String.format("jdbc:mysql://%s:%s/%s?useSSL=false&useServerPrepStmts=%s",
|
||||
host, port, database, binary);
|
||||
String jdbcUrl = String.format("jdbc:mysql://%s:%s/%s?useSSL=false&useServerPrepStmts=%s", host, port, database, binary);
|
||||
|
||||
try {
|
||||
Class.forName("com.mysql.cj.jdbc.Driver");
|
||||
@ -67,21 +66,21 @@ public class MySQLJavaClientTest {
|
||||
int rowNum = 1;
|
||||
while (rs.next()) {
|
||||
System.out.printf("Row #%d\n", rowNum++);
|
||||
System.out.printf("%s, value: %d\n", getMysqlType(rs, "i8"), rs.getInt("i8"));
|
||||
System.out.printf("%s, value: %d\n", getMysqlType(rs, "i16"), rs.getInt("i16"));
|
||||
System.out.printf("%s, value: %d\n", getMysqlType(rs, "i32"), rs.getInt("i32"));
|
||||
System.out.printf("%s, value: %d\n", getMysqlType(rs, "i64"), rs.getLong("i64"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "i128"), rs.getString("i128"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "i256"), rs.getString("i256"));
|
||||
System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui8"), rs.getInt("ui8"));
|
||||
System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui16"), rs.getInt("ui16"));
|
||||
System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui32"), rs.getLong("ui32"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui64"), rs.getString("ui64"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui128"), rs.getString("ui128"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui256"), rs.getString("ui256"));
|
||||
System.out.printf("%s, value: %f\n", getMysqlType(rs, "f32"), rs.getFloat("f32"));
|
||||
System.out.printf("%s, value: %f\n", getMysqlType(rs, "f64"), rs.getFloat("f64"));
|
||||
System.out.printf("%s, value: %b\n", getMysqlType(rs, "b"), rs.getBoolean("b"));
|
||||
System.out.printf("%s, value: %d, wasNull: %b\n", getMysqlType(rs, "i8"), rs.getInt("i8"), rs.wasNull());
|
||||
System.out.printf("%s, value: %d, wasNull: %b\n", getMysqlType(rs, "i16"), rs.getInt("i16"), rs.wasNull());
|
||||
System.out.printf("%s, value: %d, wasNull: %b\n", getMysqlType(rs, "i32"), rs.getInt("i32"), rs.wasNull());
|
||||
System.out.printf("%s, value: %d, wasNull: %b\n", getMysqlType(rs, "i64"), rs.getLong("i64"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "i128"), rs.getString("i128"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "i256"), rs.getString("i256"), rs.wasNull());
|
||||
System.out.printf("%s, value: %d, wasNull: %b\n", getMysqlType(rs, "ui8"), rs.getInt("ui8"), rs.wasNull());
|
||||
System.out.printf("%s, value: %d, wasNull: %b\n", getMysqlType(rs, "ui16"), rs.getInt("ui16"), rs.wasNull());
|
||||
System.out.printf("%s, value: %d, wasNull: %b\n", getMysqlType(rs, "ui32"), rs.getLong("ui32"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "ui64"), rs.getString("ui64"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "ui128"), rs.getString("ui128"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "ui256"), rs.getString("ui256"), rs.wasNull());
|
||||
System.out.printf("%s, value: %f, wasNull: %b\n", getMysqlType(rs, "f32"), rs.getFloat("f32"), rs.wasNull());
|
||||
System.out.printf("%s, value: %f, wasNull: %b\n", getMysqlType(rs, "f64"), rs.getFloat("f64"), rs.wasNull());
|
||||
System.out.printf("%s, value: %b, wasNull: %b\n", getMysqlType(rs, "b"), rs.getBoolean("b"), rs.wasNull());
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
@ -92,10 +91,10 @@ public class MySQLJavaClientTest {
|
||||
int rowNum = 1;
|
||||
while (rs.next()) {
|
||||
System.out.printf("Row #%d\n", rowNum++);
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "s"), rs.getString("s"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "sn"), rs.getString("sn"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "lc"), rs.getString("lc"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "nlc"), rs.getString("nlc"));
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "s"), rs.getString("s"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "sn"), rs.getString("sn"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "lc"), rs.getString("lc"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "nlc"), rs.getString("nlc"), rs.wasNull());
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
@ -106,10 +105,10 @@ public class MySQLJavaClientTest {
|
||||
int rowNum = 1;
|
||||
while (rs.next()) {
|
||||
System.out.printf("Row #%d\n", rowNum++);
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "ilc"), rs.getInt("ilc"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dlc"), rs.getDate("dlc"));
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "ilc"), rs.getInt("ilc"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dlc"), rs.getDate("dlc"), rs.wasNull());
|
||||
// NULL int is represented as zero
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "ni"), rs.getInt("ni"));
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "ni"), rs.getInt("ni"), rs.wasNull());
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
@ -120,12 +119,11 @@ public class MySQLJavaClientTest {
|
||||
int rowNum = 1;
|
||||
while (rs.next()) {
|
||||
System.out.printf("Row #%d\n", rowNum++);
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d32"), rs.getBigDecimal("d32").toPlainString());
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d64"), rs.getBigDecimal("d64").toPlainString());
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d128_native"),
|
||||
rs.getBigDecimal("d128_native").toPlainString());
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d128_text"), rs.getString("d128_text"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d256"), rs.getString("d256"));
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "d32"), rs.getBigDecimal("d32").toPlainString(), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "d64"), rs.getBigDecimal("d64").toPlainString(), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "d128_native"), rs.getBigDecimal("d128_native").toPlainString(), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "d128_text"), rs.getString("d128_text"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "d256"), rs.getString("d256"), rs.wasNull());
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
@ -136,12 +134,12 @@ public class MySQLJavaClientTest {
|
||||
int rowNum = 1;
|
||||
while (rs.next()) {
|
||||
System.out.printf("Row #%d\n", rowNum++);
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d"), rs.getDate("d"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d32"), rs.getDate("d32"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_6"), rs.getTimestamp("dt64_6"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_9"), rs.getTimestamp("dt64_9"));
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "d"), rs.getDate("d"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "d32"), rs.getDate("d32"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt64_6"), rs.getTimestamp("dt64_6"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt64_9"), rs.getTimestamp("dt64_9"), rs.wasNull());
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
@ -152,13 +150,13 @@ public class MySQLJavaClientTest {
|
||||
int rowNum = 1;
|
||||
while (rs.next()) {
|
||||
System.out.printf("Row #%d\n", rowNum++);
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_0"), rs.getTimestamp("dt64_0"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_1"), rs.getTimestamp("dt64_1"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_2"), rs.getTimestamp("dt64_2"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_4"), rs.getTimestamp("dt64_4"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_5"), rs.getTimestamp("dt64_5"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_7"), rs.getTimestamp("dt64_7"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_8"), rs.getTimestamp("dt64_8"));
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt64_0"), rs.getTimestamp("dt64_0"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt64_1"), rs.getTimestamp("dt64_1"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt64_2"), rs.getTimestamp("dt64_2"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt64_4"), rs.getTimestamp("dt64_4"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt64_5"), rs.getTimestamp("dt64_5"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt64_7"), rs.getTimestamp("dt64_7"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt64_8"), rs.getTimestamp("dt64_8"), rs.wasNull());
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
@ -169,8 +167,8 @@ public class MySQLJavaClientTest {
|
||||
int rowNum = 1;
|
||||
while (rs.next()) {
|
||||
System.out.printf("Row #%d\n", rowNum++);
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3"));
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3"), rs.wasNull());
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
@ -181,10 +179,10 @@ public class MySQLJavaClientTest {
|
||||
int rowNum = 1;
|
||||
while (rs.next()) {
|
||||
System.out.printf("Row #%d\n", rowNum++);
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "a"), rs.getString("a"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "u"), rs.getString("u"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "t"), rs.getString("t"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "m"), rs.getString("m"));
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "a"), rs.getString("a"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "u"), rs.getString("u"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "t"), rs.getString("t"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "m"), rs.getString("m"), rs.wasNull());
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
@ -196,17 +194,15 @@ public class MySQLJavaClientTest {
|
||||
int rowNum = 1;
|
||||
while (rs.next()) {
|
||||
System.out.printf("Row #%d\n", rowNum++);
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "f"), rs.getFloat("f"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d"), rs.getDate("d"));
|
||||
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"));
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "f"), rs.getFloat("f"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "d"), rs.getDate("d"), rs.wasNull());
|
||||
System.out.printf("%s, value: %s, wasNull: %b\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"), rs.wasNull());
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
private static String getMysqlType(ResultSet rs, String columnLabel) throws SQLException {
|
||||
ResultSetMetaData meta = rs.getMetaData();
|
||||
return String.format("%s type is %s", columnLabel,
|
||||
MysqlType.getByJdbcType(meta.getColumnType(rs.findColumn(columnLabel))));
|
||||
return String.format("%s type is %s", columnLabel, MysqlType.getByJdbcType(meta.getColumnType(rs.findColumn(columnLabel))));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -68,6 +68,7 @@ RUN python3 -m pip install --no-cache-dir \
|
||||
asyncio \
|
||||
avro==1.10.2 \
|
||||
azure-storage-blob \
|
||||
boto3 \
|
||||
cassandra-driver \
|
||||
confluent-kafka==1.9.2 \
|
||||
delta-spark==2.3.0 \
|
||||
|
@ -53,7 +53,7 @@ function configure()
|
||||
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
|
||||
function randomize_keeper_config_boolean_value {
|
||||
function randomize_config_boolean_value {
|
||||
value=$(($RANDOM % 2))
|
||||
sudo cat /etc/clickhouse-server/config.d/$2.xml \
|
||||
| sed "s|<$1>[01]</$1>|<$1>$value</$1>|" \
|
||||
@ -72,7 +72,11 @@ function configure()
|
||||
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
|
||||
randomize_config_boolean_value use_compression zookeeper
|
||||
if [[ -n "$ZOOKEEPER_FAULT_INJECTION" ]] && [[ "$ZOOKEEPER_FAULT_INJECTION" -eq 1 ]]; then
|
||||
randomize_config_boolean_value use_compression zookeeper_fault_injection
|
||||
else
|
||||
randomize_config_boolean_value use_compression zookeeper
|
||||
fi
|
||||
|
||||
# for clickhouse-server (via service)
|
||||
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
|
||||
|
@ -20,9 +20,9 @@ then
|
||||
fi
|
||||
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
|
||||
then
|
||||
# If the system has >=ARMv8.2 (https://en.wikipedia.org/wiki/AArch64), choose the corresponding build, else fall back to a v8.0
|
||||
# compat build. Unfortunately, the ARM ISA level cannot be read directly, we need to guess from the "features" in /proc/cpuinfo.
|
||||
# Also, the flags in /proc/cpuinfo are named differently than the flags passed to the compiler (cmake/cpu_features.cmake).
|
||||
# Dispatch between standard and compatibility builds, see cmake/cpu_features.cmake for details. Unfortunately, (1) the ARM ISA level
|
||||
# cannot be read directly, we need to guess from the "features" in /proc/cpuinfo, and (2) the flags in /proc/cpuinfo are named
|
||||
# differently than the flags passed to the compiler in cpu_features.cmake.
|
||||
HAS_ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/')
|
||||
if [ "${HAS_ARMV82}" ]
|
||||
then
|
||||
|
28
docs/changelogs/v23.10.4.25-stable.md
Normal file
28
docs/changelogs/v23.10.4.25-stable.md
Normal file
@ -0,0 +1,28 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.10.4.25-stable (330fd687d41) FIXME as compared to v23.10.3.5-stable (b2ba7637a41)
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#56633](https://github.com/ClickHouse/ClickHouse/issues/56633): In [#54043](https://github.com/ClickHouse/ClickHouse/issues/54043) the setup plan started to appear in the logs. It should be in the `runner_get_all_tests.log` only. As well, send the failed infrastructure event to CI db. [#56214](https://github.com/ClickHouse/ClickHouse/pull/56214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#56737](https://github.com/ClickHouse/ClickHouse/issues/56737): Do not fetch changed submodules in the builder container. [#56689](https://github.com/ClickHouse/ClickHouse/pull/56689) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Select from system tables when table based on table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
|
||||
* Fix restore from backup with `flatten_nested` and `data_type_default_nullable` [#56306](https://github.com/ClickHouse/ClickHouse/pull/56306) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fix crash in GCD codec in case when zeros present in data [#56704](https://github.com/ClickHouse/ClickHouse/pull/56704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Rewrite jobs to use callable workflow [#56385](https://github.com/ClickHouse/ClickHouse/pull/56385) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Continue rewriting workflows to reusable tests [#56501](https://github.com/ClickHouse/ClickHouse/pull/56501) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Better exception messages [#56854](https://github.com/ClickHouse/ClickHouse/pull/56854) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
23
docs/changelogs/v23.3.17.13-lts.md
Normal file
23
docs/changelogs/v23.3.17.13-lts.md
Normal file
@ -0,0 +1,23 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.3.17.13-lts (e867d59020f) FIXME as compared to v23.3.16.7-lts (fb4125cc92a)
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#56731](https://github.com/ClickHouse/ClickHouse/issues/56731): Do not fetch changed submodules in the builder container. [#56689](https://github.com/ClickHouse/ClickHouse/pull/56689) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Rewrite jobs to use callable workflow [#56385](https://github.com/ClickHouse/ClickHouse/pull/56385) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Continue rewriting workflows to reusable tests [#56501](https://github.com/ClickHouse/ClickHouse/pull/56501) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Better exception messages [#56854](https://github.com/ClickHouse/ClickHouse/pull/56854) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
31
docs/changelogs/v23.8.7.24-lts.md
Normal file
31
docs/changelogs/v23.8.7.24-lts.md
Normal file
@ -0,0 +1,31 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.8.7.24-lts (812b95e14ba) FIXME as compared to v23.8.6.16-lts (077df679bed)
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#56733](https://github.com/ClickHouse/ClickHouse/issues/56733): Do not fetch changed submodules in the builder container. [#56689](https://github.com/ClickHouse/ClickHouse/pull/56689) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Select from system tables when table based on table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
|
||||
* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### NO CL CATEGORY
|
||||
|
||||
* Backported in [#56601](https://github.com/ClickHouse/ClickHouse/issues/56601):. [#56598](https://github.com/ClickHouse/ClickHouse/pull/56598) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Rewrite jobs to use callable workflow [#56385](https://github.com/ClickHouse/ClickHouse/pull/56385) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Continue rewriting workflows to reusable tests [#56501](https://github.com/ClickHouse/ClickHouse/pull/56501) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Better exception messages [#56854](https://github.com/ClickHouse/ClickHouse/pull/56854) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
34
docs/changelogs/v23.9.5.29-stable.md
Normal file
34
docs/changelogs/v23.9.5.29-stable.md
Normal file
@ -0,0 +1,34 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.9.5.29-stable (f8554c1a1ff) FIXME as compared to v23.9.4.11-stable (74c1f49dd6a)
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#56631](https://github.com/ClickHouse/ClickHouse/issues/56631): In [#54043](https://github.com/ClickHouse/ClickHouse/issues/54043) the setup plan started to appear in the logs. It should be in the `runner_get_all_tests.log` only. As well, send the failed infrastructure event to CI db. [#56214](https://github.com/ClickHouse/ClickHouse/pull/56214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#56735](https://github.com/ClickHouse/ClickHouse/issues/56735): Do not fetch changed submodules in the builder container. [#56689](https://github.com/ClickHouse/ClickHouse/pull/56689) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Select from system tables when table based on table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
|
||||
* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fix crash in GCD codec in case when zeros present in data [#56704](https://github.com/ClickHouse/ClickHouse/pull/56704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### NO CL CATEGORY
|
||||
|
||||
* Backported in [#56603](https://github.com/ClickHouse/ClickHouse/issues/56603):. [#56598](https://github.com/ClickHouse/ClickHouse/pull/56598) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Improve enrich image [#55793](https://github.com/ClickHouse/ClickHouse/pull/55793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Rewrite jobs to use callable workflow [#56385](https://github.com/ClickHouse/ClickHouse/pull/56385) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Continue rewriting workflows to reusable tests [#56501](https://github.com/ClickHouse/ClickHouse/pull/56501) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Better exception messages [#56854](https://github.com/ClickHouse/ClickHouse/pull/56854) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
@ -238,19 +238,19 @@ Example:
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_topic` — Kafka topic.
|
||||
- `_key` — Key of the message.
|
||||
- `_offset` — Offset of the message.
|
||||
- `_timestamp` — Timestamp of the message.
|
||||
- `_timestamp_ms` — Timestamp in milliseconds of the message.
|
||||
- `_partition` — Partition of Kafka topic.
|
||||
- `_headers.name` — Array of message's headers keys.
|
||||
- `_headers.value` — Array of message's headers values.
|
||||
- `_topic` — Kafka topic. Data type: `LowCardinality(String)`.
|
||||
- `_key` — Key of the message. Data type: `String`.
|
||||
- `_offset` — Offset of the message. Data type: `UInt64`.
|
||||
- `_timestamp` — Timestamp of the message. Data type: `Nullable(DateTime)`.
|
||||
- `_timestamp_ms` — Timestamp in milliseconds of the message. Data type: `Nullable(DateTime64(3))`.
|
||||
- `_partition` — Partition of Kafka topic. Data type: `UInt64`.
|
||||
- `_headers.name` — Array of message's headers keys. Data type: `Array(String)`.
|
||||
- `_headers.value` — Array of message's headers values. Data type: `Array(String)`.
|
||||
|
||||
Additional virtual columns when `kafka_handle_error_mode='stream'`:
|
||||
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully.
|
||||
- `_error` - Exception message happened during failed parsing.
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully. Data type: `String`.
|
||||
- `_error` - Exception message happened during failed parsing. Data type: `String`.
|
||||
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always empty when message was parsed successfully.
|
||||
|
||||
|
@ -163,14 +163,14 @@ If you want to change the target table by using `ALTER`, we recommend disabling
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_subject` - NATS message subject.
|
||||
- `_subject` - NATS message subject. Data type: `String`.
|
||||
|
||||
Additional virtual columns when `nats_handle_error_mode='stream'`:
|
||||
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully.
|
||||
- `_error` - Exception message happened during failed parsing.
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully. Data type: `Nullable(String)`.
|
||||
- `_error` - Exception message happened during failed parsing. Data type: `Nullable(String)`.
|
||||
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always empty when message was parsed successfully.
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully.
|
||||
|
||||
|
||||
## Data formats support {#data-formats-support}
|
||||
|
@ -184,19 +184,19 @@ Example:
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_exchange_name` - RabbitMQ exchange name.
|
||||
- `_channel_id` - ChannelID, on which consumer, who received the message, was declared.
|
||||
- `_delivery_tag` - DeliveryTag of the received message. Scoped per channel.
|
||||
- `_redelivered` - `redelivered` flag of the message.
|
||||
- `_message_id` - messageID of the received message; non-empty if was set, when message was published.
|
||||
- `_timestamp` - timestamp of the received message; non-empty if was set, when message was published.
|
||||
- `_exchange_name` - RabbitMQ exchange name. Data type: `String`.
|
||||
- `_channel_id` - ChannelID, on which consumer, who received the message, was declared. Data type: `String`.
|
||||
- `_delivery_tag` - DeliveryTag of the received message. Scoped per channel. Data type: `UInt64`.
|
||||
- `_redelivered` - `redelivered` flag of the message. Data type: `UInt8`.
|
||||
- `_message_id` - messageID of the received message; non-empty if was set, when message was published. Data type: `String`.
|
||||
- `_timestamp` - timestamp of the received message; non-empty if was set, when message was published. Data type: `UInt64`.
|
||||
|
||||
Additional virtual columns when `rabbitmq_handle_error_mode='stream'`:
|
||||
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully.
|
||||
- `_error` - Exception message happened during failed parsing.
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully. Data type: `Nullable(String)`.
|
||||
- `_error` - Exception message happened during failed parsing. Data type: `Nullable(String)`.
|
||||
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always empty when message was parsed successfully.
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully.
|
||||
|
||||
## Data formats support {#data-formats-support}
|
||||
|
||||
|
@ -94,12 +94,12 @@ If you want to change the target table by using `ALTER`, we recommend disabling
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_filename` - Name of the log file.
|
||||
- `_offset` - Offset in the log file.
|
||||
- `_filename` - Name of the log file. Data type: `LowCardinality(String)`.
|
||||
- `_offset` - Offset in the log file. Data type: `UInt64`.
|
||||
|
||||
Additional virtual columns when `handle_error_mode='stream'`:
|
||||
|
||||
- `_raw_record` - Raw record that couldn't be parsed successfully.
|
||||
- `_error` - Exception message happened during failed parsing.
|
||||
- `_raw_record` - Raw record that couldn't be parsed successfully. Data type: `Nullable(String)`.
|
||||
- `_error` - Exception message happened during failed parsing. Data type: `Nullable(String)`.
|
||||
|
||||
Note: `_raw_record` and `_error` virtual columns are filled only in case of exception during parsing, they are always empty when message was parsed successfully.
|
||||
Note: `_raw_record` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully.
|
||||
|
@ -2469,6 +2469,7 @@ This function is designed to load a NumPy array from a .npy file into ClickHouse
|
||||
| u2 | UInt16 |
|
||||
| u4 | UInt32 |
|
||||
| u8 | UInt64 |
|
||||
| f2 | Float32 |
|
||||
| f4 | Float32 |
|
||||
| f8 | Float64 |
|
||||
| S | String |
|
||||
|
@ -17,12 +17,8 @@
|
||||
|
||||
- The issue may happen when the GPG key is changed.
|
||||
|
||||
Please use the following scripts to resolve the issue:
|
||||
Please use the manual from the [setup](../getting-started/install.md#setup-the-debian-repository) page to update the repository configuration.
|
||||
|
||||
```bash
|
||||
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
|
||||
sudo apt-get update
|
||||
```
|
||||
|
||||
### You Get Different Warnings with `apt-get update` {#you-get-different-warnings-with-apt-get-update}
|
||||
|
||||
|
@ -961,9 +961,13 @@ See also “[Executable User Defined Functions](../../sql-reference/functions/in
|
||||
|
||||
Lazy loading of dictionaries.
|
||||
|
||||
If `true`, then each dictionary is created on first use. If dictionary creation failed, the function that was using the dictionary throws an exception.
|
||||
If `true`, then each dictionary is loaded on the first use. If the loading fails, the function that was using the dictionary throws an exception.
|
||||
|
||||
If `false`, all dictionaries are created when the server starts, if the dictionary or dictionaries are created too long or are created with errors, then the server boots without of these dictionaries and continues to try to create these dictionaries.
|
||||
If `false`, then the server starts loading all dictionaries at startup.
|
||||
Dictionaries are loaded in background.
|
||||
The server doesn't wait at startup until all the dictionaries finish their loading
|
||||
(exception: if `wait_dictionaries_load_at_startup` is set to `true` - see below).
|
||||
When a dictionary is used in a query for the first time then the query waits until the dictionary is loaded if it's not loaded yet.
|
||||
|
||||
The default is `true`.
|
||||
|
||||
@ -2391,6 +2395,24 @@ Path to the file that contains:
|
||||
<users_config>users.xml</users_config>
|
||||
```
|
||||
|
||||
## wait_dictionaries_load_at_startup {#wait_dictionaries_load_at_startup}
|
||||
|
||||
If `false`, then the server will not wait at startup until all the dictionaries finish their loading.
|
||||
This allows ClickHouse to start faster.
|
||||
|
||||
If `true`, then the server will wait at startup until all the dictionaries finish their loading (successfully or not)
|
||||
before listening to any connections.
|
||||
This can make ClickHouse start slowly, however after that some queries can be executed faster
|
||||
(because they won't have to wait for the used dictionaries to be loaded).
|
||||
|
||||
The default is `false`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<wait_dictionaries_load_at_startup>false</wait_dictionaries_load_at_startup>
|
||||
```
|
||||
|
||||
## zookeeper {#server-settings_zookeeper}
|
||||
|
||||
Contains settings that allow ClickHouse to interact with a [ZooKeeper](http://zookeeper.apache.org/) cluster.
|
||||
|
@ -172,7 +172,27 @@ If you set `timeout_before_checking_execution_speed `to 0, ClickHouse will use c
|
||||
|
||||
## timeout_overflow_mode {#timeout-overflow-mode}
|
||||
|
||||
What to do if the query is run longer than ‘max_execution_time’: ‘throw’ or ‘break’. By default, throw.
|
||||
What to do if the query is run longer than `max_execution_time`: `throw` or `break`. By default, `throw`.
|
||||
|
||||
## max_execution_time_leaf
|
||||
|
||||
Similar semantics to `max_execution_time`, but applied only on leaf nodes for distributed or remote queries.
|
||||
|
||||
For example, if we want to limit execution time on leaf node to `10s` but no limit on the initial node, instead of having `max_execution_time` in the nested subquery settings:
|
||||
|
||||
``` sql
|
||||
SELECT count() FROM cluster(cluster, view(SELECT * FROM t SETTINGS max_execution_time = 10));
|
||||
```
|
||||
|
||||
We can use `max_execution_time_leaf` as the query settings:
|
||||
|
||||
``` sql
|
||||
SELECT count() FROM cluster(cluster, view(SELECT * FROM t)) SETTINGS max_execution_time_leaf = 10;
|
||||
```
|
||||
|
||||
## timeout_overflow_mode_leaf
|
||||
|
||||
What to do when the query on a leaf node runs longer than `max_execution_time_leaf`: `throw` or `break`. By default, `throw`.
|
||||
|
||||
## min_execution_speed {#min-execution-speed}
|
||||
|
||||
|
50
docs/en/operations/utilities/backupview.md
Normal file
50
docs/en/operations/utilities/backupview.md
Normal file
@ -0,0 +1,50 @@
|
||||
---
|
||||
slug: /en/operations/utilities/backupview
|
||||
title: clickhouse_backupview
|
||||
---
|
||||
|
||||
# clickhouse_backupview {#clickhouse_backupview}
|
||||
|
||||
Python module to help analyze backups made by the [BACKUP](https://clickhouse.com/docs/en/operations/backup) command.
|
||||
The main motivation was to allow getting some information from a backup without actually restoring it.
|
||||
|
||||
This module provides functions to
|
||||
- enumerate files contained in a backup
|
||||
- read files from a backup
|
||||
- get useful information in readable form about databases, tables, parts contained in a backup
|
||||
- check integrity of a backup
|
||||
|
||||
## Example:
|
||||
|
||||
```python
|
||||
from clickhouse_backupview import open_backup, S3, FileInfo
|
||||
|
||||
# Open a backup. We could also use a local path:
|
||||
# backup = open_backup("/backups/my_backup_1/")
|
||||
backup = open_backup(S3("uri", "access_key_id", "secret_access_key"))
|
||||
|
||||
# Get a list of databases inside the backup.
|
||||
print(backup.get_databases())
|
||||
|
||||
# Get a list of tables inside the backup,
|
||||
# and for each table its create query and a list of parts and partitions.
|
||||
for db in backup.get_databases():
|
||||
for tbl in backup.get_tables(database=db):
|
||||
print(backup.get_create_query(database=db, table=tbl))
|
||||
print(backup.get_partitions(database=db, table=tbl))
|
||||
print(backup.get_parts(database=db, table=tbl))
|
||||
|
||||
# Extract everything from the backup.
|
||||
backup.extract_all(table="mydb.mytable", out='/tmp/my_backup_1/all/')
|
||||
|
||||
# Extract the data of a specific table.
|
||||
backup.extract_table_data(table="mydb.mytable", out='/tmp/my_backup_1/mytable/')
|
||||
|
||||
# Extract a single partition.
|
||||
backup.extract_table_data(table="mydb.mytable", partition="202201", out='/tmp/my_backup_1/202201/')
|
||||
|
||||
# Extract a single part.
|
||||
backup.extract_table_data(table="mydb.mytable", part="202201_100_200_3", out='/tmp/my_backup_1/202201_100_200_3/')
|
||||
```
|
||||
|
||||
For more examples see the [test](https://github.com/ClickHouse/ClickHouse/blob/master/utils/backupview/test/test.py).
|
@ -16,3 +16,4 @@ pagination_next: 'en/operations/utilities/clickhouse-copier'
|
||||
- [clickhouse-disks](../../operations/utilities/clickhouse-disks.md) -- Provides filesystem-like operations
|
||||
on files among different ClickHouse disks.
|
||||
- [clickhouse-odbc-bridge](../../operations/utilities/odbc-bridge.md) — A proxy server for ODBC driver.
|
||||
- [clickhouse_backupview](../../operations/utilities/backupview.md) — A Python module to analyze ClickHouse backups.
|
||||
|
@ -6,9 +6,9 @@ sidebar_label: Random Numbers
|
||||
|
||||
# Functions for Generating Random Numbers
|
||||
|
||||
All functions in this section accept zero or one arguments. The only use of the argument (if provided) is to prevent prevent [common subexpression
|
||||
elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) such that two different execution of the same random
|
||||
function in a query return different random values.
|
||||
All functions in this section accept zero or one arguments. The only use of the argument (if provided) is to prevent [common subexpression
|
||||
elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) such that two different executions within a row of the same random
|
||||
function return different random values.
|
||||
|
||||
Related content
|
||||
- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse)
|
||||
|
@ -1,4 +1,4 @@
|
||||
---
|
||||
---
|
||||
slug: /en/sql-reference/table-functions/file
|
||||
sidebar_position: 60
|
||||
sidebar_label: file
|
||||
@ -6,7 +6,7 @@ sidebar_label: file
|
||||
|
||||
# file
|
||||
|
||||
Provides a table-like interface to SELECT from and INSERT to files. This table function is similar to the [s3](/docs/en/sql-reference/table-functions/url.md) table function. Use file() when working with local files, and s3() when working with buckets in S3, GCS, or MinIO.
|
||||
A table function which provides a table-like interface to SELECT from and INSERT into files, similar to the [s3](/docs/en/sql-reference/table-functions/s3.md) table function. Use `file()` when working with local files, and `s3()` when working with buckets in object storage such as S3, GCS, or MinIO.
|
||||
|
||||
The `file` function can be used in `SELECT` and `INSERT` queries to read from or write to files.
|
||||
|
||||
@ -18,18 +18,18 @@ file([path_to_archive ::] path [,format] [,structure] [,compression])
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
|
||||
- `path_to_archive` - The relative path to zip/tar/7z archive. Path to archive support the same globs as `path`.
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports in read-only mode the following [globs](#globs_in_path): `*`, `?`, `{abc,def}` (with `'abc'` and `'def'` being strings) and `{N..M}` (with `N` and `M` being numbers).
|
||||
- `path_to_archive` - The relative path to a zip/tar/7z archive. Supports the same globs as `path`.
|
||||
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
|
||||
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. Supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
|
||||
|
||||
|
||||
**Returned value**
|
||||
|
||||
A table with the specified structure for reading or writing data in the specified file.
|
||||
A table for reading or writing data in a file.
|
||||
|
||||
## File Write Examples
|
||||
## Examples for Writing to a File
|
||||
|
||||
### Write to a TSV file
|
||||
|
||||
@ -48,9 +48,9 @@ As a result, the data is written into the file `test.tsv`:
|
||||
1 3 2
|
||||
```
|
||||
|
||||
### Partitioned Write to multiple TSV files
|
||||
### Partitioned write to multiple TSV files
|
||||
|
||||
If you specify `PARTITION BY` expression when inserting data into a file() function, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency.
|
||||
If you specify a `PARTITION BY` expression when inserting data into a table function of type `file()`, then a separate file is created for each partition. Splitting the data into separate files helps to improve performance of read operations.
|
||||
|
||||
```sql
|
||||
INSERT INTO TABLE FUNCTION
|
||||
@ -72,11 +72,11 @@ As a result, the data is written into three files: `test_1.tsv`, `test_2.tsv`, a
|
||||
1 2 3
|
||||
```
|
||||
|
||||
## File Read Examples
|
||||
## Examples for Reading from a File
|
||||
|
||||
### SELECT from a CSV file
|
||||
|
||||
Setting `user_files_path` and the contents of the file `test.csv`:
|
||||
First, set `user_files_path` in the server configuration and prepare a file `test.csv`:
|
||||
|
||||
``` bash
|
||||
$ grep user_files_path /etc/clickhouse-server/config.xml
|
||||
@ -88,7 +88,7 @@ $ cat /var/lib/clickhouse/user_files/test.csv
|
||||
78,43,45
|
||||
```
|
||||
|
||||
Getting data from a table in `test.csv` and selecting the first two rows from it:
|
||||
Then, read data from `test.csv` into a table and select its first two rows:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM
|
||||
@ -103,14 +103,6 @@ LIMIT 2;
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
Getting the first 10 lines of a table that contains 3 columns of [UInt32](/docs/en/sql-reference/data-types/int-uint.md) type from a CSV file:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM
|
||||
file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
### Inserting data from a file into a table:
|
||||
|
||||
``` sql
|
||||
@ -130,41 +122,42 @@ file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
Getting data from table in table.csv, located in archive1.zip or/and archive2.zip
|
||||
Reading data from `table.csv`, located in `archive1.zip` and/or `archive2.zip`:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
|
||||
```
|
||||
|
||||
## Globs in Path {#globs_in_path}
|
||||
## Globbing {#globs_in_path}
|
||||
|
||||
Multiple path components can have globs. For being processed file must exist and match to the whole path pattern (not only suffix or prefix).
|
||||
Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix.
|
||||
|
||||
- `*` — Substitutes any number of any characters except `/` including empty string.
|
||||
- `?` — Substitutes any single character.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. The strings can contain the `/` symbol.
|
||||
- `{N..M}` — Substitutes any number in range from N to M including both borders.
|
||||
- `**` - Fetches all files inside the folder recursively.
|
||||
- `*` — Represents arbitrarily many characters except `/` but including the empty string.
|
||||
- `?` — Represents an arbitrary single character.
|
||||
- `{some_string,another_string,yet_another_one}` — Represents any of the alternative strings `'some_string', 'another_string', 'yet_another_one'`. The strings may contain `/`.
|
||||
- `{N..M}` — Represents any number `>= N` and `<= M`.
|
||||
- `**` - Represents all files inside a folder recursively.
|
||||
|
||||
Constructions with `{}` are similar to the [remote](remote.md) table function.
|
||||
|
||||
**Example**
|
||||
|
||||
Suppose we have several files with the following relative paths:
|
||||
Suppose there are these files with the following relative paths:
|
||||
|
||||
- 'some_dir/some_file_1'
|
||||
- 'some_dir/some_file_2'
|
||||
- 'some_dir/some_file_3'
|
||||
- 'another_dir/some_file_1'
|
||||
- 'another_dir/some_file_2'
|
||||
- 'another_dir/some_file_3'
|
||||
- `some_dir/some_file_1`
|
||||
- `some_dir/some_file_2`
|
||||
- `some_dir/some_file_3`
|
||||
- `another_dir/some_file_1`
|
||||
- `another_dir/some_file_2`
|
||||
- `another_dir/some_file_3`
|
||||
|
||||
Query the number of rows in these files:
|
||||
Query the total number of rows in all files:
|
||||
|
||||
``` sql
|
||||
SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32');
|
||||
```
|
||||
|
||||
Query the number of rows in all files of these two directories:
|
||||
An alternative path expression which achieves the same:
|
||||
|
||||
``` sql
|
||||
SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32');
|
||||
@ -176,7 +169,7 @@ If your listing of files contains number ranges with leading zeros, use the cons
|
||||
|
||||
**Example**
|
||||
|
||||
Query the data from files named `file000`, `file001`, … , `file999`:
|
||||
Query the total number of rows in files named `file000`, `file001`, … , `file999`:
|
||||
|
||||
``` sql
|
||||
SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32');
|
||||
@ -184,7 +177,7 @@ SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String,
|
||||
|
||||
**Example**
|
||||
|
||||
Query the data from all files inside `big_dir` directory recursively:
|
||||
Query the total number of rows from all files inside directory `big_dir/` recursively:
|
||||
|
||||
``` sql
|
||||
SELECT count(*) FROM file('big_dir/**', 'CSV', 'name String, value UInt32');
|
||||
@ -192,7 +185,7 @@ SELECT count(*) FROM file('big_dir/**', 'CSV', 'name String, value UInt32');
|
||||
|
||||
**Example**
|
||||
|
||||
Query the data from all `file002` files from any folder inside `big_dir` directory recursively:
|
||||
Query the total number of rows from all files `file002` inside any folder in directory `big_dir/` recursively:
|
||||
|
||||
``` sql
|
||||
SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt32');
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: remote
|
||||
|
||||
# remote, remoteSecure
|
||||
|
||||
Allows accessing remote servers, including migration of data, without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. `remoteSecure` - same as `remote` but with a secured connection.
|
||||
Table function `remote` allows accessing remote servers on-the-fly, i.e. without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. Table function `remoteSecure` is the same as `remote` but over a secure connection.
|
||||
|
||||
Both functions can be used in `SELECT` and `INSERT` queries.
|
||||
|
||||
@ -21,36 +21,36 @@ remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key])
|
||||
|
||||
## Parameters
|
||||
|
||||
- `addresses_expr` — An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`.
|
||||
- `addresses_expr` — A remote server address or an expression that generates multiple addresses of remote servers. Format: `host` or `host:port`.
|
||||
|
||||
The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets.
|
||||
The `host` can be specified as a server name, or as an IPv4 or IPv6 address. An IPv6 address must be specified in square brackets.
|
||||
|
||||
The port is the TCP port on the remote server. If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the server’s config file in `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) in `remoteSecure` (by default, 9440).
|
||||
The `port` is the TCP port on the remote server. If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the server config file for table function `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) for table function `remoteSecure` (by default, 9440).
|
||||
|
||||
The port is required for an IPv6 address.
|
||||
For IPv6 addresses, a port is required.
|
||||
|
||||
If only specify this parameter, `db` and `table` will use `system.one` by default.
|
||||
If only parameter `addresses_expr` is specified, `db` and `table` will use `system.one` by default.
|
||||
|
||||
Type: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
- `db` — Database name. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `table` — Table name. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `user` — User name. If the user is not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `password` — User password. If the password is not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `user` — User name. If not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `password` — User password. If not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `sharding_key` — Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
## Returned value
|
||||
|
||||
The dataset from remote servers.
|
||||
A table located on a remote server.
|
||||
|
||||
## Usage
|
||||
|
||||
Unless you are migrating data from one system to another, using the `remote` table function is less optimal than creating a `Distributed` table because in this case the server connection is re-established for every request. Also, if hostnames are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and do not use the `remote` table function.
|
||||
As table functions `remote` and `remoteSecure` re-establish the connection for each request, it is recommended to use a `Distributed` table instead. Also, if hostnames are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and do not use the `remote` table function.
|
||||
|
||||
The `remote` table function can be useful in the following cases:
|
||||
|
||||
- Migrating data from one system to another
|
||||
- Accessing a specific server for data comparison, debugging, and testing.
|
||||
- One-time data migration from one system to another
|
||||
- Accessing a specific server for data comparison, debugging, and testing, i.e. ad-hoc connections.
|
||||
- Queries between various ClickHouse clusters for research purposes.
|
||||
- Infrequent distributed requests that are made manually.
|
||||
- Distributed requests where the set of servers is re-defined each time.
|
||||
@ -68,7 +68,7 @@ localhost
|
||||
[2a02:6b8:0:1111::11]:9000
|
||||
```
|
||||
|
||||
Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like shards with different data). Example:
|
||||
Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing and send the query to all specified addresses (like shards with different data). Example:
|
||||
|
||||
``` text
|
||||
example01-01-1,example01-02-1
|
||||
@ -91,10 +91,13 @@ SELECT * FROM remote_table;
|
||||
```
|
||||
|
||||
### Migration of tables from one system to another:
|
||||
|
||||
This example uses one table from a sample dataset. The database is `imdb`, and the table is `actors`.
|
||||
|
||||
#### On the source ClickHouse system (the system that currently hosts the data)
|
||||
|
||||
- Verify the source database and table name (`imdb.actors`)
|
||||
|
||||
```sql
|
||||
show databases
|
||||
```
|
||||
@ -104,6 +107,7 @@ This example uses one table from a sample dataset. The database is `imdb`, and
|
||||
```
|
||||
|
||||
- Get the CREATE TABLE statement from the source:
|
||||
|
||||
```
|
||||
select create_table_query
|
||||
from system.tables
|
||||
@ -111,6 +115,7 @@ This example uses one table from a sample dataset. The database is `imdb`, and
|
||||
```
|
||||
|
||||
Response
|
||||
|
||||
```sql
|
||||
CREATE TABLE imdb.actors (`id` UInt32,
|
||||
`first_name` String,
|
||||
@ -123,11 +128,13 @@ This example uses one table from a sample dataset. The database is `imdb`, and
|
||||
#### On the destination ClickHouse system:
|
||||
|
||||
- Create the destination database:
|
||||
|
||||
```sql
|
||||
CREATE DATABASE imdb
|
||||
```
|
||||
|
||||
- Using the CREATE TABLE statement from the source, create the destination:
|
||||
|
||||
```sql
|
||||
CREATE TABLE imdb.actors (`id` UInt32,
|
||||
`first_name` String,
|
||||
@ -140,21 +147,23 @@ This example uses one table from a sample dataset. The database is `imdb`, and
|
||||
#### Back on the source deployment:
|
||||
|
||||
Insert into the new database and table created on the remote system. You will need the host, port, username, password, destination database, and destination table.
|
||||
|
||||
```sql
|
||||
INSERT INTO FUNCTION
|
||||
remoteSecure('remote.clickhouse.cloud:9440', 'imdb.actors', 'USER', 'PASSWORD')
|
||||
SELECT * from imdb.actors
|
||||
```
|
||||
|
||||
## Globs in Addresses {#globs-in-addresses}
|
||||
## Globbing {#globs-in-addresses}
|
||||
|
||||
Patterns in curly brackets `{ }` are used to generate a set of shards and to specify replicas. If there are multiple pairs of curly brackets, then the direct product of the corresponding sets is generated.
|
||||
|
||||
The following pattern types are supported.
|
||||
|
||||
- {*a*,*b*} - Any number of variants separated by a comma. The pattern is replaced with *a* in the first shard address and it is replaced with *b* in the second shard address and so on. For instance, `example0{1,2}-1` generates addresses `example01-1` and `example02-1`.
|
||||
- {*n*..*m*} - A range of numbers. This pattern generates shard addresses with incrementing indices from *n* to *m*. `example0{1..2}-1` generates `example01-1` and `example02-1`.
|
||||
- {*0n*..*0m*} - A range of numbers with leading zeroes. This modification preserves leading zeroes in indices. The pattern `example{01..03}-1` generates `example01-1`, `example02-1` and `example03-1`.
|
||||
- {*a*|*b*} - Any number of variants separated by a `|`. The pattern specifies replicas. For instance, `example01-{1|2}` generates replicas `example01-1` and `example01-2`.
|
||||
- `{a,b,c}` - Represents any of the alternative strings `a`, `b` or `c`. The pattern is replaced with `a` in the first shard address and replaced with `b` in the second shard address and so on. For instance, `example0{1,2}-1` generates addresses `example01-1` and `example02-1`.
|
||||
- `{N..M}` - A range of numbers. This pattern generates shard addresses with incrementing indices from `N` to (and including) `M`. For instance, `example0{1..2}-1` generates `example01-1` and `example02-1`.
|
||||
- `{0n..0m}` - A range of numbers with leading zeroes. This pattern preserves leading zeroes in indices. For instance, `example{01..03}-1` generates `example01-1`, `example02-1` and `example03-1`.
|
||||
- `{a|b}` - Any number of variants separated by a `|`. The pattern specifies replicas. For instance, `example01-{1|2}` generates replicas `example01-1` and `example01-2`.
|
||||
|
||||
The query will be sent to the first healthy replica. However, for `remote` the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md#settings-load_balancing) setting.
|
||||
The number of generated addresses is limited by the [table_function_remote_max_addresses](../../operations/settings/settings.md#table_function_remote_max_addresses) setting.
|
||||
|
@ -2,6 +2,3 @@ position: 1
|
||||
label: 'Введение'
|
||||
collapsible: true
|
||||
collapsed: true
|
||||
link:
|
||||
type: generated-index
|
||||
title: Введение
|
||||
|
13
docs/ru/introduction/index.md
Normal file
13
docs/ru/introduction/index.md
Normal file
@ -0,0 +1,13 @@
|
||||
---
|
||||
slug: /ru/introduction/
|
||||
sidebar_label: "Введение"
|
||||
sidebar_position: 8
|
||||
---
|
||||
|
||||
# Введение
|
||||
|
||||
В этом разделе содержится информация о том, как начать работу с ClickHouse.
|
||||
|
||||
- [Отличительные возможности ClickHouse](./distinctive-features.md)
|
||||
- [Производительность](./performance.md)
|
||||
- [История ClickHouse](./history.md)
|
@ -277,8 +277,10 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
|
||||
|
||||
Если `true`, то каждый словарь создаётся при первом использовании. Если словарь не удалось создать, то вызов функции, использующей словарь, сгенерирует исключение.
|
||||
|
||||
Если `false`, то все словари создаются при старте сервера, если словарь или словари создаются слишком долго или создаются с ошибкой, то сервер загружается без
|
||||
этих словарей и продолжает попытки создать эти словари.
|
||||
Если `false`, сервер начнет загрузку всех словарей на старте сервера.
|
||||
Словари загружаются в фоне. Сервер не ждет на старте, пока словари закончат загружаться
|
||||
(исключение: если `wait_dictionaries_load_at_startup` установлена в `true` - см. ниже).
|
||||
Когда словарь используется в запросе первый раз, этот запрос будет ждать окончания загрузки словаря, если он еще не загрузился.
|
||||
|
||||
По умолчанию - `true`.
|
||||
|
||||
@ -1718,6 +1720,24 @@ TCP порт для защищённого обмена данными с кли
|
||||
<users_config>users.xml</users_config>
|
||||
```
|
||||
|
||||
## wait_dictionaries_load_at_startup {#wait_dictionaries_load_at_startup}
|
||||
|
||||
Если `false`, то сервер не будет ждать на старте, пока словари закончат загружаться.
|
||||
Это позволяет ClickHouse стартовать быстрее.
|
||||
|
||||
Если `true`, то ClickHouse будет ждать на старте до окончания загрузки всех словарей (успешно или нет)
|
||||
перед тем, как начать принимать соединения.
|
||||
Это может привести к медленному старту ClickHouse, однако после этого некоторые запросы могут выполняться быстрее
|
||||
(потому что им не придется ждать окончания загрузки используемых словарей).
|
||||
|
||||
По умолчанию - `false`.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` xml
|
||||
<wait_dictionaries_load_at_startup>false</wait_dictionaries_load_at_startup>
|
||||
```
|
||||
|
||||
## zookeeper {#server-settings_zookeeper}
|
||||
|
||||
Содержит параметры, позволяющие ClickHouse взаимодействовать с кластером [ZooKeeper](http://zookeeper.apache.org/).
|
||||
|
50
docs/ru/operations/utilities/backupview.md
Normal file
50
docs/ru/operations/utilities/backupview.md
Normal file
@ -0,0 +1,50 @@
|
||||
---
|
||||
slug: /ru/operations/utilities/backupview
|
||||
title: clickhouse_backupview
|
||||
---
|
||||
|
||||
# clickhouse_backupview {#clickhouse_backupview}
|
||||
|
||||
Модуль на Питоне для анализа бэкапов, созданных командой [BACKUP](https://clickhouse.com/docs/ru/operations/backup).
|
||||
Главная идея этого модуля была в том, чтобы позволить извлечение информации из бэкапа без выполнения команды RESTORE.
|
||||
|
||||
Этот модуль содержит функции для
|
||||
- получения списка файлов внутри бэкапа
|
||||
- чтения файлов из бэкапа
|
||||
- получения информации в читаемом виде о базах данных, таблицах, партах, содержащихся в бэкапе
|
||||
- проверки целостности бэкапа
|
||||
|
||||
## Пример:
|
||||
|
||||
```python
|
||||
from clickhouse_backupview import open_backup, S3, FileInfo
|
||||
|
||||
# Открыть бэкап. Можно также использовать локальный путь:
|
||||
# backup = open_backup("/backups/my_backup_1/")
|
||||
backup = open_backup(S3("uri", "access_key_id", "secret_access_key"))
|
||||
|
||||
# Получить список баз данных внутри бэкапа.
|
||||
print(backup.get_databases())
|
||||
|
||||
# Получить список таблиц внутри бэкапа,
|
||||
# и для каждой таблицы получить ее определение, а также список партов и партиций.
|
||||
for db in backup.get_databases():
|
||||
for tbl in backup.get_tables(database=db):
|
||||
print(backup.get_create_query(database=db, table=tbl))
|
||||
print(backup.get_partitions(database=db, table=tbl))
|
||||
print(backup.get_parts(database=db, table=tbl))
|
||||
|
||||
# Извлечь все содержимое бэкапа.
|
||||
backup.extract_all(table="mydb.mytable", out='/tmp/my_backup_1/all/')
|
||||
|
||||
# Извлечь данные конкретной таблицы.
|
||||
backup.extract_table_data(table="mydb.mytable", out='/tmp/my_backup_1/mytable/')
|
||||
|
||||
# Извлечь одну партицию из бэкапа.
|
||||
backup.extract_table_data(table="mydb.mytable", partition="202201", out='/tmp/my_backup_1/202201/')
|
||||
|
||||
# Извлечь один парт из бэкапа.
|
||||
backup.extract_table_data(table="mydb.mytable", part="202201_100_200_3", out='/tmp/my_backup_1/202201_100_200_3/')
|
||||
```
|
||||
|
||||
Больше примеров смотрите в [тесте](https://github.com/ClickHouse/ClickHouse/blob/master/utils/backupview/test/test.py).
|
@ -13,3 +13,4 @@ sidebar_position: 56
|
||||
- [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — обфусцирует данные.
|
||||
- [ClickHouse compressor](../../operations/utilities/clickhouse-compressor.md) — упаковывает и распаковывает данные.
|
||||
- [clickhouse-odbc-bridge](../../operations/utilities/odbc-bridge.md) — прокси-сервер для ODBC.
|
||||
- [clickhouse_backupview](../../operations/utilities/backupview.md) — модуль на Питоне для анализа бэкапов ClickHouse.
|
||||
|
@ -1,3 +1,5 @@
|
||||
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
|
||||
|
||||
if (USE_CLANG_TIDY)
|
||||
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
|
||||
endif ()
|
||||
|
@ -676,6 +676,10 @@ try
|
||||
global_context->addWarningMessage("Server was built with sanitizer. It will work slowly.");
|
||||
#endif
|
||||
|
||||
#if defined(SANITIZE_COVERAGE) || WITH_COVERAGE
|
||||
global_context->addWarningMessage("Server was built with code coverage. It will work slowly.");
|
||||
#endif
|
||||
|
||||
const size_t physical_server_memory = getMemoryAmount();
|
||||
|
||||
LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.",
|
||||
@ -1372,6 +1376,8 @@ try
|
||||
|
||||
global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config);
|
||||
|
||||
global_context->reloadQueryMaskingRulesIfChanged(config);
|
||||
|
||||
std::lock_guard lock(servers_lock);
|
||||
updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables);
|
||||
}
|
||||
@ -1816,6 +1822,9 @@ try
|
||||
try
|
||||
{
|
||||
global_context->loadOrReloadDictionaries(config());
|
||||
|
||||
if (config().getBool("wait_dictionaries_load_at_startup", false))
|
||||
global_context->waitForDictionariesLoad();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -1266,6 +1266,17 @@
|
||||
-->
|
||||
<dictionaries_config>*_dictionary.*ml</dictionaries_config>
|
||||
|
||||
<!-- Load dictionaries lazily, i.e. a dictionary will be loaded when it's used for the first time.
|
||||
"false" means ClickHouse will start loading dictionaries immediately at startup.
|
||||
-->
|
||||
<dictionaries_lazy_load>true</dictionaries_lazy_load>
|
||||
|
||||
<!-- Wait at startup until all the dictionaries finish their loading (successfully or not)
|
||||
before listening to connections. Setting this to 1 can make ClickHouse start slowly,
|
||||
however some queries can be executed faster (because they won't have to wait for the used dictionaries to be loaded).
|
||||
-->
|
||||
<wait_dictionaries_load_at_startup>false</wait_dictionaries_load_at_startup>
|
||||
|
||||
<!-- Configuration of user defined executable functions -->
|
||||
<user_defined_executable_functions_config>*_function.*ml</user_defined_executable_functions_config>
|
||||
|
||||
|
@ -182,7 +182,7 @@ public:
|
||||
|
||||
struct ConvertToASTOptions
|
||||
{
|
||||
/// Add _CAST if constant litral type is different from column type
|
||||
/// Add _CAST if constant literal type is different from column type
|
||||
bool add_cast_for_constants = true;
|
||||
|
||||
/// Identifiers are fully qualified (`database.table.column`), otherwise names are just column names (`column`)
|
||||
|
@ -188,7 +188,7 @@ private:
|
||||
if (auto * table_function_node = parent->as<TableFunctionNode>())
|
||||
{
|
||||
if (child != table_function_node->getArgumentsNode())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctioNode is expected to have only one child node");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctionNode is expected to have only one child node");
|
||||
|
||||
const auto & unresolved_indexes = table_function_node->getUnresolvedArgumentIndexes();
|
||||
|
||||
|
168
src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.cpp
Normal file
168
src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.cpp
Normal file
@ -0,0 +1,168 @@
|
||||
#include <Analyzer/Passes/RemoveUnusedProjectionColumnsPass.h>
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/QueryNode.h>
|
||||
#include <Analyzer/ColumnNode.h>
|
||||
#include <Analyzer/SortNode.h>
|
||||
#include <Analyzer/AggregationUtils.h>
|
||||
#include <Analyzer/Utils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/** Visitor that, for the node it is started on, collects:
  * - which column names are read from every QUERY/UNION column source;
  * - which nested QUERY/UNION nodes were encountered (they are not descended into).
  */
class CollectUsedColumnsVisitor : public InDepthQueryTreeVisitorWithContext<CollectUsedColumnsVisitor>
{
public:
    using Base = InDepthQueryTreeVisitorWithContext<CollectUsedColumnsVisitor>;
    using Base::Base;

    /// Nested QUERY/UNION children are remembered for later processing instead of being visited now.
    bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child)
    {
        if (!isQueryOrUnionNode(child))
            return true;

        subqueries_nodes_to_visit.insert(child);
        return false;
    }

    void enterImpl(QueryTreeNodePtr & node)
    {
        const auto type_of_node = node->getNodeType();

        if (type_of_node == QueryTreeNodeType::QUERY)
        {
            /// Register every QUERY/UNION table expression of this query with an empty set of used
            /// columns, so that it gets an entry even if no column is read from it.
            auto & as_query = node->as<QueryNode &>();
            for (const auto & table_expression : extractTableExpressions(as_query.getJoinTree()))
            {
                if (isQueryOrUnionNode(table_expression))
                    query_or_union_node_to_used_columns.try_emplace(table_expression);
            }
        }
        else if (type_of_node == QueryTreeNodeType::COLUMN)
        {
            /// Record the column name under its source when the source is a QUERY or UNION node.
            auto & as_column = node->as<ColumnNode &>();
            auto source = as_column.getColumnSource();

            if (isQueryOrUnionNode(source))
                query_or_union_node_to_used_columns[source].insert(as_column.getColumnName());
        }
    }

    /// Drop everything collected so far so that the visitor can be reused for another node.
    void reset()
    {
        query_or_union_node_to_used_columns.clear();
        subqueries_nodes_to_visit.clear();
    }

    /// Nested QUERY/UNION nodes that were skipped by needChildVisit.
    std::unordered_set<QueryTreeNodePtr> subqueries_nodes_to_visit;

    /// QUERY/UNION node -> names of its columns that are referenced.
    std::unordered_map<QueryTreeNodePtr, std::unordered_set<std::string>> query_or_union_node_to_used_columns;
};
|
||||
|
||||
std::unordered_set<size_t> convertUsedColumnNamesToUsedProjectionIndexes(const QueryTreeNodePtr & query_or_union_node, const std::unordered_set<std::string> & used_column_names)
|
||||
{
|
||||
std::unordered_set<size_t> result;
|
||||
|
||||
auto * union_node = query_or_union_node->as<UnionNode>();
|
||||
auto * query_node = query_or_union_node->as<QueryNode>();
|
||||
|
||||
const auto & projection_columns = query_node ? query_node->getProjectionColumns() : union_node->computeProjectionColumns();
|
||||
size_t projection_columns_size = projection_columns.size();
|
||||
|
||||
for (size_t i = 0; i < projection_columns_size; ++i)
|
||||
{
|
||||
const auto & projection_column = projection_columns[i];
|
||||
if (used_column_names.contains(projection_column.name))
|
||||
result.insert(i);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// We cannot remove aggregate functions, if query does not contain GROUP BY or arrayJoin from subquery projection
|
||||
void updateUsedProjectionIndexes(const QueryTreeNodePtr & query_or_union_node, std::unordered_set<size_t> & used_projection_columns_indexes)
|
||||
{
|
||||
if (auto * union_node = query_or_union_node->as<UnionNode>())
|
||||
{
|
||||
auto union_node_mode = union_node->getUnionMode();
|
||||
bool is_distinct = union_node_mode == SelectUnionMode::UNION_DISTINCT ||
|
||||
union_node_mode == SelectUnionMode::INTERSECT_DISTINCT ||
|
||||
union_node_mode == SelectUnionMode::EXCEPT_DISTINCT;
|
||||
|
||||
if (is_distinct)
|
||||
{
|
||||
auto union_projection_columns = union_node->computeProjectionColumns();
|
||||
size_t union_projection_columns_size = union_projection_columns.size();
|
||||
|
||||
for (size_t i = 0; i < union_projection_columns_size; ++i)
|
||||
used_projection_columns_indexes.insert(i);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto & query_node : union_node->getQueries().getNodes())
|
||||
updateUsedProjectionIndexes(query_node, used_projection_columns_indexes);
|
||||
return;
|
||||
}
|
||||
|
||||
const auto & query_node = query_or_union_node->as<const QueryNode &>();
|
||||
const auto & projection_nodes = query_node.getProjection().getNodes();
|
||||
size_t projection_nodes_size = projection_nodes.size();
|
||||
|
||||
for (size_t i = 0; i < projection_nodes_size; ++i)
|
||||
{
|
||||
const auto & projection_node = projection_nodes[i];
|
||||
if ((!query_node.hasGroupBy() && hasAggregateFunctionNodes(projection_node)) || hasFunctionNode(projection_node, "arrayJoin"))
|
||||
used_projection_columns_indexes.insert(i);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Walk the query tree level by level (one QUERY/UNION node at a time) and, for every subquery
/// used as a table expression, drop the projection columns that the enclosing query does not use.
void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
    /// Explicit stack of nodes that still have to be processed.
    std::vector<QueryTreeNodePtr> pending_nodes;
    pending_nodes.push_back(query_tree_node);

    CollectUsedColumnsVisitor visitor(std::move(context));

    while (!pending_nodes.empty())
    {
        auto current_node = std::move(pending_nodes.back());
        pending_nodes.pop_back();

        visitor.visit(current_node);

        for (auto & [subquery, used_column_names] : visitor.query_or_union_node_to_used_columns)
        {
            auto indexes_to_keep = convertUsedColumnNamesToUsedProjectionIndexes(subquery, used_column_names);
            updateUsedProjectionIndexes(subquery, indexes_to_keep);

            /// A projection cannot become empty: fall back to keeping the first column.
            if (indexes_to_keep.empty())
                indexes_to_keep.insert(0);

            if (auto * as_query = subquery->as<QueryNode>())
                as_query->removeUnusedProjectionColumns(indexes_to_keep);
            else if (auto * as_union = subquery->as<UnionNode>())
                as_union->removeUnusedProjectionColumns(indexes_to_keep);
        }

        /// Nested QUERY/UNION nodes were skipped by the visitor, schedule them now.
        pending_nodes.insert(
            pending_nodes.end(),
            visitor.subqueries_nodes_to_visit.begin(),
            visitor.subqueries_nodes_to_visit.end());

        visitor.reset();
    }
}
|
||||
|
||||
}
|
24
src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.h
Normal file
24
src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.h
Normal file
@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include <Analyzer/IQueryTreePass.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Remove unused projection columns in subqueries.
|
||||
*
|
||||
* Example: SELECT a FROM (SELECT a, b FROM test_table);
|
||||
* Result: SELECT a FROM (SELECT a FROM test_table);
|
||||
*/
|
||||
class RemoveUnusedProjectionColumnsPass final : public IQueryTreePass
|
||||
{
|
||||
public:
|
||||
String getName() override { return "RemoveUnusedProjectionColumnsPass"; }
|
||||
|
||||
String getDescription() override { return "Remove unused projection columns in subqueries."; }
|
||||
|
||||
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -46,6 +46,54 @@ QueryNode::QueryNode(ContextMutablePtr context_)
|
||||
: QueryNode(std::move(context_), {} /*settings_changes*/)
|
||||
{}
|
||||
|
||||
/// Store resolved projection columns. Their number must be equal to the number of projection nodes,
/// otherwise a LOGICAL_ERROR exception is thrown.
void QueryNode::resolveProjectionColumns(NamesAndTypes projection_columns_value)
{
    const size_t projection_nodes_size = getProjection().getNodes().size();

    if (projection_nodes_size == projection_columns_value.size())
    {
        projection_columns = std::move(projection_columns_value);
        return;
    }

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected projection columns size to match projection nodes size");
}
|
||||
|
||||
void QueryNode::removeUnusedProjectionColumns(const std::unordered_set<std::string> & used_projection_columns)
|
||||
{
|
||||
auto & projection_nodes = getProjection().getNodes();
|
||||
size_t projection_columns_size = projection_columns.size();
|
||||
size_t write_index = 0;
|
||||
|
||||
for (size_t i = 0; i < projection_columns_size; ++i)
|
||||
{
|
||||
if (!used_projection_columns.contains(projection_columns[i].name))
|
||||
continue;
|
||||
|
||||
projection_nodes[write_index] = projection_nodes[i];
|
||||
projection_columns[write_index] = projection_columns[i];
|
||||
++write_index;
|
||||
}
|
||||
|
||||
projection_nodes.erase(projection_nodes.begin() + write_index, projection_nodes.end());
|
||||
projection_columns.erase(projection_columns.begin() + write_index, projection_columns.end());
|
||||
}
|
||||
|
||||
void QueryNode::removeUnusedProjectionColumns(const std::unordered_set<size_t> & used_projection_columns_indexes)
|
||||
{
|
||||
auto & projection_nodes = getProjection().getNodes();
|
||||
size_t projection_columns_size = projection_columns.size();
|
||||
size_t write_index = 0;
|
||||
|
||||
for (size_t i = 0; i < projection_columns_size; ++i)
|
||||
{
|
||||
if (!used_projection_columns_indexes.contains(i))
|
||||
continue;
|
||||
|
||||
projection_nodes[write_index] = projection_nodes[i];
|
||||
projection_columns[write_index] = projection_columns[i];
|
||||
++write_index;
|
||||
}
|
||||
|
||||
projection_nodes.erase(projection_nodes.begin() + write_index, projection_nodes.end());
|
||||
projection_columns.erase(projection_columns.begin() + write_index, projection_columns.end());
|
||||
}
|
||||
|
||||
void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
|
||||
{
|
||||
buffer << std::string(indent, ' ') << "QUERY id: " << format_state.getNodeId(this);
|
||||
|
@ -556,10 +556,13 @@ public:
|
||||
}
|
||||
|
||||
/// Resolve query node projection columns
|
||||
void resolveProjectionColumns(NamesAndTypes projection_columns_value)
|
||||
{
|
||||
projection_columns = std::move(projection_columns_value);
|
||||
}
|
||||
void resolveProjectionColumns(NamesAndTypes projection_columns_value);
|
||||
|
||||
/// Remove unused projection columns
|
||||
void removeUnusedProjectionColumns(const std::unordered_set<std::string> & used_projection_columns);
|
||||
|
||||
/// Remove unused projection columns
|
||||
void removeUnusedProjectionColumns(const std::unordered_set<size_t> & used_projection_columns_indexes);
|
||||
|
||||
QueryTreeNodeType getNodeType() const override
|
||||
{
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/Utils.h>
|
||||
#include <Analyzer/Passes/QueryAnalysisPass.h>
|
||||
#include <Analyzer/Passes/RemoveUnusedProjectionColumnsPass.h>
|
||||
#include <Analyzer/Passes/CountDistinctPass.h>
|
||||
#include <Analyzer/Passes/UniqToCountPass.h>
|
||||
#include <Analyzer/Passes/FunctionToSubcolumnsPass.h>
|
||||
@ -243,6 +244,7 @@ void QueryTreePassManager::dump(WriteBuffer & buffer, size_t up_to_pass_index)
|
||||
void addQueryTreePasses(QueryTreePassManager & manager)
|
||||
{
|
||||
manager.addPass(std::make_unique<QueryAnalysisPass>());
|
||||
manager.addPass(std::make_unique<RemoveUnusedProjectionColumnsPass>());
|
||||
manager.addPass(std::make_unique<FunctionToSubcolumnsPass>());
|
||||
|
||||
manager.addPass(std::make_unique<ConvertLogicalExpressionToCNFPass>());
|
||||
|
@ -88,6 +88,41 @@ NamesAndTypes UnionNode::computeProjectionColumns() const
|
||||
return result_columns;
|
||||
}
|
||||
|
||||
void UnionNode::removeUnusedProjectionColumns(const std::unordered_set<std::string> & used_projection_columns)
|
||||
{
|
||||
auto projection_columns = computeProjectionColumns();
|
||||
size_t projection_columns_size = projection_columns.size();
|
||||
std::unordered_set<size_t> used_projection_column_indexes;
|
||||
|
||||
for (size_t i = 0; i < projection_columns_size; ++i)
|
||||
{
|
||||
const auto & projection_column = projection_columns[i];
|
||||
if (used_projection_columns.contains(projection_column.name))
|
||||
used_projection_column_indexes.insert(i);
|
||||
}
|
||||
|
||||
auto & query_nodes = getQueries().getNodes();
|
||||
for (auto & query_node : query_nodes)
|
||||
{
|
||||
if (auto * query_node_typed = query_node->as<QueryNode>())
|
||||
query_node_typed->removeUnusedProjectionColumns(used_projection_column_indexes);
|
||||
else if (auto * union_node_typed = query_node->as<UnionNode>())
|
||||
union_node_typed->removeUnusedProjectionColumns(used_projection_column_indexes);
|
||||
}
|
||||
}
|
||||
|
||||
void UnionNode::removeUnusedProjectionColumns(const std::unordered_set<size_t> & used_projection_columns_indexes)
|
||||
{
|
||||
auto & query_nodes = getQueries().getNodes();
|
||||
for (auto & query_node : query_nodes)
|
||||
{
|
||||
if (auto * query_node_typed = query_node->as<QueryNode>())
|
||||
query_node_typed->removeUnusedProjectionColumns(used_projection_columns_indexes);
|
||||
else if (auto * union_node_typed = query_node->as<UnionNode>())
|
||||
union_node_typed->removeUnusedProjectionColumns(used_projection_columns_indexes);
|
||||
}
|
||||
}
|
||||
|
||||
void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
|
||||
{
|
||||
buffer << std::string(indent, ' ') << "UNION id: " << format_state.getNodeId(this);
|
||||
|
@ -129,6 +129,12 @@ public:
|
||||
/// Compute union node projection columns
|
||||
NamesAndTypes computeProjectionColumns() const;
|
||||
|
||||
/// Remove unused projection columns
|
||||
void removeUnusedProjectionColumns(const std::unordered_set<std::string> & used_projection_columns);
|
||||
|
||||
/// Remove unused projection columns
|
||||
void removeUnusedProjectionColumns(const std::unordered_set<size_t> & used_projection_columns_indexes);
|
||||
|
||||
QueryTreeNodeType getNodeType() const override
|
||||
{
|
||||
return QueryTreeNodeType::UNION;
|
||||
|
@ -152,6 +152,17 @@ void makeUniqueColumnNamesInBlock(Block & block)
|
||||
}
|
||||
}
|
||||
|
||||
bool isQueryOrUnionNode(const IQueryTreeNode * node)
|
||||
{
|
||||
auto node_type = node->getNodeType();
|
||||
return node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION;
|
||||
}
|
||||
|
||||
bool isQueryOrUnionNode(const QueryTreeNodePtr & node)
|
||||
{
|
||||
return isQueryOrUnionNode(node.get());
|
||||
}
|
||||
|
||||
QueryTreeNodePtr buildCastFunction(const QueryTreeNodePtr & expression,
|
||||
const DataTypePtr & type,
|
||||
const ContextPtr & context,
|
||||
|
@ -27,6 +27,12 @@ std::string getGlobalInFunctionNameForLocalInFunctionName(const std::string & fu
|
||||
/// Add unique suffix to names of duplicate columns in block
|
||||
void makeUniqueColumnNamesInBlock(Block & block);
|
||||
|
||||
/// Returns true, if node has type QUERY or UNION
|
||||
bool isQueryOrUnionNode(const IQueryTreeNode * node);
|
||||
|
||||
/// Returns true, if node has type QUERY or UNION
|
||||
bool isQueryOrUnionNode(const QueryTreeNodePtr & node);
|
||||
|
||||
/** Build cast function that cast expression into type.
|
||||
* If resolve = true, then result cast function is resolved during build, otherwise
|
||||
* result cast function is not resolved during build.
|
||||
|
@ -1,3 +1,5 @@
|
||||
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
|
||||
|
||||
if (USE_INCLUDE_WHAT_YOU_USE)
|
||||
set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
|
||||
endif ()
|
||||
@ -293,7 +295,8 @@ set_source_files_properties(
|
||||
Common/Elf.cpp
|
||||
Common/Dwarf.cpp
|
||||
Common/SymbolIndex.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-O2 ${WITHOUT_COVERAGE}")
|
||||
Common/ThreadFuzzer.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-O2 ${WITHOUT_COVERAGE_FLAGS}")
|
||||
|
||||
target_link_libraries (clickhouse_common_io
|
||||
PRIVATE
|
||||
@ -597,8 +600,6 @@ if (TARGET ch_rust::skim)
|
||||
dbms_target_link_libraries(PUBLIC ch_rust::skim)
|
||||
endif()
|
||||
|
||||
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
|
||||
|
||||
if (ENABLE_TESTS)
|
||||
macro (grep_gtest_sources BASE_DIR DST_VAR)
|
||||
# Could match files that are not in tests/ directories
|
||||
@ -642,6 +643,4 @@ if (ENABLE_TESTS)
|
||||
if (TARGET ch_contrib::parquet)
|
||||
target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::parquet)
|
||||
endif()
|
||||
|
||||
add_check(unit_tests_dbms)
|
||||
endif ()
|
||||
|
@ -1797,7 +1797,12 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
|
||||
{
|
||||
const auto * logs_level_field = set_query->changes.tryGet(std::string_view{"send_logs_level"});
|
||||
if (logs_level_field)
|
||||
updateLoggerLevel(logs_level_field->safeGet<String>());
|
||||
{
|
||||
auto logs_level = logs_level_field->safeGet<String>();
|
||||
/// Check that setting value is correct before updating logger level.
|
||||
SettingFieldLogsLevelTraits::fromString(logs_level);
|
||||
updateLoggerLevel(logs_level);
|
||||
}
|
||||
}
|
||||
|
||||
if (const auto * create_user_query = parsed_query->as<ASTCreateUserQuery>())
|
||||
|
@ -251,10 +251,12 @@ void LocalConnection::finishQuery()
|
||||
else if (state->pushing_async_executor)
|
||||
{
|
||||
state->pushing_async_executor->finish();
|
||||
state->pushing_async_executor.reset();
|
||||
}
|
||||
else if (state->pushing_executor)
|
||||
{
|
||||
state->pushing_executor->finish();
|
||||
state->pushing_executor.reset();
|
||||
}
|
||||
|
||||
state->io.onFinish();
|
||||
|
@ -330,6 +330,12 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root,
|
||||
{
|
||||
Element & config_element = dynamic_cast<Element &>(*config_node);
|
||||
|
||||
/// Remove substitution attributes from the merge target node if source node already has a value
|
||||
bool source_has_value = with_element.hasChildNodes();
|
||||
if (source_has_value)
|
||||
for (const auto & attr_name: SUBSTITUTION_ATTRS)
|
||||
config_element.removeAttribute(attr_name);
|
||||
|
||||
mergeAttributes(config_element, with_element);
|
||||
mergeRecursive(config, config_node, with_node);
|
||||
}
|
||||
@ -513,6 +519,9 @@ void ConfigProcessor::doIncludesRecursive(
|
||||
|
||||
if (attr_nodes["from_zk"]) /// we have zookeeper subst
|
||||
{
|
||||
if (node->hasChildNodes()) /// only allow substitution for nodes with no value
|
||||
throw Poco::Exception("Element <" + node->nodeName() + "> has value, can't process from_zk substitution");
|
||||
|
||||
contributing_zk_paths.insert(attr_nodes["from_zk"]->getNodeValue());
|
||||
|
||||
if (zk_node_cache)
|
||||
@ -535,6 +544,9 @@ void ConfigProcessor::doIncludesRecursive(
|
||||
|
||||
if (attr_nodes["from_env"]) /// we have env subst
|
||||
{
|
||||
if (node->hasChildNodes()) /// only allow substitution for nodes with no value
|
||||
throw Poco::Exception("Element <" + node->nodeName() + "> has value, can't process from_env substitution");
|
||||
|
||||
XMLDocumentPtr env_document;
|
||||
auto get_env_node = [&](const std::string & name) -> const Node *
|
||||
{
|
||||
|
@ -46,8 +46,8 @@ class Elf;
|
||||
* can parse Debug Information Entries (DIEs), abbreviations, attributes (of
|
||||
* all forms), and we can interpret bytecode for the line number VM.
|
||||
*
|
||||
* We can interpret DWARF records of version 2, 3, or 4, although we don't
|
||||
* actually support many of the version 4 features (such as VLIW, multiple
|
||||
* We can interpret DWARF records of version 2, 3, 4, or 5, although we don't
|
||||
* actually support many of the features of versions 4 and 5 (such as VLIW, multiple
|
||||
* operations per instruction)
|
||||
*
|
||||
* Note that the DWARF record parser does not allocate heap memory at all.
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "SensitiveDataMasker.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <atomic>
|
||||
@ -94,20 +95,28 @@ public:
|
||||
SensitiveDataMasker::~SensitiveDataMasker() = default;
|
||||
|
||||
std::unique_ptr<SensitiveDataMasker> SensitiveDataMasker::sensitive_data_masker = nullptr;
|
||||
std::mutex SensitiveDataMasker::instance_mutex;
|
||||
|
||||
void SensitiveDataMasker::setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_)
|
||||
{
|
||||
|
||||
if (!sensitive_data_masker_)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: the 'sensitive_data_masker' is not set");
|
||||
|
||||
std::lock_guard lock(instance_mutex);
|
||||
if (sensitive_data_masker_->rulesCount() > 0)
|
||||
{
|
||||
sensitive_data_masker = std::move(sensitive_data_masker_);
|
||||
}
|
||||
else
|
||||
{
|
||||
sensitive_data_masker.reset();
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the current global masker, or nullptr if none is installed.
/// NOTE(review): the mutex only protects reading the pointer; the returned raw pointer is used
/// after the lock is released, so its validity relies on setInstance not replacing the instance
/// concurrently - confirm with callers.
SensitiveDataMasker * SensitiveDataMasker::getInstance()
{
    std::lock_guard lock(instance_mutex);

    SensitiveDataMasker * current_instance = sensitive_data_masker.get();
    return current_instance;
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
|
||||
@ -45,6 +46,7 @@ class SensitiveDataMasker
|
||||
private:
|
||||
class MaskingRule;
|
||||
std::vector<std::unique_ptr<MaskingRule>> all_masking_rules;
|
||||
static std::mutex instance_mutex;
|
||||
static std::unique_ptr<SensitiveDataMasker> sensitive_data_masker;
|
||||
|
||||
public:
|
||||
|
@ -17,6 +17,11 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
/* Transforms string from grep-wildcard-syntax ("{N..M}", "{a,b,c}" as in remote table function and "*", "?") to perl-regexp for using re2 library for matching
|
||||
* with such steps:
|
||||
* 1) search intervals like {0..9} and enums like {abc,xyz,qwe} in {}, replace them by regexp with pipe (expr1|expr2|expr3),
|
||||
@ -116,4 +121,79 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
|
||||
}
|
||||
return buf_final_processing.str();
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void expandSelectorGlobImpl(const std::string & path, std::vector<std::string> & for_match_paths_expanded)
|
||||
{
|
||||
/// regexp for {expr1,expr2,....} (a selector glob);
|
||||
/// expr1, expr2,... cannot contain any of these: '{', '}', ','
|
||||
static const re2::RE2 selector_regex(R"({([^{}*,]+,[^{}*]*[^{}*,])})");
|
||||
|
||||
std::string_view path_view(path);
|
||||
std::string_view matched;
|
||||
|
||||
// No (more) selector globs found, quit
|
||||
if (!RE2::FindAndConsume(&path_view, selector_regex, &matched))
|
||||
{
|
||||
for_match_paths_expanded.push_back(path);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<size_t> anchor_positions;
|
||||
bool opened = false;
|
||||
bool closed = false;
|
||||
|
||||
// Looking for first occurrence of {} selector: write down positions of {, } and all intermediate commas
|
||||
for (auto it = path.begin(); it != path.end(); ++it)
|
||||
{
|
||||
if (*it == '{')
|
||||
{
|
||||
if (opened)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Unexpected '{{' found in path '{}' at position {}.", path, it - path.begin());
|
||||
anchor_positions.push_back(std::distance(path.begin(), it));
|
||||
opened = true;
|
||||
}
|
||||
else if (*it == '}')
|
||||
{
|
||||
if (!opened)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Unexpected '}}' found in path '{}' at position {}.", path, it - path.begin());
|
||||
anchor_positions.push_back(std::distance(path.begin(), it));
|
||||
closed = true;
|
||||
break;
|
||||
}
|
||||
else if (*it == ',')
|
||||
{
|
||||
if (!opened)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Unexpected ',' found in path '{}' at position {}.", path, std::distance(path.begin(), it));
|
||||
anchor_positions.push_back(std::distance(path.begin(), it));
|
||||
}
|
||||
}
|
||||
if (!opened || !closed)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid {{}} glob in path {}.", path);
|
||||
|
||||
// generate result: prefix/{a,b,c}/suffix -> [prefix/a/suffix, prefix/b/suffix, prefix/c/suffix]
|
||||
std::string common_prefix = path.substr(0, anchor_positions.front());
|
||||
std::string common_suffix = path.substr(anchor_positions.back() + 1);
|
||||
for (size_t i = 1; i < anchor_positions.size(); ++i)
|
||||
{
|
||||
std::string current_selection =
|
||||
path.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1));
|
||||
|
||||
std::string expanded_matcher = common_prefix + current_selection + common_suffix;
|
||||
expandSelectorGlobImpl(expanded_matcher, for_match_paths_expanded);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> expandSelectionGlob(const std::string & path)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
expandSelectorGlobImpl(path, result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
@ -4,7 +4,11 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/* Parse globs in string and make a regexp for it.
|
||||
*/
|
||||
std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs);
|
||||
/// Parse globs in string and make a regexp for it.
|
||||
std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs);
|
||||
|
||||
/// Process {a,b,c...} globs:
|
||||
/// Don't match it against regex, but generate a,b,c strings instead and process each of them separately.
|
||||
/// E.g. for a string like `file{1,2,3}.csv` return vector of strings: {`file1.csv`,`file2.csv`,`file3.csv`}
|
||||
std::vector<std::string> expandSelectionGlob(const std::string & path);
|
||||
}
|
||||
|
@ -66,7 +66,7 @@ template <typename T>
|
||||
void compressDataForType(const char * source, UInt32 source_size, char * dest)
|
||||
{
|
||||
if (source_size % sizeof(T) != 0)
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot delta compress, data size {} is not aligned to {}", source_size, sizeof(T));
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with Delta codec, data size {} is not aligned to {}", source_size, sizeof(T));
|
||||
|
||||
T prev_src = 0;
|
||||
const char * const source_end = source + source_size;
|
||||
@ -87,7 +87,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,
|
||||
const char * const output_end = dest + output_size;
|
||||
|
||||
if (source_size % sizeof(T) != 0)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot delta decompress, data size {} is not aligned to {}", source_size, sizeof(T));
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Delta-encoded data, data size {} is not aligned to {}", source_size, sizeof(T));
|
||||
|
||||
T accumulator{};
|
||||
const char * const source_end = source + source_size;
|
||||
@ -95,7 +95,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,
|
||||
{
|
||||
accumulator += unalignedLoadLittleEndian<T>(source);
|
||||
if (dest + sizeof(accumulator) > output_end) [[unlikely]]
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress delta-encoded data");
|
||||
unalignedStoreLittleEndian<T>(dest, accumulator);
|
||||
|
||||
source += sizeof(T);
|
||||
@ -133,7 +133,7 @@ UInt32 CompressionCodecDelta::doCompressData(const char * source, UInt32 source_
|
||||
void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
|
||||
{
|
||||
if (source_size < 2)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress delta-encoded data. File has wrong header");
|
||||
|
||||
if (uncompressed_size == 0)
|
||||
return;
|
||||
@ -141,13 +141,13 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
|
||||
UInt8 bytes_size = source[0];
|
||||
|
||||
if (!(bytes_size == 1 || bytes_size == 2 || bytes_size == 4 || bytes_size == 8))
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress delta-encoded data. File has wrong header");
|
||||
|
||||
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
|
||||
UInt32 output_size = uncompressed_size - bytes_to_skip;
|
||||
|
||||
if (static_cast<UInt32>(2 + bytes_to_skip) > source_size)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress delta-encoded data. File has wrong header");
|
||||
|
||||
memcpy(dest, &source[2], bytes_to_skip);
|
||||
UInt32 source_size_no_header = source_size - bytes_to_skip - 2;
|
||||
|
@ -287,7 +287,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
|
||||
using SignedDeltaType = typename std::make_signed_t<UnsignedDeltaType>;
|
||||
|
||||
if (source_size % sizeof(ValueType) != 0)
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress, data size {} is not aligned to {}",
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with DoubleDelta codec, data size {} is not aligned to {}",
|
||||
source_size, sizeof(ValueType));
|
||||
const char * source_end = source + source_size;
|
||||
const char * dest_start = dest;
|
||||
@ -381,7 +381,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,
|
||||
|
||||
prev_value = unalignedLoadLittleEndian<ValueType>(source);
|
||||
if (dest + sizeof(prev_value) > output_end)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress double-delta-encoded data");
|
||||
unalignedStoreLittleEndian<ValueType>(dest, prev_value);
|
||||
|
||||
source += sizeof(prev_value);
|
||||
@ -394,7 +394,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,
|
||||
prev_delta = unalignedLoadLittleEndian<UnsignedDeltaType>(source);
|
||||
prev_value = prev_value + static_cast<ValueType>(prev_delta);
|
||||
if (dest + sizeof(prev_value) > output_end)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress double-delta-encoded data");
|
||||
unalignedStoreLittleEndian<ValueType>(dest, prev_value);
|
||||
|
||||
source += sizeof(prev_delta);
|
||||
@ -427,7 +427,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,
|
||||
const UnsignedDeltaType delta = double_delta + prev_delta;
|
||||
const ValueType curr_value = prev_value + delta;
|
||||
if (dest + sizeof(curr_value) > output_end)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress double-delta-encoded data");
|
||||
unalignedStoreLittleEndian<ValueType>(dest, curr_value);
|
||||
dest += sizeof(curr_value);
|
||||
|
||||
@ -511,18 +511,18 @@ UInt32 CompressionCodecDoubleDelta::doCompressData(const char * source, UInt32 s
|
||||
void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
|
||||
{
|
||||
if (source_size < 2)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress double-delta encoded data. File has wrong header");
|
||||
|
||||
UInt8 bytes_size = source[0];
|
||||
|
||||
if (bytes_size == 0)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress double-delta encoded data. File has wrong header");
|
||||
|
||||
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
|
||||
UInt32 output_size = uncompressed_size - bytes_to_skip;
|
||||
|
||||
if (static_cast<UInt32>(2 + bytes_to_skip) > source_size)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress double-delta encoded data. File has wrong header");
|
||||
|
||||
memcpy(dest, &source[2], bytes_to_skip);
|
||||
UInt32 source_size_no_header = source_size - bytes_to_skip - 2;
|
||||
|
@ -153,23 +153,23 @@ void registerCodecFPC(CompressionCodecFactory & factory)
|
||||
namespace
|
||||
{
|
||||
|
||||
template <std::unsigned_integral TUint>
|
||||
requires (sizeof(TUint) >= 4)
|
||||
template <std::unsigned_integral TUInt>
|
||||
requires (sizeof(TUInt) >= 4)
|
||||
class DfcmPredictor
|
||||
{
|
||||
public:
|
||||
explicit DfcmPredictor(std::size_t table_size)
|
||||
explicit DfcmPredictor(size_t table_size)
|
||||
: table(table_size, 0), prev_value{0}, hash{0}
|
||||
{
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
TUint predict() const noexcept
|
||||
TUInt predict() const noexcept
|
||||
{
|
||||
return table[hash] + prev_value;
|
||||
}
|
||||
|
||||
void add(TUint value) noexcept
|
||||
void add(TUInt value) noexcept
|
||||
{
|
||||
table[hash] = value - prev_value;
|
||||
recalculateHash();
|
||||
@ -180,38 +180,38 @@ private:
|
||||
void recalculateHash() noexcept
|
||||
{
|
||||
auto value = table[hash];
|
||||
if constexpr (sizeof(TUint) >= 8)
|
||||
if constexpr (sizeof(TUInt) >= 8)
|
||||
{
|
||||
hash = ((hash << 2) ^ static_cast<std::size_t>(value >> 40)) & (table.size() - 1);
|
||||
hash = ((hash << 2) ^ static_cast<size_t>(value >> 40)) & (table.size() - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
hash = ((hash << 4) ^ static_cast<std::size_t>(value >> 23)) & (table.size() - 1);
|
||||
hash = ((hash << 4) ^ static_cast<size_t>(value >> 23)) & (table.size() - 1);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<TUint> table;
|
||||
TUint prev_value;
|
||||
std::size_t hash;
|
||||
std::vector<TUInt> table;
|
||||
TUInt prev_value;
|
||||
size_t hash;
|
||||
};
|
||||
|
||||
template <std::unsigned_integral TUint>
|
||||
requires (sizeof(TUint) >= 4)
|
||||
template <std::unsigned_integral TUInt>
|
||||
requires (sizeof(TUInt) >= 4)
|
||||
class FcmPredictor
|
||||
{
|
||||
public:
|
||||
explicit FcmPredictor(std::size_t table_size)
|
||||
explicit FcmPredictor(size_t table_size)
|
||||
: table(table_size, 0), hash{0}
|
||||
{
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
TUint predict() const noexcept
|
||||
TUInt predict() const noexcept
|
||||
{
|
||||
return table[hash];
|
||||
}
|
||||
|
||||
void add(TUint value) noexcept
|
||||
void add(TUInt value) noexcept
|
||||
{
|
||||
table[hash] = value;
|
||||
recalculateHash();
|
||||
@ -221,31 +221,31 @@ private:
|
||||
void recalculateHash() noexcept
|
||||
{
|
||||
auto value = table[hash];
|
||||
if constexpr (sizeof(TUint) >= 8)
|
||||
if constexpr (sizeof(TUInt) >= 8)
|
||||
{
|
||||
hash = ((hash << 6) ^ static_cast<std::size_t>(value >> 48)) & (table.size() - 1);
|
||||
hash = ((hash << 6) ^ static_cast<size_t>(value >> 48)) & (table.size() - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
hash = ((hash << 1) ^ static_cast<std::size_t>(value >> 22)) & (table.size() - 1);
|
||||
hash = ((hash << 1) ^ static_cast<size_t>(value >> 22)) & (table.size() - 1);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<TUint> table;
|
||||
std::size_t hash;
|
||||
std::vector<TUInt> table;
|
||||
size_t hash;
|
||||
};
|
||||
|
||||
template <std::unsigned_integral TUint>
|
||||
template <std::unsigned_integral TUInt>
|
||||
class FPCOperation
|
||||
{
|
||||
static constexpr auto VALUE_SIZE = sizeof(TUint);
|
||||
static constexpr size_t VALUE_SIZE = sizeof(TUInt);
|
||||
static constexpr std::byte FCM_BIT{0};
|
||||
static constexpr std::byte DFCM_BIT{1u << 3};
|
||||
static constexpr auto DFCM_BIT_1 = DFCM_BIT << 4;
|
||||
static constexpr auto DFCM_BIT_2 = DFCM_BIT;
|
||||
static constexpr unsigned MAX_ZERO_BYTE_COUNT = 0b111u;
|
||||
static constexpr std::byte DFCM_BIT_1 = DFCM_BIT << 4;
|
||||
static constexpr std::byte DFCM_BIT_2 = DFCM_BIT;
|
||||
static constexpr UInt32 MAX_ZERO_BYTE_COUNT = 0b111u;
|
||||
static constexpr std::endian ENDIAN = std::endian::little;
|
||||
static constexpr std::size_t CHUNK_SIZE = 64;
|
||||
static constexpr size_t CHUNK_SIZE = 64;
|
||||
|
||||
public:
|
||||
FPCOperation(std::span<std::byte> destination, UInt8 compression_level)
|
||||
@ -253,12 +253,12 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
std::size_t encode(std::span<const std::byte> data) &&
|
||||
size_t encode(std::span<const std::byte> data) &&
|
||||
{
|
||||
auto initial_size = result.size();
|
||||
|
||||
std::span chunk_view(chunk);
|
||||
for (std::size_t i = 0; i < data.size(); i += chunk_view.size_bytes())
|
||||
for (size_t i = 0; i < data.size(); i += chunk_view.size_bytes())
|
||||
{
|
||||
auto written_values_count = importChunk(data.subspan(i), chunk_view);
|
||||
encodeChunk(chunk_view.subspan(0, written_values_count));
|
||||
@ -267,12 +267,12 @@ public:
|
||||
return initial_size - result.size();
|
||||
}
|
||||
|
||||
void decode(std::span<const std::byte> values, std::size_t decoded_size) &&
|
||||
void decode(std::span<const std::byte> values, size_t decoded_size) &&
|
||||
{
|
||||
std::size_t read_bytes = 0;
|
||||
size_t read_bytes = 0;
|
||||
|
||||
std::span<TUint> chunk_view(chunk);
|
||||
for (std::size_t i = 0; i < decoded_size; i += chunk_view.size_bytes())
|
||||
std::span<TUInt> chunk_view(chunk);
|
||||
for (size_t i = 0; i < decoded_size; i += chunk_view.size_bytes())
|
||||
{
|
||||
if (i + chunk_view.size_bytes() > decoded_size)
|
||||
chunk_view = chunk_view.first(ceilBytesToEvenValues(decoded_size - i));
|
||||
@ -282,50 +282,50 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
static std::size_t ceilBytesToEvenValues(std::size_t bytes_count)
|
||||
static size_t ceilBytesToEvenValues(size_t bytes_count)
|
||||
{
|
||||
auto values_count = (bytes_count + VALUE_SIZE - 1) / VALUE_SIZE;
|
||||
size_t values_count = (bytes_count + VALUE_SIZE - 1) / VALUE_SIZE;
|
||||
return values_count % 2 == 0 ? values_count : values_count + 1;
|
||||
}
|
||||
|
||||
std::size_t importChunk(std::span<const std::byte> values, std::span<TUint> chnk)
|
||||
size_t importChunk(std::span<const std::byte> values, std::span<TUInt> current_chunk)
|
||||
{
|
||||
if (auto chunk_view = std::as_writable_bytes(chnk); chunk_view.size() <= values.size())
|
||||
if (auto chunk_view = std::as_writable_bytes(current_chunk); chunk_view.size() <= values.size())
|
||||
{
|
||||
std::memcpy(chunk_view.data(), values.data(), chunk_view.size());
|
||||
memcpy(chunk_view.data(), values.data(), chunk_view.size());
|
||||
return chunk_view.size() / VALUE_SIZE;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::memset(chunk_view.data(), 0, chunk_view.size());
|
||||
std::memcpy(chunk_view.data(), values.data(), values.size());
|
||||
memset(chunk_view.data(), 0, chunk_view.size());
|
||||
memcpy(chunk_view.data(), values.data(), values.size());
|
||||
return ceilBytesToEvenValues(values.size());
|
||||
}
|
||||
}
|
||||
|
||||
void exportChunk(std::span<const TUint> chnk)
|
||||
void exportChunk(std::span<const TUInt> current_chunk)
|
||||
{
|
||||
auto chunk_view = std::as_bytes(chnk).first(std::min(result.size(), chnk.size_bytes()));
|
||||
std::memcpy(result.data(), chunk_view.data(), chunk_view.size());
|
||||
auto chunk_view = std::as_bytes(current_chunk).first(std::min(result.size(), current_chunk.size_bytes()));
|
||||
memcpy(result.data(), chunk_view.data(), chunk_view.size());
|
||||
result = result.subspan(chunk_view.size());
|
||||
}
|
||||
|
||||
void encodeChunk(std::span<const TUint> seq)
|
||||
void encodeChunk(std::span<const TUInt> sequence)
|
||||
{
|
||||
for (std::size_t i = 0; i < seq.size(); i += 2)
|
||||
for (size_t i = 0; i < sequence.size(); i += 2)
|
||||
{
|
||||
encodePair(seq[i], seq[i + 1]);
|
||||
encodePair(sequence[i], sequence[i + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
struct CompressedValue
|
||||
{
|
||||
TUint value;
|
||||
unsigned compressed_size;
|
||||
TUInt value;
|
||||
UInt32 compressed_size;
|
||||
std::byte predictor;
|
||||
};
|
||||
|
||||
unsigned encodeCompressedZeroByteCount(unsigned compressed)
|
||||
UInt32 encodeCompressedZeroByteCount(UInt32 compressed)
|
||||
{
|
||||
if constexpr (VALUE_SIZE == MAX_ZERO_BYTE_COUNT + 1)
|
||||
{
|
||||
@ -335,7 +335,7 @@ private:
|
||||
return std::min(compressed, MAX_ZERO_BYTE_COUNT);
|
||||
}
|
||||
|
||||
unsigned decodeCompressedZeroByteCount(unsigned encoded_size)
|
||||
UInt32 decodeCompressedZeroByteCount(UInt32 encoded_size)
|
||||
{
|
||||
if constexpr (VALUE_SIZE == MAX_ZERO_BYTE_COUNT + 1)
|
||||
{
|
||||
@ -345,22 +345,22 @@ private:
|
||||
return encoded_size;
|
||||
}
|
||||
|
||||
CompressedValue compressValue(TUint value) noexcept
|
||||
CompressedValue compressValue(TUInt value) noexcept
|
||||
{
|
||||
static constexpr auto BITS_PER_BYTE = std::numeric_limits<unsigned char>::digits;
|
||||
|
||||
TUint compressed_dfcm = dfcm_predictor.predict() ^ value;
|
||||
TUint compressed_fcm = fcm_predictor.predict() ^ value;
|
||||
TUInt compressed_dfcm = dfcm_predictor.predict() ^ value;
|
||||
TUInt compressed_fcm = fcm_predictor.predict() ^ value;
|
||||
dfcm_predictor.add(value);
|
||||
fcm_predictor.add(value);
|
||||
auto zeroes_dfcm = std::countl_zero(compressed_dfcm);
|
||||
auto zeroes_fcm = std::countl_zero(compressed_fcm);
|
||||
if (zeroes_dfcm > zeroes_fcm)
|
||||
return {compressed_dfcm, encodeCompressedZeroByteCount(static_cast<unsigned>(zeroes_dfcm) / BITS_PER_BYTE), DFCM_BIT};
|
||||
return {compressed_fcm, encodeCompressedZeroByteCount(static_cast<unsigned>(zeroes_fcm) / BITS_PER_BYTE), FCM_BIT};
|
||||
return {compressed_dfcm, encodeCompressedZeroByteCount(static_cast<UInt32>(zeroes_dfcm) / BITS_PER_BYTE), DFCM_BIT};
|
||||
return {compressed_fcm, encodeCompressedZeroByteCount(static_cast<UInt32>(zeroes_fcm) / BITS_PER_BYTE), FCM_BIT};
|
||||
}
|
||||
|
||||
void encodePair(TUint first, TUint second)
|
||||
void encodePair(TUInt first, TUInt second)
|
||||
{
|
||||
auto [compressed_value1, zero_byte_count1, predictor1] = compressValue(first);
|
||||
auto [compressed_value2, zero_byte_count2, predictor2] = compressValue(second);
|
||||
@ -374,24 +374,24 @@ private:
|
||||
auto tail_size1 = VALUE_SIZE - zero_byte_count1;
|
||||
auto tail_size2 = VALUE_SIZE - zero_byte_count2;
|
||||
|
||||
std::memcpy(result.data() + 1, valueTail(compressed_value1, zero_byte_count1), tail_size1);
|
||||
std::memcpy(result.data() + 1 + tail_size1, valueTail(compressed_value2, zero_byte_count2), tail_size2);
|
||||
memcpy(result.data() + 1, valueTail(compressed_value1, zero_byte_count1), tail_size1);
|
||||
memcpy(result.data() + 1 + tail_size1, valueTail(compressed_value2, zero_byte_count2), tail_size2);
|
||||
result = result.subspan(1 + tail_size1 + tail_size2);
|
||||
}
|
||||
|
||||
std::size_t decodeChunk(std::span<const std::byte> values, std::span<TUint> seq)
|
||||
size_t decodeChunk(std::span<const std::byte> values, std::span<TUInt> sequence)
|
||||
{
|
||||
std::size_t read_bytes = 0;
|
||||
for (std::size_t i = 0; i < seq.size(); i += 2)
|
||||
size_t read_bytes = 0;
|
||||
for (size_t i = 0; i < sequence.size(); i += 2)
|
||||
{
|
||||
read_bytes += decodePair(values.subspan(read_bytes), seq[i], seq[i + 1]);
|
||||
read_bytes += decodePair(values.subspan(read_bytes), sequence[i], sequence[i + 1]);
|
||||
}
|
||||
return read_bytes;
|
||||
}
|
||||
|
||||
TUint decompressValue(TUint value, bool isDfcmPredictor)
|
||||
TUInt decompressValue(TUInt value, bool isDfcmPredictor)
|
||||
{
|
||||
TUint decompressed;
|
||||
TUInt decompressed;
|
||||
if (isDfcmPredictor)
|
||||
{
|
||||
decompressed = dfcm_predictor.predict() ^ value;
|
||||
@ -405,37 +405,45 @@ private:
|
||||
return decompressed;
|
||||
}
|
||||
|
||||
std::size_t decodePair(std::span<const std::byte> bytes, TUint& first, TUint& second)
|
||||
size_t decodePair(std::span<const std::byte> bytes, TUInt & first, TUInt & second)
|
||||
{
|
||||
if (bytes.empty()) [[unlikely]]
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Unexpected end of encoded sequence");
|
||||
|
||||
auto zero_byte_count1 = decodeCompressedZeroByteCount(
|
||||
std::to_integer<unsigned>(bytes.front() >> 4) & MAX_ZERO_BYTE_COUNT);
|
||||
auto zero_byte_count2 = decodeCompressedZeroByteCount(
|
||||
std::to_integer<unsigned>(bytes.front()) & MAX_ZERO_BYTE_COUNT);
|
||||
UInt32 zero_byte_count1 = decodeCompressedZeroByteCount(
|
||||
std::to_integer<UInt32>(bytes.front() >> 4) & MAX_ZERO_BYTE_COUNT);
|
||||
UInt32 zero_byte_count2 = decodeCompressedZeroByteCount(
|
||||
std::to_integer<UInt32>(bytes.front()) & MAX_ZERO_BYTE_COUNT);
|
||||
|
||||
auto tail_size1 = VALUE_SIZE - zero_byte_count1;
|
||||
auto tail_size2 = VALUE_SIZE - zero_byte_count2;
|
||||
if (zero_byte_count1 > VALUE_SIZE || zero_byte_count2 > VALUE_SIZE) [[unlikely]]
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Invalid zero byte count(s): {} and {}", zero_byte_count1, zero_byte_count2);
|
||||
|
||||
if (bytes.size() < 1 + tail_size1 + tail_size2) [[unlikely]]
|
||||
size_t tail_size1 = VALUE_SIZE - zero_byte_count1;
|
||||
size_t tail_size2 = VALUE_SIZE - zero_byte_count2;
|
||||
|
||||
size_t expected_size = 0;
|
||||
if (__builtin_add_overflow(tail_size1, tail_size2, &expected_size)
|
||||
|| __builtin_add_overflow(expected_size, 1, &expected_size)) [[unlikely]]
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Overflow occurred while calculating expected size");
|
||||
|
||||
if (bytes.size() < expected_size) [[unlikely]]
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Unexpected end of encoded sequence");
|
||||
|
||||
TUint value1 = 0;
|
||||
TUint value2 = 0;
|
||||
TUInt value1 = 0;
|
||||
TUInt value2 = 0;
|
||||
|
||||
std::memcpy(valueTail(value1, zero_byte_count1), bytes.data() + 1, tail_size1);
|
||||
std::memcpy(valueTail(value2, zero_byte_count2), bytes.data() + 1 + tail_size1, tail_size2);
|
||||
memcpy(valueTail(value1, zero_byte_count1), bytes.data() + 1, tail_size1);
|
||||
memcpy(valueTail(value2, zero_byte_count2), bytes.data() + 1 + tail_size1, tail_size2);
|
||||
|
||||
auto is_dfcm_predictor1 = std::to_integer<unsigned char>(bytes.front() & DFCM_BIT_1) != 0;
|
||||
auto is_dfcm_predictor2 = std::to_integer<unsigned char>(bytes.front() & DFCM_BIT_2) != 0;
|
||||
first = decompressValue(value1, is_dfcm_predictor1);
|
||||
second = decompressValue(value2, is_dfcm_predictor2);
|
||||
|
||||
return 1 + tail_size1 + tail_size2;
|
||||
return expected_size;
|
||||
}
|
||||
|
||||
static void* valueTail(TUint& value, unsigned compressed_size)
|
||||
static void* valueTail(TUInt& value, UInt32 compressed_size)
|
||||
{
|
||||
if constexpr (ENDIAN == std::endian::little)
|
||||
{
|
||||
@ -447,11 +455,11 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
DfcmPredictor<TUint> dfcm_predictor;
|
||||
FcmPredictor<TUint> fcm_predictor;
|
||||
DfcmPredictor<TUInt> dfcm_predictor;
|
||||
FcmPredictor<TUInt> fcm_predictor;
|
||||
|
||||
// memcpy the input into this buffer to align reads, this improves performance compared to unaligned reads (bit_cast) by ~10%
|
||||
std::array<TUint, CHUNK_SIZE> chunk{};
|
||||
std::array<TUInt, CHUNK_SIZE> chunk{};
|
||||
|
||||
std::span<std::byte> result{};
|
||||
};
|
||||
@ -475,19 +483,19 @@ UInt32 CompressionCodecFPC::doCompressData(const char * source, UInt32 source_si
|
||||
default:
|
||||
break;
|
||||
}
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress. File has incorrect float width");
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with FPC codec. File has incorrect float width");
|
||||
}
|
||||
|
||||
void CompressionCodecFPC::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
|
||||
{
|
||||
if (source_size < HEADER_SIZE)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress FPC-encoded data. File has wrong header");
|
||||
|
||||
auto compressed_data = std::as_bytes(std::span(source, source_size));
|
||||
auto compressed_float_width = std::to_integer<UInt8>(compressed_data[0]);
|
||||
auto compressed_level = std::to_integer<UInt8>(compressed_data[1]);
|
||||
if (compressed_level == 0 || compressed_level > MAX_COMPRESSION_LEVEL)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has incorrect level");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress FPC-encoded data. File has incorrect level");
|
||||
|
||||
auto destination = std::as_writable_bytes(std::span(dest, uncompressed_size));
|
||||
auto src = compressed_data.subspan(HEADER_SIZE);
|
||||
@ -500,7 +508,7 @@ void CompressionCodecFPC::doDecompressData(const char * source, UInt32 source_si
|
||||
FPCOperation<UInt32>(destination, compressed_level).decode(src, uncompressed_size);
|
||||
break;
|
||||
default:
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has incorrect float width");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress FPC-encoded data. File has incorrect float width");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
#include <Compression/ICompressionCodec.h>
|
||||
#include <Compression/CompressionInfo.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Compression/CompressionFactory.h>
|
||||
#include <base/unaligned.h>
|
||||
#include <Compression/CompressionInfo.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include "Common/Exception.h"
|
||||
#include "DataTypes/IDataType.h"
|
||||
#include <base/unaligned.h>
|
||||
|
||||
#include <boost/integer/common_factor.hpp>
|
||||
#include <libdivide-config.h>
|
||||
@ -74,29 +74,37 @@ template <typename T>
|
||||
void compressDataForType(const char * source, UInt32 source_size, char * dest)
|
||||
{
|
||||
if (source_size % sizeof(T) != 0)
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot GCD compress, data size {} is not aligned to {}", source_size, sizeof(T));
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with GCD codec, data size {} is not aligned to {}", source_size, sizeof(T));
|
||||
|
||||
const char * const source_end = source + source_size;
|
||||
|
||||
T gcd_divider = 0;
|
||||
T gcd = 0;
|
||||
const auto * cur_source = source;
|
||||
while (gcd_divider != T(1) && cur_source < source_end)
|
||||
while (gcd != T(1) && cur_source < source_end)
|
||||
{
|
||||
if (cur_source == source)
|
||||
gcd_divider = unalignedLoad<T>(cur_source);
|
||||
gcd = unalignedLoad<T>(cur_source);
|
||||
else
|
||||
gcd_divider = boost::integer::gcd(gcd_divider, unalignedLoad<T>(cur_source));
|
||||
gcd = boost::integer::gcd(gcd, unalignedLoad<T>(cur_source));
|
||||
cur_source += sizeof(T);
|
||||
}
|
||||
|
||||
unalignedStore<T>(dest, gcd_divider);
|
||||
unalignedStore<T>(dest, gcd);
|
||||
dest += sizeof(T);
|
||||
|
||||
/// GCD compression is pointless if GCD = 1 or GCD = 0 (happens with 0 values in data).
|
||||
/// In these cases only copy the source to dest, i.e. don't compress.
|
||||
if (gcd == 0 || gcd == 1)
|
||||
{
|
||||
memcpy(dest, source, source_size);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (sizeof(T) <= 8)
|
||||
{
|
||||
/// libdivide supports only UInt32 and UInt64.
|
||||
using LibdivideT = std::conditional_t<sizeof(T) <= 4, UInt32, UInt64>;
|
||||
libdivide::divider<LibdivideT> divider(static_cast<LibdivideT>(gcd_divider));
|
||||
libdivide::divider<LibdivideT> divider(static_cast<LibdivideT>(gcd));
|
||||
cur_source = source;
|
||||
while (cur_source < source_end)
|
||||
{
|
||||
@ -110,7 +118,7 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest)
|
||||
cur_source = source;
|
||||
while (cur_source < source_end)
|
||||
{
|
||||
unalignedStore<T>(dest, unalignedLoad<T>(cur_source) / gcd_divider);
|
||||
unalignedStore<T>(dest, unalignedLoad<T>(cur_source) / gcd);
|
||||
cur_source += sizeof(T);
|
||||
dest += sizeof(T);
|
||||
}
|
||||
@ -121,10 +129,10 @@ template <typename T>
|
||||
void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size)
|
||||
{
|
||||
if (source_size % sizeof(T) != 0)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot GCD decompress, data size {} is not aligned to {}", source_size, sizeof(T));
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data, data size {} is not aligned to {}", source_size, sizeof(T));
|
||||
|
||||
if (source_size < sizeof(T))
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot GCD decompress, data size {} is less than {}", source_size, sizeof(T));
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data, data size {} is less than {}", source_size, sizeof(T));
|
||||
|
||||
const char * const source_end = source + source_size;
|
||||
const char * const dest_end = dest + output_size;
|
||||
@ -132,10 +140,21 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,
|
||||
const T gcd_multiplier = unalignedLoad<T>(source);
|
||||
source += sizeof(T);
|
||||
|
||||
/// Handle special cases GCD = 1 and GCD = 0.
|
||||
if (gcd_multiplier == 0 || gcd_multiplier == 1)
|
||||
{
|
||||
/// Subtraction is safe, because we checked that source_size >= sizeof(T)
|
||||
if (source_size - sizeof(T) != output_size)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data");
|
||||
|
||||
memcpy(dest, source, source_size);
|
||||
return;
|
||||
}
|
||||
|
||||
while (source < source_end)
|
||||
{
|
||||
if (dest + sizeof(T) > dest_end) [[unlikely]]
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data");
|
||||
unalignedStore<T>(dest, unalignedLoad<T>(source) * gcd_multiplier);
|
||||
|
||||
source += sizeof(T);
|
||||
@ -179,7 +198,7 @@ UInt32 CompressionCodecGCD::doCompressData(const char * source, UInt32 source_si
|
||||
void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
|
||||
{
|
||||
if (source_size < 2)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data. File has wrong header");
|
||||
|
||||
if (uncompressed_size == 0)
|
||||
return;
|
||||
@ -187,13 +206,13 @@ void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_si
|
||||
UInt8 bytes_size = source[0];
|
||||
|
||||
if (!(bytes_size == 1 || bytes_size == 2 || bytes_size == 4 || bytes_size == 8 || bytes_size == 16 || bytes_size == 32))
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data. File has wrong header");
|
||||
|
||||
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
|
||||
UInt32 output_size = uncompressed_size - bytes_to_skip;
|
||||
|
||||
if (static_cast<UInt32>(2 + bytes_to_skip) > source_size)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data. File has wrong header");
|
||||
|
||||
memcpy(dest, &source[2], bytes_to_skip);
|
||||
UInt32 source_size_no_header = source_size - bytes_to_skip - 2;
|
||||
@ -227,7 +246,7 @@ UInt8 getGCDBytesSize(const IDataType * column_type)
|
||||
{
|
||||
WhichDataType which(column_type);
|
||||
if (!(which.isInt() || which.isUInt() || which.isDecimal() || which.isDateOrDate32() || which.isDateTime() ||which.isDateTime64()))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is not applicable for {} because the data type is not of fixed size",
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD cannot be applied to column {} because it can only be used with Int*, UInt*, Decimal*, Date* or DateTime* types.",
|
||||
column_type->getName());
|
||||
|
||||
size_t max_size = column_type->getSizeOfValueInMemory();
|
||||
|
@ -197,7 +197,7 @@ template <typename T>
|
||||
UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 dest_size)
|
||||
{
|
||||
if (source_size % sizeof(T) != 0)
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress, data size {} is not aligned to {}", source_size, sizeof(T));
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with Gorilla codec, data size {} is not aligned to {}", source_size, sizeof(T));
|
||||
|
||||
const char * const source_end = source + source_size;
|
||||
const char * const dest_start = dest;
|
||||
@ -317,7 +317,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
|
||||
&& curr_xored_info.data_bits == 0
|
||||
&& curr_xored_info.trailing_zero_bits == 0) [[unlikely]]
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress gorilla-encoded data: corrupted input data.");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Gorilla-encoded data: corrupted input data.");
|
||||
}
|
||||
|
||||
xored_data = static_cast<T>(reader.readBits(curr_xored_info.data_bits));
|
||||
@ -410,17 +410,17 @@ UInt32 CompressionCodecGorilla::doCompressData(const char * source, UInt32 sourc
|
||||
void CompressionCodecGorilla::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
|
||||
{
|
||||
if (source_size < 2)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Gorilla-encoded data. File has wrong header");
|
||||
|
||||
UInt8 bytes_size = source[0];
|
||||
|
||||
if (bytes_size == 0)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Gorilla-encoded data. File has wrong header");
|
||||
|
||||
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
|
||||
|
||||
if (static_cast<UInt32>(2 + bytes_to_skip) > source_size)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Gorilla-encoded data. File has wrong header");
|
||||
|
||||
memcpy(dest, &source[2], bytes_to_skip);
|
||||
UInt32 source_size_no_header = source_size - bytes_to_skip - 2;
|
||||
|
@ -96,7 +96,7 @@ void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_si
|
||||
bool success = LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat);
|
||||
|
||||
if (!success)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress LZ4-encoded data");
|
||||
}
|
||||
|
||||
void registerCodecLZ4(CompressionCodecFactory & factory)
|
||||
@ -112,7 +112,7 @@ UInt32 CompressionCodecLZ4HC::doCompressData(const char * source, UInt32 source_
|
||||
auto success = LZ4_compress_HC(source, dest, source_size, LZ4_COMPRESSBOUND(source_size), level);
|
||||
|
||||
if (!success)
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot LZ4_compress_HC");
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with LZ4 codec");
|
||||
|
||||
return success;
|
||||
}
|
||||
|
@ -483,7 +483,7 @@ UInt32 compressData(const char * src, UInt32 bytes_size, char * dst)
|
||||
static constexpr const UInt32 header_size = 2 * sizeof(UInt64);
|
||||
|
||||
if (bytes_size % sizeof(T))
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress, data size {} is not multiplier of {}",
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with T64 codec, data size {} is not multiplier of {}",
|
||||
bytes_size, sizeof(T));
|
||||
|
||||
UInt32 src_size = bytes_size / sizeof(T);
|
||||
@ -538,11 +538,11 @@ void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 unco
|
||||
static constexpr const UInt32 header_size = 2 * sizeof(UInt64);
|
||||
|
||||
if (bytes_size < header_size)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress, data size ({}) is less than the size of T64 header",
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress T64-encoded data, data size ({}) is less than the size of T64 header",
|
||||
bytes_size);
|
||||
|
||||
if (uncompressed_size % sizeof(T))
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress, unexpected uncompressed size ({})"
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress T64-encoded data, unexpected uncompressed size ({})"
|
||||
" isn't a multiple of the data type size ({})",
|
||||
uncompressed_size, sizeof(T));
|
||||
|
||||
@ -571,7 +571,7 @@ void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 unco
|
||||
UInt32 dst_shift = sizeof(T) * matrix_size;
|
||||
|
||||
if (!bytes_size || bytes_size % src_shift)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress, data size ({}) is not a multiplier of {}",
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress T64-encoded data, data size ({}) is not a multiplier of {}",
|
||||
bytes_size, src_shift);
|
||||
|
||||
UInt32 num_full = bytes_size / src_shift;
|
||||
@ -666,13 +666,13 @@ UInt32 CompressionCodecT64::doCompressData(const char * src, UInt32 src_size, ch
|
||||
break;
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with T64");
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with T64 codec");
|
||||
}
|
||||
|
||||
void CompressionCodecT64::doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const
|
||||
{
|
||||
if (!src_size)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress with T64");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress T64-encoded data");
|
||||
|
||||
UInt8 cookie = unalignedLoad<UInt8>(src);
|
||||
src += 1;
|
||||
@ -703,7 +703,7 @@ void CompressionCodecT64::doDecompressData(const char * src, UInt32 src_size, ch
|
||||
break;
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress with T64");
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress T64-encoded data");
|
||||
}
|
||||
|
||||
uint8_t CompressionCodecT64::getMethodByte() const
|
||||
|
@ -82,7 +82,7 @@ UInt32 CompressionCodecZSTD::doCompressData(const char * source, UInt32 source_s
|
||||
ZSTD_freeCCtx(cctx);
|
||||
|
||||
if (ZSTD_isError(compressed_size))
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress block with ZSTD: {}", std::string(ZSTD_getErrorName(compressed_size)));
|
||||
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with ZSTD codec: {}", std::string(ZSTD_getErrorName(compressed_size)));
|
||||
|
||||
return static_cast<UInt32>(compressed_size);
|
||||
}
|
||||
@ -93,7 +93,7 @@ void CompressionCodecZSTD::doDecompressData(const char * source, UInt32 source_s
|
||||
size_t res = ZSTD_decompress(dest, uncompressed_size, source, source_size);
|
||||
|
||||
if (ZSTD_isError(res))
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot ZSTD_decompress: {}", std::string(ZSTD_getErrorName(res)));
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress ZSTD-encoded data: {}", std::string(ZSTD_getErrorName(res)));
|
||||
}
|
||||
|
||||
CompressionCodecZSTD::CompressionCodecZSTD(int level_, int window_log_) : level(level_), enable_long_range(true), window_log(window_log_)
|
||||
|
@ -476,6 +476,9 @@ struct ChangelogReadResult
|
||||
|
||||
/// last offset we were able to read from log
|
||||
off_t last_position;
|
||||
|
||||
/// Whether the changelog file was written using compression
|
||||
bool compressed_log;
|
||||
bool error;
|
||||
};
|
||||
|
||||
@ -484,7 +487,7 @@ class ChangelogReader
|
||||
public:
|
||||
explicit ChangelogReader(DiskPtr disk_, const std::string & filepath_) : disk(disk_), filepath(filepath_)
|
||||
{
|
||||
auto compression_method = chooseCompressionMethod(filepath, "");
|
||||
compression_method = chooseCompressionMethod(filepath, "");
|
||||
auto read_buffer_from_file = disk->readFile(filepath);
|
||||
read_buf = wrapReadBufferWithCompressionMethod(std::move(read_buffer_from_file), compression_method);
|
||||
}
|
||||
@ -493,6 +496,7 @@ public:
|
||||
ChangelogReadResult readChangelog(IndexToLogEntry & logs, uint64_t start_log_index, Poco::Logger * log)
|
||||
{
|
||||
ChangelogReadResult result{};
|
||||
result.compressed_log = compression_method != CompressionMethod::None;
|
||||
try
|
||||
{
|
||||
while (!read_buf->eof())
|
||||
@ -583,6 +587,7 @@ public:
|
||||
private:
|
||||
DiskPtr disk;
|
||||
std::string filepath;
|
||||
CompressionMethod compression_method;
|
||||
std::unique_ptr<ReadBuffer> read_buf;
|
||||
};
|
||||
|
||||
@ -590,6 +595,7 @@ Changelog::Changelog(
|
||||
Poco::Logger * log_, LogFileSettings log_file_settings, FlushSettings flush_settings_, KeeperContextPtr keeper_context_)
|
||||
: changelogs_detached_dir("detached")
|
||||
, rotate_interval(log_file_settings.rotate_interval)
|
||||
, compress_logs(log_file_settings.compress_logs)
|
||||
, log(log_)
|
||||
, write_operations(std::numeric_limits<size_t>::max())
|
||||
, append_completion_queue(std::numeric_limits<size_t>::max())
|
||||
@ -851,7 +857,8 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
|
||||
std::erase_if(logs, [last_log_read_result](const auto & item) { return item.first > last_log_read_result->last_read_index; });
|
||||
move_from_latest_logs_disks(existing_changelogs.at(last_log_read_result->log_start_index));
|
||||
}
|
||||
else
|
||||
/// don't mix compressed and uncompressed writes
|
||||
else if (compress_logs == last_log_read_result->compressed_log)
|
||||
{
|
||||
initWriter(description);
|
||||
}
|
||||
|
@ -182,6 +182,7 @@ private:
|
||||
|
||||
const String changelogs_detached_dir;
|
||||
const uint64_t rotate_interval;
|
||||
const bool compress_logs;
|
||||
Poco::Logger * log;
|
||||
|
||||
std::mutex writer_mutex;
|
||||
|
@ -45,7 +45,7 @@ struct Settings;
|
||||
M(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \
|
||||
M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
|
||||
M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
|
||||
M(Bool, compress_logs, true, "Write compressed coordination logs in ZSTD format", 0) \
|
||||
M(Bool, compress_logs, false, "Write compressed coordination logs in ZSTD format", 0) \
|
||||
M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \
|
||||
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
|
||||
M(UInt64, max_log_file_size, 50 * 1024 * 1024, "Max size of the Raft log file. If possible, each created log file will preallocate this amount of bytes on disk. Set to 0 to disable the limit", 0) \
|
||||
|
@ -1104,20 +1104,15 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
|
||||
}
|
||||
|
||||
/// Truncating only some entries from the end
|
||||
TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3)
|
||||
/// For compressed logs we have no reliable way of knowing how many log entries were lost
|
||||
/// after we truncate some bytes from the end
|
||||
TEST_F(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3)
|
||||
{
|
||||
auto params = GetParam();
|
||||
|
||||
/// For compressed logs we have no reliable way of knowing how many log entries were lost
|
||||
/// after we truncate some bytes from the end
|
||||
if (!params.extension.empty())
|
||||
return;
|
||||
|
||||
ChangelogDirTest test("./logs");
|
||||
setLogDirectory("./logs");
|
||||
|
||||
DB::KeeperLogStore changelog(
|
||||
DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20},
|
||||
DB::LogFileSettings{.force_sync = true, .compress_logs = false, .rotate_interval = 20},
|
||||
DB::FlushSettings(),
|
||||
keeper_context);
|
||||
changelog.init(1, 0);
|
||||
@ -1131,23 +1126,23 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3)
|
||||
changelog.end_of_append_batch(0, 0);
|
||||
|
||||
waitDurableLogs(changelog);
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension));
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + params.extension));
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin"));
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin"));
|
||||
|
||||
DB::WriteBufferFromFile plain_buf(
|
||||
"./logs/changelog_1_20.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
|
||||
"./logs/changelog_1_20.bin", DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
|
||||
plain_buf.truncate(plain_buf.size() - 30);
|
||||
|
||||
DB::KeeperLogStore changelog_reader(
|
||||
DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20},
|
||||
DB::LogFileSettings{.force_sync = true, .compress_logs = false, .rotate_interval = 20},
|
||||
DB::FlushSettings(),
|
||||
keeper_context);
|
||||
changelog_reader.init(1, 0);
|
||||
|
||||
EXPECT_EQ(changelog_reader.size(), 19);
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension));
|
||||
assertBrokenLogRemoved("./logs", "changelog_21_40.bin" + params.extension);
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_20_39.bin" + params.extension));
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin"));
|
||||
assertBrokenLogRemoved("./logs", "changelog_21_40.bin");
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_20_39.bin"));
|
||||
auto entry = getLogEntry("hello_world", 7777);
|
||||
changelog_reader.append(entry);
|
||||
changelog_reader.end_of_append_batch(0, 0);
|
||||
@ -1158,6 +1153,102 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3)
|
||||
EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777);
|
||||
}
|
||||
|
||||
/// Opens the same log directory three times in a row with alternating `compress_logs`
/// settings (off -> on -> off) and checks after every step that the previously written
/// files are still present, that all entries are readable, and that a newly appended
/// entry ends up in a file with the expected name for the current setting.
TEST_F(CoordinationTest, ChangelogTestMixedLogTypes)
{
    ChangelogDirTest logs_dir_guard("./logs");
    setLogDirectory("./logs");

    /// Paths of all changelog files expected to exist on disk so far.
    std::vector<std::string> expected_files;

    const auto check_expected_files = [&]
    {
        for (const auto & path : expected_files)
            EXPECT_TRUE(fs::exists(path)) << "File " << path << " not found";
    };

    /// Running expectations about the store content, updated on every append.
    size_t expected_last_term = 0;
    size_t expected_size = 0;

    const auto append_entry = [&](auto & store, const std::string & payload, uint64_t term)
    {
        expected_last_term = term;
        ++expected_size;
        auto new_entry = getLogEntry(payload, expected_last_term);
        store.append(new_entry);
    };

    const auto check_store_content = [&](const auto & store)
    {
        EXPECT_EQ(store.size(), expected_size);
        EXPECT_EQ(store.last_entry()->get_term(), expected_last_term);
    };

    /// Identical settings are used in every phase; only the compression flag varies.
    const auto make_settings = [](bool compress)
    {
        return DB::LogFileSettings{.force_sync = true, .compress_logs = compress, .rotate_interval = 20};
    };

    {
        SCOPED_TRACE("Initial uncompressed log");
        DB::KeeperLogStore plain_store(make_settings(false), DB::FlushSettings(), keeper_context);
        plain_store.init(1, 0);

        constexpr size_t initial_entries = 35;
        for (size_t i = 0; i < initial_entries; ++i)
        {
            const std::string payload = std::to_string(i) + "_hello_world";
            append_entry(plain_store, payload, (i + 44) * 10);
        }

        plain_store.end_of_append_batch(0, 0);
        waitDurableLogs(plain_store);

        /// 35 entries with rotate_interval = 20 are expected to span two files.
        expected_files.push_back("./logs/changelog_1_20.bin");
        expected_files.push_back("./logs/changelog_21_40.bin");
        check_expected_files();

        check_store_content(plain_store);
    }

    {
        SCOPED_TRACE("Compressed log");
        DB::KeeperLogStore compressed_store(make_settings(true), DB::FlushSettings(), keeper_context);
        compressed_store.init(1, 0);

        /// Reading the existing uncompressed files must work with compression enabled.
        check_expected_files();
        check_store_content(compressed_store);

        append_entry(compressed_store, "hello_world", 7777);
        compressed_store.end_of_append_batch(0, 0);
        waitDurableLogs(compressed_store);

        check_store_content(compressed_store);

        expected_files.push_back("./logs/changelog_36_55.bin.zstd");
        check_expected_files();
    }

    {
        SCOPED_TRACE("Final uncompressed log");
        DB::KeeperLogStore plain_store(make_settings(false), DB::FlushSettings(), keeper_context);
        plain_store.init(1, 0);

        /// Both the uncompressed and the compressed files must still be readable.
        check_expected_files();
        check_store_content(plain_store);

        append_entry(plain_store, "hello_world", 7778);
        plain_store.end_of_append_batch(0, 0);
        waitDurableLogs(plain_store);

        check_store_content(plain_store);

        expected_files.push_back("./logs/changelog_37_56.bin");
        check_expected_files();
    }
}
|
||||
|
||||
TEST_P(CoordinationTest, ChangelogTestLostFiles)
|
||||
{
|
||||
auto params = GetParam();
|
||||
|
@ -12,8 +12,7 @@ namespace MySQLProtocol
|
||||
namespace MySQLUtils
|
||||
{
|
||||
|
||||
DecimalUtils::DecimalComponents<DateTime64>
|
||||
getNormalizedDateTime64Components(DataTypePtr data_type, ColumnPtr col, size_t row_num)
|
||||
DecimalUtils::DecimalComponents<DateTime64> getNormalizedDateTime64Components(DataTypePtr data_type, ColumnPtr col, size_t row_num)
|
||||
{
|
||||
const auto * date_time_type = typeid_cast<const DataTypeDateTime64 *>(data_type.get());
|
||||
|
||||
@ -52,14 +51,6 @@ getNormalizedDateTime64Components(DataTypePtr data_type, ColumnPtr col, size_t r
|
||||
|
||||
return components;
|
||||
};
|
||||
|
||||
ColumnPtr getBaseColumn(const DB::Columns & columns, size_t i)
|
||||
{
|
||||
ColumnPtr col = columns[i]->convertToFullIfNeeded();
|
||||
if (col->isNullable())
|
||||
return assert_cast<const ColumnNullable &>(*col).getNestedColumnPtr();
|
||||
return col;
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
#include "Core/DecimalFunctions.h"
|
||||
#include "DataTypes/IDataType.h"
|
||||
#include "base/types.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -13,10 +12,6 @@ namespace MySQLUtils
|
||||
/// Splits DateTime64 column data at a certain row number into whole and fractional part
|
||||
/// Additionally, normalizes the fractional part as if it was scale 6 for MySQL compatibility purposes
|
||||
DecimalUtils::DecimalComponents<DateTime64> getNormalizedDateTime64Components(DataTypePtr data_type, ColumnPtr col, size_t row_num);
|
||||
|
||||
/// If a column is ColumnSparse/ColumnLowCardinality/ColumnNullable, it is unwrapped in a correct order;
|
||||
/// otherwise, the original column is returned
|
||||
ColumnPtr getBaseColumn(const DB::Columns & columns, size_t i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4,20 +4,15 @@
|
||||
#include <Core/MySQL/PacketsProtocolBinary.h>
|
||||
#include "Common/LocalDate.h"
|
||||
#include "Common/LocalDateTime.h"
|
||||
#include "Columns/ColumnLowCardinality.h"
|
||||
#include "Columns/ColumnNullable.h"
|
||||
#include "Columns/ColumnVector.h"
|
||||
#include "Columns/ColumnsDateTime.h"
|
||||
#include "Core/DecimalFunctions.h"
|
||||
#include "DataTypes/DataTypeDateTime64.h"
|
||||
#include "DataTypes/DataTypeLowCardinality.h"
|
||||
#include "DataTypes/DataTypeNullable.h"
|
||||
#include "DataTypes/DataTypesNumber.h"
|
||||
#include "Formats/FormatSettings.h"
|
||||
#include "IO/WriteBufferFromString.h"
|
||||
#include "MySQLUtils.h"
|
||||
#include "base/DayNum.h"
|
||||
#include "base/Decimal.h"
|
||||
#include "base/types.h"
|
||||
|
||||
namespace DB
|
||||
@ -33,14 +28,18 @@ ResultSetRow::ResultSetRow(const Serializations & serializations_, const DataTyp
|
||||
FormatSettings format_settings;
|
||||
for (size_t i = 0; i < columns.size(); ++i)
|
||||
{
|
||||
ColumnPtr col = MySQLUtils::getBaseColumn(columns, i);
|
||||
if (col->isNullAt(row_num))
|
||||
ColumnPtr col = columns[i]->convertToFullIfNeeded();
|
||||
if (col->isNullable())
|
||||
{
|
||||
// See https://dev.mysql.com/doc/dev/mysql-server/8.1.0/page_protocol_binary_resultset.html#sect_protocol_binary_resultset_row
|
||||
size_t byte = (i + 2) / 8;
|
||||
int bit = 1 << ((i + 2) % 8);
|
||||
null_bitmap[byte] |= bit;
|
||||
continue; // NULLs are stored in the null bitmap only
|
||||
if (columns[i]->isNullAt(row_num))
|
||||
{
|
||||
// See https://dev.mysql.com/doc/dev/mysql-server/8.1.0/page_protocol_binary_resultset.html#sect_protocol_binary_resultset_row
|
||||
size_t byte = (i + 2) / 8;
|
||||
int bit = 1 << ((i + 2) % 8);
|
||||
null_bitmap[byte] |= bit;
|
||||
continue; // NULLs are stored in the null bitmap only
|
||||
}
|
||||
col = assert_cast<const ColumnNullable &>(*col).getNestedColumnPtr();
|
||||
}
|
||||
|
||||
DataTypePtr data_type = removeLowCardinalityAndNullable(data_types[i]);
|
||||
@ -145,9 +144,13 @@ void ResultSetRow::writePayloadImpl(WriteBuffer & buffer) const
|
||||
buffer.write(null_bitmap.data(), null_bitmap_size);
|
||||
for (size_t i = 0; i < columns.size(); ++i)
|
||||
{
|
||||
ColumnPtr col = MySQLUtils::getBaseColumn(columns, i);
|
||||
if (col->isNullAt(row_num))
|
||||
continue;
|
||||
ColumnPtr col = columns[i]->convertToFullIfNeeded();
|
||||
if (col->isNullable())
|
||||
{
|
||||
if (columns[i]->isNullAt(row_num))
|
||||
continue;
|
||||
col = assert_cast<const ColumnNullable &>(*col).getNestedColumnPtr();
|
||||
}
|
||||
|
||||
DataTypePtr data_type = removeLowCardinalityAndNullable(data_types[i]);
|
||||
TypeIndex type_index = data_type->getTypeId();
|
||||
|
@ -1,12 +1,13 @@
|
||||
#include <Core/MySQL/PacketsProtocolText.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include "Common/assert_cast.h"
|
||||
#include "Core/MySQL/IMySQLWritePacket.h"
|
||||
#include "DataTypes/DataTypeLowCardinality.h"
|
||||
#include "DataTypes/DataTypeNullable.h"
|
||||
#include "DataTypes/DataTypesDecimal.h"
|
||||
|
||||
#include "MySQLUtils.h"
|
||||
|
||||
namespace DB
|
||||
@ -36,7 +37,9 @@ ResultSetRow::ResultSetRow(const Serializations & serializations, const DataType
|
||||
else if (type_index == TypeIndex::DateTime64)
|
||||
{
|
||||
WriteBufferFromOwnString ostr;
|
||||
ColumnPtr col = MySQLUtils::getBaseColumn(columns, i);
|
||||
ColumnPtr col = columns[i]->convertToFullIfNeeded();
|
||||
if (col->isNullable())
|
||||
col = assert_cast<const ColumnNullable &>(*col).getNestedColumnPtr();
|
||||
auto components = MySQLUtils::getNormalizedDateTime64Components(data_type, col, row_num);
|
||||
writeDateTimeText<'-', ':', ' '>(LocalDateTime(components.whole, DateLUT::instance(getDateTimeTimezone(*data_type))), ostr);
|
||||
ostr.write('.');
|
||||
|
@ -140,6 +140,7 @@ class IColumn;
|
||||
\
|
||||
M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \
|
||||
M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \
|
||||
M(Bool, alter_move_to_space_execute_async, false, "Execute ALTER TABLE MOVE ... TO [DISK|VOLUME] asynchronously", 0) \
|
||||
\
|
||||
M(LoadBalancing, load_balancing, LoadBalancing::RANDOM, "Which replicas (among healthy replicas) to preferably send a query to (on the first attempt) for distributed processing.", 0) \
|
||||
M(UInt64, load_balancing_first_offset, 0, "Which replica to preferably send a query when FIRST_OR_RANDOM load balancing strategy is used.", 0) \
|
||||
@ -364,16 +365,16 @@ class IColumn;
|
||||
M(UInt64, max_bytes_to_read, 0, "Limit on read bytes (after decompression) from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it is only checked on a remote server.", 0) \
|
||||
M(OverflowMode, read_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||
\
|
||||
M(UInt64, max_rows_to_read_leaf, 0, "Limit on read rows on the leaf nodes for distributed queries. Limit is applied for local reads only excluding the final merge stage on the root node. Note, the setting is unstable with prefer_localhost_replica=1.", 0) \
|
||||
M(UInt64, max_bytes_to_read_leaf, 0, "Limit on read bytes (after decompression) on the leaf nodes for distributed queries. Limit is applied for local reads only excluding the final merge stage on the root node. Note, the setting is unstable with prefer_localhost_replica=1.", 0) \
|
||||
M(UInt64, max_rows_to_read_leaf, 0, "Limit on read rows on the leaf nodes for distributed queries. Limit is applied for local reads only, excluding the final merge stage on the root node. Note, the setting is unstable with prefer_localhost_replica=1.", 0) \
|
||||
M(UInt64, max_bytes_to_read_leaf, 0, "Limit on read bytes (after decompression) on the leaf nodes for distributed queries. Limit is applied for local reads only, excluding the final merge stage on the root node. Note, the setting is unstable with prefer_localhost_replica=1.", 0) \
|
||||
M(OverflowMode, read_overflow_mode_leaf, OverflowMode::THROW, "What to do when the leaf limit is exceeded.", 0) \
|
||||
\
|
||||
M(UInt64, max_rows_to_group_by, 0, "If aggregation during GROUP BY is generating more than specified number of rows (unique GROUP BY keys), the behavior will be determined by the 'group_by_overflow_mode' which by default is - throw an exception, but can be also switched to an approximate GROUP BY mode.", 0) \
|
||||
M(UInt64, max_rows_to_group_by, 0, "If aggregation during GROUP BY is generating more than the specified number of rows (unique GROUP BY keys), the behavior will be determined by the 'group_by_overflow_mode' which by default is - throw an exception, but can be also switched to an approximate GROUP BY mode.", 0) \
|
||||
M(OverflowModeGroupBy, group_by_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||
M(UInt64, max_bytes_before_external_group_by, 0, "If memory usage during GROUP BY operation is exceeding this threshold in bytes, activate the 'external aggregation' mode (spill data to disk). Recommended value is half of available system memory.", 0) \
|
||||
\
|
||||
M(UInt64, max_rows_to_sort, 0, "If more than specified amount of records have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception", 0) \
|
||||
M(UInt64, max_bytes_to_sort, 0, "If more than specified amount of (uncompressed) bytes have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception", 0) \
|
||||
M(UInt64, max_rows_to_sort, 0, "If more than the specified amount of records have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception", 0) \
|
||||
M(UInt64, max_bytes_to_sort, 0, "If more than the specified amount of (uncompressed) bytes have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception", 0) \
|
||||
M(OverflowMode, sort_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||
M(UInt64, max_bytes_before_external_sort, 0, "If memory usage during ORDER BY operation is exceeding this threshold in bytes, activate the 'external sorting' mode (spill data to disk). Recommended value is half of available system memory.", 0) \
|
||||
M(UInt64, max_bytes_before_remerge_sort, 1000000000, "In case of ORDER BY with LIMIT, when memory usage is higher than specified threshold, perform additional steps of merging blocks before final merge to keep just top LIMIT rows.", 0) \
|
||||
@ -384,8 +385,10 @@ class IColumn;
|
||||
M(OverflowMode, result_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||
\
|
||||
/* TODO: Check also when merging and finalizing aggregate functions. */ \
|
||||
M(Seconds, max_execution_time, 0, "If query run time exceeded the specified number of seconds, the behavior will be determined by the 'timeout_overflow_mode' which by default is - throw an exception. Note that the timeout is checked and query can stop only in designated places during data processing. It currently cannot stop during merging of aggregation states or during query analysis, and the actual run time will be higher than the value of this setting.", 0) \
|
||||
M(Seconds, max_execution_time, 0, "If query runtime exceeds the specified number of seconds, the behavior will be determined by the 'timeout_overflow_mode', which by default is - throw an exception. Note that the timeout is checked and query can stop only in designated places during data processing. It currently cannot stop during merging of aggregation states or during query analysis, and the actual run time will be higher than the value of this setting.", 0) \
|
||||
M(OverflowMode, timeout_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||
M(Seconds, max_execution_time_leaf, 0, "Similar semantic to max_execution_time but only apply on leaf node for distributed queries, the time out behavior will be determined by 'timeout_overflow_mode_leaf' which by default is - throw an exception", 0) \
|
||||
M(OverflowMode, timeout_overflow_mode_leaf, OverflowMode::THROW, "What to do when the leaf limit is exceeded.", 0) \
|
||||
\
|
||||
M(UInt64, min_execution_speed, 0, "Minimum number of execution rows per second.", 0) \
|
||||
M(UInt64, max_execution_speed, 0, "Maximum number of execution rows per second.", 0) \
|
||||
@ -399,7 +402,7 @@ class IColumn;
|
||||
\
|
||||
M(UInt64, max_sessions_for_user, 0, "Maximum number of simultaneous sessions for a user.", 0) \
|
||||
\
|
||||
M(UInt64, max_subquery_depth, 100, "If a query has more than specified number of nested subqueries, throw an exception. This allows you to have a sanity check to protect the users of your cluster from going insane with their queries.", 0) \
|
||||
M(UInt64, max_subquery_depth, 100, "If a query has more than the specified number of nested subqueries, throw an exception. This allows you to have a sanity check to protect the users of your cluster from going insane with their queries.", 0) \
|
||||
M(UInt64, max_analyze_depth, 5000, "Maximum number of analyses performed by interpreter.", 0) \
|
||||
M(UInt64, max_ast_depth, 1000, "Maximum depth of query syntax tree. Checked after parsing.", 0) \
|
||||
M(UInt64, max_ast_elements, 50000, "Maximum size of query syntax tree in number of nodes. Checked after parsing.", 0) \
|
||||
@ -614,6 +617,8 @@ class IColumn;
|
||||
M(Bool, mutations_execute_subqueries_on_initiator, false, "If true scalar subqueries are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \
|
||||
M(UInt64, mutations_max_literal_size_to_replace, 16384, "The maximum size of serialized literal in bytes to replace in UPDATE and DELETE queries", 0) \
|
||||
\
|
||||
M(Float, create_replicated_merge_tree_fault_injection_probability, 0.0f, "The probability of a fault injection during table creation after creating metadata in ZooKeeper", 0) \
|
||||
\
|
||||
M(Bool, use_query_cache, false, "Enable the query cache", 0) \
|
||||
M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \
|
||||
M(Bool, enable_reads_from_query_cache, true, "Enable reading results of SELECT queries from the query cache", 0) \
|
||||
|
@ -1083,12 +1083,14 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
|
||||
}
|
||||
LOG_INFO(log, "All tables are created successfully");
|
||||
|
||||
if (max_log_ptr_at_creation != 0)
|
||||
chassert(max_log_ptr_at_creation || our_log_ptr);
|
||||
UInt32 first_entry_to_mark_finished = new_replica ? max_log_ptr_at_creation : our_log_ptr;
|
||||
if (first_entry_to_mark_finished)
|
||||
{
|
||||
/// If the replica is new and some of the queries applied during recovery
|
||||
/// were issued after the replica was created, then other nodes might be
|
||||
/// waiting for this node to notify them that the query was applied.
|
||||
for (UInt32 ptr = max_log_ptr_at_creation; ptr <= max_log_ptr; ++ptr)
|
||||
for (UInt32 ptr = first_entry_to_mark_finished; ptr <= max_log_ptr; ++ptr)
|
||||
{
|
||||
auto entry_name = DDLTaskBase::getLogEntryName(ptr);
|
||||
auto path = fs::path(zookeeper_path) / "log" / entry_name / "finished" / getFullReplicaName();
|
||||
|
@ -17,6 +17,7 @@ enum class NumpyDataTypeIndex
|
||||
UInt16,
|
||||
UInt32,
|
||||
UInt64,
|
||||
Float16,
|
||||
Float32,
|
||||
Float64,
|
||||
String,
|
||||
@ -79,6 +80,7 @@ public:
|
||||
{
|
||||
switch (size)
|
||||
{
|
||||
case 2: type_index = NumpyDataTypeIndex::Float16; break;
|
||||
case 4: type_index = NumpyDataTypeIndex::Float32; break;
|
||||
case 8: type_index = NumpyDataTypeIndex::Float64; break;
|
||||
default:
|
||||
|
@ -182,10 +182,19 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume
|
||||
that->addBatchArray(0, input_rows_count, places.data(), 0, aggregate_arguments, offsets->data(), arena.get());
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
/// We should use insertMergeResultInto to insert result into ColumnAggregateFunction
|
||||
/// correctly if result contains AggregateFunction's states
|
||||
agg_func.insertMergeResultInto(places[i], res_col, arena.get());
|
||||
if (agg_func.isState())
|
||||
{
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
/// We should use insertMergeResultInto to insert result into ColumnAggregateFunction
|
||||
/// correctly if result contains AggregateFunction's states
|
||||
agg_func.insertMergeResultInto(places[i], res_col, arena.get());
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
agg_func.insertResultInto(places[i], res_col, arena.get());
|
||||
}
|
||||
|
||||
return result_holder;
|
||||
}
|
||||
|
||||
|
94
src/Functions/coverage.cpp
Normal file
94
src/Functions/coverage.cpp
Normal file
@ -0,0 +1,94 @@
|
||||
#if defined(SANITIZE_COVERAGE)
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <base/coverage.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
enum class Kind
|
||||
{
|
||||
Current,
|
||||
All
|
||||
};
|
||||
|
||||
/** If ClickHouse is built with coverage instrumentation, returns an array
|
||||
* of currently accumulated (`coverage`) / all possible (`coverageAll`) unique code addresses.
|
||||
*/
|
||||
class FunctionCoverage : public IFunction
|
||||
{
|
||||
private:
|
||||
Kind kind;
|
||||
|
||||
public:
|
||||
String getName() const override
|
||||
{
|
||||
return kind == Kind::Current ? "coverage" : "coverageAll";
|
||||
}
|
||||
|
||||
explicit FunctionCoverage(Kind kind_) : kind(kind_)
|
||||
{
|
||||
}
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool isDeterministic() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
auto coverage_table = kind == Kind::Current ? getCoverage() : getAllInstrumentedAddresses();
|
||||
|
||||
auto column_addresses = ColumnUInt64::create();
|
||||
auto & data = column_addresses->getData();
|
||||
|
||||
for (auto ptr : coverage_table)
|
||||
if (ptr)
|
||||
data.push_back(ptr);
|
||||
|
||||
auto column_array = ColumnArray::create(
|
||||
std::move(column_addresses),
|
||||
ColumnArray::ColumnOffsets::create(1, data.size()));
|
||||
|
||||
return ColumnConst::create(std::move(column_array), input_rows_count);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(Coverage)
|
||||
{
|
||||
factory.registerFunction("coverage", [](ContextPtr){ return std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionCoverage>(Kind::Current)); });
|
||||
factory.registerFunction("coverageAll", [](ContextPtr){ return std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionCoverage>(Kind::All)); });
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -45,8 +45,34 @@ namespace
|
||||
return toString(kind);
|
||||
}
|
||||
|
||||
explicit FunctionProfiles(const ContextPtr & context, Kind kind_)
|
||||
explicit FunctionProfiles(const ContextPtr & context_, Kind kind_)
|
||||
: kind(kind_)
|
||||
, context(context_)
|
||||
{}
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
std::call_once(initialized_flag, [&]{ initialize(); });
|
||||
|
||||
auto col_res = ColumnArray::create(ColumnString::create());
|
||||
ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
|
||||
ColumnArray::Offsets & res_offsets = col_res->getOffsets();
|
||||
for (const String & profile_name : profile_names)
|
||||
res_strings.insertData(profile_name.data(), profile_name.length());
|
||||
res_offsets.push_back(res_strings.size());
|
||||
return ColumnConst::create(std::move(col_res), input_rows_count);
|
||||
}
|
||||
|
||||
private:
|
||||
void initialize() const
|
||||
{
|
||||
const auto & manager = context->getAccessControl();
|
||||
|
||||
@ -62,28 +88,11 @@ namespace
|
||||
profile_names = manager.tryReadNames(profile_ids);
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool isDeterministic() const override { return false; }
|
||||
mutable std::once_flag initialized_flag;
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
auto col_res = ColumnArray::create(ColumnString::create());
|
||||
ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
|
||||
ColumnArray::Offsets & res_offsets = col_res->getOffsets();
|
||||
for (const String & profile_name : profile_names)
|
||||
res_strings.insertData(profile_name.data(), profile_name.length());
|
||||
res_offsets.push_back(res_strings.size());
|
||||
return ColumnConst::create(std::move(col_res), input_rows_count);
|
||||
}
|
||||
|
||||
private:
|
||||
Kind kind;
|
||||
Strings profile_names;
|
||||
ContextPtr context;
|
||||
mutable Strings profile_names;
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -35,7 +35,33 @@ namespace
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
explicit FunctionCurrentRoles(const ContextPtr & context)
|
||||
explicit FunctionCurrentRoles(const ContextPtr & context_)
|
||||
: context(context_)
|
||||
{}
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
std::call_once(initialized_flag, [&]{ initialize(); });
|
||||
|
||||
auto col_res = ColumnArray::create(ColumnString::create());
|
||||
ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
|
||||
ColumnArray::Offsets & res_offsets = col_res->getOffsets();
|
||||
for (const String & role_name : role_names)
|
||||
res_strings.insertData(role_name.data(), role_name.length());
|
||||
res_offsets.push_back(res_strings.size());
|
||||
return ColumnConst::create(std::move(col_res), input_rows_count);
|
||||
}
|
||||
|
||||
private:
|
||||
void initialize() const
|
||||
{
|
||||
if constexpr (kind == Kind::CURRENT_ROLES)
|
||||
{
|
||||
@ -57,27 +83,9 @@ namespace
|
||||
::sort(role_names.begin(), role_names.end());
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
auto col_res = ColumnArray::create(ColumnString::create());
|
||||
ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
|
||||
ColumnArray::Offsets & res_offsets = col_res->getOffsets();
|
||||
for (const String & role_name : role_names)
|
||||
res_strings.insertData(role_name.data(), role_name.length());
|
||||
res_offsets.push_back(res_strings.size());
|
||||
return ColumnConst::create(std::move(col_res), input_rows_count);
|
||||
}
|
||||
|
||||
private:
|
||||
Strings role_names;
|
||||
mutable std::once_flag initialized_flag;
|
||||
ContextPtr context;
|
||||
mutable Strings role_names;
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -141,10 +141,19 @@ ColumnPtr FunctionInitializeAggregation::executeImpl(const ColumnsWithTypeAndNam
|
||||
that->addBatch(0, input_rows_count, places.data(), 0, aggregate_arguments, arena.get());
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
if (agg_func.isState())
|
||||
{
|
||||
/// We should use insertMergeResultInto to insert result into ColumnAggregateFunction
|
||||
/// correctly if result contains AggregateFunction's states
|
||||
agg_func.insertMergeResultInto(places[i], res_col, arena.get());
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
agg_func.insertMergeResultInto(places[i], res_col, arena.get());
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
agg_func.insertResultInto(places[i], res_col, arena.get());
|
||||
}
|
||||
|
||||
return result_holder;
|
||||
}
|
||||
|
||||
|
@ -154,7 +154,7 @@ namespace
|
||||
ColumnPtr executeImpl(
|
||||
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
initialize(arguments, result_type);
|
||||
std::call_once(once, [&] { initialize(arguments, result_type); });
|
||||
|
||||
const auto * in = arguments[0].column.get();
|
||||
|
||||
@ -672,11 +672,9 @@ namespace
|
||||
ColumnPtr default_column;
|
||||
|
||||
bool is_empty = false;
|
||||
bool initialized = false;
|
||||
|
||||
std::mutex mutex;
|
||||
};
|
||||
|
||||
mutable std::once_flag once;
|
||||
mutable Cache cache;
|
||||
|
||||
|
||||
@ -706,10 +704,6 @@ namespace
|
||||
/// Can be called from different threads. It works only on the first call.
|
||||
void initialize(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
|
||||
{
|
||||
std::lock_guard lock(cache.mutex);
|
||||
if (cache.initialized)
|
||||
return;
|
||||
|
||||
const DataTypePtr & from_type = arguments[0].type;
|
||||
|
||||
if (from_type->onlyNull())
|
||||
@ -824,8 +818,6 @@ namespace
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cache.initialized = true;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -664,11 +664,20 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf);
|
||||
template <typename ReturnType = void>
|
||||
inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
/// Optimistic path, when whole value is in buffer.
|
||||
if (!buf.eof() && buf.position() + 10 <= buf.buffer().end())
|
||||
{
|
||||
char * pos = buf.position();
|
||||
|
||||
auto error = [&]
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Cannot parse date here: {}", String(buf.position(), 10));
|
||||
return ReturnType(false);
|
||||
};
|
||||
|
||||
/// YYYY-MM-DD
|
||||
/// YYYY-MM-D
|
||||
/// YYYY-M-DD
|
||||
@ -677,6 +686,9 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
|
||||
|
||||
/// The delimiters can be arbitrary characters, like YYYY/MM!DD, but obviously not digits.
|
||||
|
||||
if (!isNumericASCII(pos[0]) || !isNumericASCII(pos[1]) || !isNumericASCII(pos[2]) || !isNumericASCII(pos[3]))
|
||||
return error();
|
||||
|
||||
UInt16 year = (pos[0] - '0') * 1000 + (pos[1] - '0') * 100 + (pos[2] - '0') * 10 + (pos[3] - '0');
|
||||
UInt8 month;
|
||||
UInt8 day;
|
||||
@ -685,12 +697,18 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
|
||||
if (isNumericASCII(pos[-1]))
|
||||
{
|
||||
/// YYYYMMDD
|
||||
if (!isNumericASCII(pos[0]) || !isNumericASCII(pos[1]) || !isNumericASCII(pos[2]))
|
||||
return error();
|
||||
|
||||
month = (pos[-1] - '0') * 10 + (pos[0] - '0');
|
||||
day = (pos[1] - '0') * 10 + (pos[2] - '0');
|
||||
pos += 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!isNumericASCII(pos[0]))
|
||||
return error();
|
||||
|
||||
month = pos[0] - '0';
|
||||
if (isNumericASCII(pos[1]))
|
||||
{
|
||||
@ -700,8 +718,8 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
|
||||
else
|
||||
pos += 2;
|
||||
|
||||
if (isNumericASCII(pos[-1]))
|
||||
return ReturnType(false);
|
||||
if (isNumericASCII(pos[-1]) || !isNumericASCII(pos[0]))
|
||||
return error();
|
||||
|
||||
day = pos[0] - '0';
|
||||
if (isNumericASCII(pos[1]))
|
||||
|
@ -1,20 +1,21 @@
|
||||
#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
|
||||
#include <Interpreters/Cluster.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/VirtualColumnUtils.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/FailPoint.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
|
||||
#include <Interpreters/Cluster.h>
|
||||
#include <Interpreters/AddDefaultDatabaseVisitor.h>
|
||||
#include <Interpreters/RequiredSourceColumnsVisitor.h>
|
||||
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
|
||||
#include <Client/IConnections.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/FailPoint.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
#include <Processors/QueryPlan/ReadFromRemote.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
@ -22,6 +23,7 @@
|
||||
#include <Processors/QueryPlan/DistributedCreateLocalPlan.h>
|
||||
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event DistributedConnectionMissingTable;
|
||||
@ -121,6 +123,7 @@ void SelectStreamFactory::createForShard(
|
||||
if (it != objects_by_shard.end())
|
||||
replaceMissedSubcolumnsByConstants(storage_snapshot->object_columns, it->second, query_ast);
|
||||
|
||||
|
||||
auto emplace_local_stream = [&]()
|
||||
{
|
||||
local_plans.emplace_back(createLocalPlan(
|
||||
|
@ -141,6 +141,14 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster,
|
||||
new_settings.allow_experimental_parallel_reading_from_replicas = false;
|
||||
}
|
||||
|
||||
if (settings.max_execution_time_leaf.value > 0)
|
||||
{
|
||||
/// Replace 'max_execution_time' of this sub-query with 'max_execution_time_leaf' and 'timeout_overflow_mode'
|
||||
/// with 'timeout_overflow_mode_leaf'
|
||||
new_settings.max_execution_time = settings.max_execution_time_leaf;
|
||||
new_settings.timeout_overflow_mode = settings.timeout_overflow_mode_leaf;
|
||||
}
|
||||
|
||||
auto new_context = Context::createCopy(context);
|
||||
new_context->setSettings(new_settings);
|
||||
return new_context;
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <memory>
|
||||
#include <Poco/UUID.h>
|
||||
#include <Poco/Util/Application.h>
|
||||
#include <Common/SensitiveDataMasker.h>
|
||||
#include <Common/Macros.h>
|
||||
#include <Common/EventNotifier.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
@ -196,6 +197,8 @@ struct ContextSharedPart : boost::noncopyable
|
||||
mutable zkutil::ZooKeeperPtr zookeeper TSA_GUARDED_BY(zookeeper_mutex); /// Client for ZooKeeper.
|
||||
ConfigurationPtr zookeeper_config TSA_GUARDED_BY(zookeeper_mutex); /// Stores zookeeper configs
|
||||
|
||||
ConfigurationPtr sensitive_data_masker_config;
|
||||
|
||||
#if USE_NURAFT
|
||||
mutable std::mutex keeper_dispatcher_mutex;
|
||||
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
|
||||
@ -2324,6 +2327,25 @@ void Context::loadOrReloadDictionaries(const Poco::Util::AbstractConfiguration &
|
||||
shared->dictionaries_xmls = external_dictionaries_loader.addConfigRepository(std::move(repository));
|
||||
}
|
||||
|
||||
void Context::waitForDictionariesLoad() const
|
||||
{
|
||||
LOG_TRACE(shared->log, "Waiting for dictionaries to be loaded");
|
||||
auto results = getExternalDictionariesLoader().tryLoadAll<ExternalLoader::LoadResults>();
|
||||
bool all_dictionaries_loaded = true;
|
||||
for (const auto & result : results)
|
||||
{
|
||||
if ((result.status != ExternalLoaderStatus::LOADED) && (result.status != ExternalLoaderStatus::LOADED_AND_RELOADING))
|
||||
{
|
||||
LOG_WARNING(shared->log, "Dictionary {} was not loaded ({})", result.name, result.status);
|
||||
all_dictionaries_loaded = false;
|
||||
}
|
||||
}
|
||||
if (all_dictionaries_loaded)
|
||||
LOG_INFO(shared->log, "All dictionaries have been loaded");
|
||||
else
|
||||
LOG_INFO(shared->log, "Some dictionaries were not loaded");
|
||||
}
|
||||
|
||||
void Context::loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
auto patterns_values = getMultipleValuesFromConfig(config, "", "user_defined_executable_functions_config");
|
||||
@ -3198,6 +3220,16 @@ bool Context::hasAuxiliaryZooKeeper(const String & name) const
|
||||
return getConfigRef().has("auxiliary_zookeepers." + name);
|
||||
}
|
||||
|
||||
void Context::reloadQueryMaskingRulesIfChanged(const ConfigurationPtr & config) const
|
||||
{
|
||||
const auto old_config = shared->sensitive_data_masker_config;
|
||||
if (old_config && isSameConfiguration(*config, *old_config, "query_masking_rules"))
|
||||
return;
|
||||
|
||||
SensitiveDataMasker::setInstance(std::make_unique<SensitiveDataMasker>(*config, "query_masking_rules"));
|
||||
shared->sensitive_data_masker_config = config;
|
||||
}
|
||||
|
||||
InterserverCredentialsPtr Context::getInterserverCredentials() const
|
||||
{
|
||||
return shared->interserver_io_credentials.get();
|
||||
|
@ -792,6 +792,7 @@ public:
|
||||
EmbeddedDictionaries & getEmbeddedDictionaries();
|
||||
void tryCreateEmbeddedDictionaries(const Poco::Util::AbstractConfiguration & config) const;
|
||||
void loadOrReloadDictionaries(const Poco::Util::AbstractConfiguration & config);
|
||||
void waitForDictionariesLoad() const;
|
||||
|
||||
const ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader() const;
|
||||
ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader();
|
||||
@ -946,6 +947,8 @@ public:
|
||||
// Reload Zookeeper
|
||||
void reloadZooKeeperIfChanged(const ConfigurationPtr & config) const;
|
||||
|
||||
void reloadQueryMaskingRulesIfChanged(const ConfigurationPtr & config) const;
|
||||
|
||||
void setSystemZooKeeperLogAfterInitializationIfNeeded();
|
||||
|
||||
/// --- Caches ------------------------------------------------------------------------------------------
|
||||
|
@ -1448,6 +1448,21 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
|
||||
"ATTACH ... FROM ... query is not supported for {} table engine, "
|
||||
"because such tables do not store any data on disk. Use CREATE instead.", res->getName());
|
||||
|
||||
auto * replicated_storage = typeid_cast<StorageReplicatedMergeTree *>(res.get());
|
||||
if (replicated_storage)
|
||||
{
|
||||
const auto probability = getContext()->getSettingsRef().create_replicated_merge_tree_fault_injection_probability;
|
||||
std::bernoulli_distribution fault(probability);
|
||||
if (fault(thread_local_rng))
|
||||
{
|
||||
/// We emulate the case when the exception was thrown in StorageReplicatedMergeTree constructor
|
||||
if (!create.attach)
|
||||
replicated_storage->dropIfEmpty();
|
||||
|
||||
throw Coordination::Exception(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (during table creation)");
|
||||
}
|
||||
}
|
||||
|
||||
database->createTable(getContext(), create.getTable(), res, query_ptr);
|
||||
|
||||
/// Move table data to the proper place. We do not move data earlier to avoid situations
|
||||
|
@ -2929,6 +2929,7 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
|
||||
auto sorting_step = std::make_unique<SortingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
window.full_sort_description,
|
||||
window.partition_by,
|
||||
0 /* LIMIT */,
|
||||
sort_settings,
|
||||
settings.optimize_sorting_by_input_stream_properties);
|
||||
|
@ -57,6 +57,7 @@
|
||||
#include <Parsers/ASTSystemQuery.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Common/ThreadFuzzer.h>
|
||||
#include <base/coverage.h>
|
||||
#include <csignal>
|
||||
#include <algorithm>
|
||||
#include <unistd.h>
|
||||
@ -687,6 +688,12 @@ BlockIO InterpreterSystemQuery::execute()
|
||||
FailPointInjection::disableFailPoint(query.fail_point_name);
|
||||
break;
|
||||
}
|
||||
case Type::RESET_COVERAGE:
|
||||
{
|
||||
getContext()->checkAccess(AccessType::SYSTEM);
|
||||
resetCoverage();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown type of SYSTEM query");
|
||||
}
|
||||
@ -1301,6 +1308,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
|
||||
case Type::START_THREAD_FUZZER:
|
||||
case Type::ENABLE_FAILPOINT:
|
||||
case Type::DISABLE_FAILPOINT:
|
||||
case Type::RESET_COVERAGE:
|
||||
case Type::UNKNOWN:
|
||||
case Type::END: break;
|
||||
}
|
||||
|
@ -53,6 +53,18 @@ bool PredicateExpressionsOptimizer::optimize(ASTSelectQuery & select_query)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool hasInputTableFunction(const ASTPtr & expr)
|
||||
{
|
||||
if (const auto * func = typeid_cast<const ASTFunction *>(expr.get()); func && func->name == "input")
|
||||
return true;
|
||||
|
||||
for (const auto & child : expr->children)
|
||||
if (hasInputTableFunction(child))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<ASTs> PredicateExpressionsOptimizer::extractTablesPredicates(const ASTPtr & where, const ASTPtr & prewhere)
|
||||
{
|
||||
std::vector<ASTs> tables_predicates(tables_with_columns.size());
|
||||
@ -72,6 +84,11 @@ std::vector<ASTs> PredicateExpressionsOptimizer::extractTablesPredicates(const A
|
||||
return {}; /// Not optimized when predicate contains stateful function or indeterministic function or window functions
|
||||
}
|
||||
|
||||
/// Skip predicate like `... IN (SELECT ... FROM input())` because
|
||||
/// it can be duplicated but we can't execute `input()` twice.
|
||||
if (hasInputTableFunction(predicate_expression))
|
||||
return {};
|
||||
|
||||
if (!expression_info.is_array_join)
|
||||
{
|
||||
if (expression_info.unique_reference_tables_pos.size() == 1)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user