diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index 0c657a245cb..b50584a2c01 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -102,6 +102,9 @@ jobs: run: | cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{ runner.temp }}/style_check + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{ runner.temp }}/style_check + ROBOT_CLICKHOUSE_SSH_KEY<" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
diff --git a/contrib/c-ares b/contrib/c-ares
new file mode 160000
index 00000000000..afee6748b0b
--- /dev/null
+++ b/contrib/c-ares
@@ -0,0 +1 @@
+Subproject commit afee6748b0b99acf4509d42fa37ac8422262f91b
diff --git a/contrib/c-ares-cmake/CMakeLists.txt b/contrib/c-ares-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..603c1f8b65c
--- /dev/null
+++ b/contrib/c-ares-cmake/CMakeLists.txt
@@ -0,0 +1,35 @@
+# Choose to build a static or shared library for c-ares.
+if (USE_STATIC_LIBRARIES)
+    set(CARES_STATIC ON CACHE BOOL "" FORCE)
+    set(CARES_SHARED OFF CACHE BOOL "" FORCE)
+else ()
+    set(CARES_STATIC OFF CACHE BOOL "" FORCE)
+    set(CARES_SHARED ON CACHE BOOL "" FORCE)
+endif ()
+
+# Disable looking for libnsl on platforms that have gethostbyname in glibc.
+#
+# c-ares searches for gethostbyname in the libnsl library; however, the
+# version shipped with gRPC does this incorrectly [1], since it uses
+# CHECK_LIBRARY_EXISTS(), which returns TRUE even if the function exists in
+# another dependent library. The upstream already contains the correct macro [2],
+# but it is not included in gRPC (even upstream gRPC, not the one that is
+# shipped with ClickHouse).
+#
+# [1]: https://github.com/c-ares/c-ares/blob/e982924acee7f7313b4baa4ee5ec000c5e373c30/CMakeLists.txt#L125
+# [2]: https://github.com/c-ares/c-ares/blob/44fbc813685a1fa8aa3f27fcd7544faf612d376a/CMakeLists.txt#L146
+#
+# As a result, if you happen to have libnsl [3] installed, clickhouse would
+# refuse to start without it, even though it is a completely different library.
+#
+# [3]: https://packages.debian.org/bullseye/libnsl2
+if (NOT CMAKE_SYSTEM_NAME STREQUAL "SunOS")
+    set(HAVE_LIBNSL OFF CACHE BOOL "" FORCE)
+endif()
+
+# Force use of the c-ares inet_net_pton instead of the libresolv one.
+set(HAVE_INET_NET_PTON OFF CACHE BOOL "" FORCE)
+
+add_subdirectory("../c-ares/" "../c-ares/")
+
+add_library(ch_contrib::c-ares ALIAS c-ares)
\ No newline at end of file
diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt
index 520e04d198e..b1ed7e464b6 100644
--- a/contrib/grpc-cmake/CMakeLists.txt
+++ b/contrib/grpc-cmake/CMakeLists.txt
@@ -45,38 +45,11 @@ set(_gRPC_SSL_LIBRARIES OpenSSL::Crypto OpenSSL::SSL)
 # Use abseil-cpp from ClickHouse contrib, not from gRPC third_party.
 set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
 
-# Choose to build static or shared library for c-ares.
-if (USE_STATIC_LIBRARIES)
-    set(CARES_STATIC ON CACHE BOOL "" FORCE)
-    set(CARES_SHARED OFF CACHE BOOL "" FORCE)
-else ()
-    set(CARES_STATIC OFF CACHE BOOL "" FORCE)
-    set(CARES_SHARED ON CACHE BOOL "" FORCE)
-endif ()
-
-# Disable looking for libnsl on a platforms that has gethostbyname in glibc
-#
-# c-ares searching for gethostbyname in the libnsl library, however in the
-# version that shipped with gRPC it doing it wrong [1], since it uses
-# CHECK_LIBRARY_EXISTS(), which will return TRUE even if the function exists in
-# another dependent library. The upstream already contains correct macro [2],
-# but it is not included in gRPC (even upstream gRPC, not the one that is
-# shipped with clickhousee).
-#
-# [1]: https://github.com/c-ares/c-ares/blob/e982924acee7f7313b4baa4ee5ec000c5e373c30/CMakeLists.txt#L125
-# [2]: https://github.com/c-ares/c-ares/blob/44fbc813685a1fa8aa3f27fcd7544faf612d376a/CMakeLists.txt#L146
-#
-# And because if you by some reason have libnsl [3] installed, clickhouse will
-# reject to start w/o it. While this is completelly different library.
-#
-# [3]: https://packages.debian.org/bullseye/libnsl2
-if (NOT CMAKE_SYSTEM_NAME STREQUAL "SunOS")
-    set(HAVE_LIBNSL OFF CACHE BOOL "" FORCE)
-endif()
-
 # We don't want to build C# extensions.
 set(gRPC_BUILD_CSHARP_EXT OFF)
 
+set(_gRPC_CARES_LIBRARIES ch_contrib::c-ares)
+set(gRPC_CARES_PROVIDER "clickhouse" CACHE STRING "" FORCE)
 add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}")
 # The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes,
diff --git a/contrib/qpl b/contrib/qpl
new file mode 160000
index 00000000000..cdc8442f7a5
--- /dev/null
+++ b/contrib/qpl
@@ -0,0 +1 @@
+Subproject commit cdc8442f7a5e7a6ff6eea39c69665e0c5034d85d
diff --git a/contrib/qpl-cmake/CMakeLists.txt b/contrib/qpl-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..dc90f07a9bc
--- /dev/null
+++ b/contrib/qpl-cmake/CMakeLists.txt
@@ -0,0 +1,322 @@
+## Intel® QPL provides high-performance implementations of data processing functions for an existing hardware accelerator, and/or a software path in case the hardware accelerator is not available.
+if (OS_LINUX AND ARCH_AMD64 AND (ENABLE_AVX2 OR ENABLE_AVX512))
+    option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES})
+elseif(ENABLE_QPL)
+    message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with avx2/avx512 support")
+endif()
+
+if (NOT ENABLE_QPL)
+    message(STATUS "Not using QPL")
+    return()
+endif()
+
+set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl")
+set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources")
+set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl")
+set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake")
+
+set (EFFICIENT_WAIT ON)
+set (BLOCK_ON_FAULT ON)
+set (LOG_HW_INIT OFF)
+set (SANITIZE_MEMORY OFF)
+set (SANITIZE_THREADS OFF)
+set (LIB_FUZZING_ENGINE OFF)
+
+function(GetLibraryVersion _content _outputVar)
+    string(REGEX MATCHALL "Qpl VERSION (.+) LANGUAGES" VERSION_REGEX "${_content}")
+    SET(${_outputVar} ${CMAKE_MATCH_1} PARENT_SCOPE)
+endfunction()
+
+FILE(READ "${QPL_PROJECT_DIR}/CMakeLists.txt" HEADER_CONTENT)
+GetLibraryVersion("${HEADER_CONTENT}" QPL_VERSION)
+
+message(STATUS "Intel QPL version: ${QPL_VERSION}")
+
+# There are 5 source subdirectories under $QPL_SRC_DIR: isal, c_api, core-sw, core-iaa and middle-layer.
+# They are compiled into the internal library targets isal, isal_asm, qplcore_px, qplcore_avx512, core_iaa and middle_layer_lib,
+# which are then linked together with the c_api sources into _qpl and exposed as ch_contrib::qpl.
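+#
+# A minimal usage sketch (illustrative only, not part of the build logic in this file):
+# on an x86_64 Linux host with AVX2/AVX512 the library is enabled at configure time via
+#     cmake -DENABLE_QPL=1 ..
+# and a consumer target (the name below is a placeholder) would link it as
+#     target_link_libraries(my_target PRIVATE ch_contrib::qpl)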
+ +include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake") + +# check nasm compiler +include(CheckLanguage) +check_language(ASM_NASM) +if(NOT CMAKE_ASM_NASM_COMPILER) + message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!") +endif() + +# [SUBDIR]isal +enable_language(ASM_NASM) + +set(ISAL_C_SRC ${QPL_SRC_DIR}/isal/igzip/adler32_base.c + ${QPL_SRC_DIR}/isal/igzip/huff_codes.c + ${QPL_SRC_DIR}/isal/igzip/hufftables_c.c + ${QPL_SRC_DIR}/isal/igzip/igzip.c + ${QPL_SRC_DIR}/isal/igzip/igzip_base.c + ${QPL_SRC_DIR}/isal/igzip/flatten_ll.c + ${QPL_SRC_DIR}/isal/igzip/encode_df.c + ${QPL_SRC_DIR}/isal/igzip/igzip_icf_base.c + ${QPL_SRC_DIR}/isal/igzip/igzip_inflate.c + ${QPL_SRC_DIR}/isal/igzip/igzip_icf_body.c + ${QPL_SRC_DIR}/isal/crc/crc_base.c + ${QPL_SRC_DIR}/isal/crc/crc64_base.c) + +set(ISAL_ASM_SRC ${QPL_SRC_DIR}/isal/igzip/igzip_body.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_gen_icf_map_lh1_04.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_gen_icf_map_lh1_06.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_decode_block_stateless_04.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_finish.asm + ${QPL_SRC_DIR}/isal/igzip/encode_df_04.asm + ${QPL_SRC_DIR}/isal/igzip/encode_df_06.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_decode_block_stateless_01.asm + ${QPL_SRC_DIR}/isal/igzip/proc_heap.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_icf_body_h1_gr_bt.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_icf_finish.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_inflate_multibinary.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_update_histogram_01.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_update_histogram_04.asm + ${QPL_SRC_DIR}/isal/igzip/rfc1951_lookup.asm + ${QPL_SRC_DIR}/isal/igzip/adler32_sse.asm + ${QPL_SRC_DIR}/isal/igzip/adler32_avx2_4.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_deflate_hash.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_04.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_06.asm + ${QPL_SRC_DIR}/isal/igzip/igzip_multibinary.asm + ${QPL_SRC_DIR}/isal/igzip/stdmac.asm + ${QPL_SRC_DIR}/isal/crc/crc_multibinary.asm + ${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8.asm + ${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8_02.asm + ${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by16_10.asm + ${QPL_SRC_DIR}/isal/crc/crc32_ieee_01.asm + ${QPL_SRC_DIR}/isal/crc/crc32_ieee_02.asm + ${QPL_SRC_DIR}/isal/crc/crc32_ieee_by4.asm + ${QPL_SRC_DIR}/isal/crc/crc32_ieee_by16_10.asm + ${QPL_SRC_DIR}/isal/crc/crc32_iscsi_00.asm + ${QPL_SRC_DIR}/isal/crc/crc32_iscsi_01.asm + ${QPL_SRC_DIR}/isal/crc/crc32_iscsi_by16_10.asm) + +# Adding ISA-L library target +add_library(isal OBJECT ${ISAL_C_SRC}) +add_library(isal_asm OBJECT ${ISAL_ASM_SRC}) + +# Setting external and internal interfaces for ISA-L library +target_include_directories(isal + PUBLIC $ + PRIVATE ${QPL_SRC_DIR}/isal/include + PUBLIC ${QPL_SRC_DIR}/isal/igzip) + +target_compile_options(isal PRIVATE + "$<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}>" + "$<$:>" + "$<$:>") + +target_compile_options(isal_asm PUBLIC "-I${QPL_SRC_DIR}/isal/include/" + PUBLIC "-I${QPL_SRC_DIR}/isal/igzip/" + PUBLIC "-I${QPL_SRC_DIR}/isal/crc/" + PUBLIC "-DQPL_LIB") + +# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available". +# AS_FEATURE_LEVEL=5 means "Check SIMD capabilities of the target system at runtime and use up to AVX2 if available". +# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system. 
+if (ENABLE_AVX512) + target_compile_options(isal_asm PUBLIC "-DHAVE_AS_KNOWS_AVX512" "-DAS_FEATURE_LEVEL=10") +else() + target_compile_options(isal_asm PUBLIC "-DAS_FEATURE_LEVEL=5") +endif() + +# Here must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS. +# Otherwise nasm compiler would fail to proceed due to unrecognition of "-fno-sanitize=undefined" +if (SANITIZE STREQUAL "undefined") + get_target_property(target_options isal_asm COMPILE_OPTIONS) + list(REMOVE_ITEM target_options "-fno-sanitize=undefined") + set_property(TARGET isal_asm PROPERTY COMPILE_OPTIONS ${target_options}) +endif() + +target_compile_definitions(isal PUBLIC + QPL_LIB + NDEBUG) + +# [SUBDIR]core-sw +# Two libraries:qplcore_avx512/qplcore_px for SW fallback will be created which are implemented by AVX512 and non-AVX512 instructions respectively. +# The upper level QPL API will check SIMD capabilities of the target system at runtime and decide to call AVX512 function or non-AVX512 function. +# Hence, here we don't need put qplcore_avx512 under an ENABLE_AVX512 CMake switch. +# Actually, if we do that, some undefined symbols errors would happen because both of AVX512 function and non-AVX512 function are referenced by QPL API. +# PLATFORM=2 means AVX512 implementation; PLATFORM=0 means non-AVX512 implementation. + +# Find Core Sources +file(GLOB SOURCES + ${QPL_SRC_DIR}/core-sw/src/checksums/*.c + ${QPL_SRC_DIR}/core-sw/src/filtering/*.c + ${QPL_SRC_DIR}/core-sw/src/other/*.c + ${QPL_SRC_DIR}/core-sw/src/compression/*.c) + +file(GLOB DATA_SOURCES + ${QPL_SRC_DIR}/core-sw/src/data/*.c) + +# Create avx512 library +add_library(qplcore_avx512 OBJECT ${SOURCES}) + +target_compile_definitions(qplcore_avx512 PRIVATE PLATFORM=2) + +target_include_directories(qplcore_avx512 + PUBLIC $ + PUBLIC $ + PUBLIC $ + PRIVATE $) + +set_target_properties(qplcore_avx512 PROPERTIES + $<$:C_STANDARD 17>) + +target_link_libraries(qplcore_avx512 ${CMAKE_DL_LIBS} isal) + +target_compile_options(qplcore_avx512 + PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS} + PRIVATE -march=skylake-avx512 + PRIVATE "$<$:>" + PRIVATE "$<$:-O3;-D_FORTIFY_SOURCE=2>") + + +target_compile_definitions(qplcore_avx512 PUBLIC QPL_BADARG_CHECK) + +# +# Create px library +# +#set(CMAKE_INCLUDE_CURRENT_DIR ON) + +# Create library +add_library(qplcore_px OBJECT ${SOURCES} ${DATA_SOURCES}) + +target_compile_definitions(qplcore_px PRIVATE PLATFORM=0) + +target_include_directories(qplcore_px + PUBLIC $ + PUBLIC $ + PUBLIC $ + PRIVATE $) + +set_target_properties(qplcore_px PROPERTIES + $<$:C_STANDARD 17>) + +target_link_libraries(qplcore_px isal ${CMAKE_DL_LIBS}) + +target_compile_options(qplcore_px + PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS} + PRIVATE "$<$:>" + PRIVATE "$<$:-O3;-D_FORTIFY_SOURCE=2>") + +target_compile_definitions(qplcore_px PUBLIC QPL_BADARG_CHECK) + +# [SUBDIR]core-iaa +file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c + ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.cpp + ${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.c + ${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.cpp + ${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.c + ${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.cpp + ${QPL_SRC_DIR}/core-iaa/sources/bit_rev.c) + +# Create library +add_library(core_iaa OBJECT ${HW_PATH_SRC}) + +target_include_directories(core_iaa + PRIVATE ${UUID_DIR} + PUBLIC $ + PRIVATE $ + PRIVATE $) + +target_compile_options(core_iaa + PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; + $<$:-O3;-D_FORTIFY_SOURCE=2>>) + +target_compile_features(core_iaa PRIVATE 
c_std_11) + +target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK + PRIVATE $<$: BLOCK_ON_FAULT_ENABLED> + PRIVATE $<$:LOG_HW_INIT>) + +# [SUBDIR]middle-layer +generate_unpack_kernel_arrays(${QPL_BINARY_DIR}) + +file(GLOB MIDDLE_LAYER_SRC + ${QPL_SRC_DIR}/middle-layer/analytics/*.cpp + ${QPL_SRC_DIR}/middle-layer/c_wrapper/*.cpp + ${QPL_SRC_DIR}/middle-layer/checksum/*.cpp + ${QPL_SRC_DIR}/middle-layer/common/*.cpp + ${QPL_SRC_DIR}/middle-layer/compression/*.cpp + ${QPL_SRC_DIR}/middle-layer/compression/*/*.cpp + ${QPL_SRC_DIR}/middle-layer/compression/*/*/*.cpp + ${QPL_SRC_DIR}/middle-layer/dispatcher/*.cpp + ${QPL_SRC_DIR}/middle-layer/other/*.cpp + ${QPL_SRC_DIR}/middle-layer/util/*.cpp + ${QPL_SRC_DIR}/middle-layer/inflate/*.cpp + ${QPL_SRC_DIR}/core-iaa/sources/accelerator/*.cpp) # todo + +file(GLOB GENERATED_PX_TABLES_SRC ${QPL_BINARY_DIR}/generated/px_*.cpp) +file(GLOB GENERATED_AVX512_TABLES_SRC ${QPL_BINARY_DIR}/generated/avx512_*.cpp) + +add_library(middle_layer_lib OBJECT + ${GENERATED_PX_TABLES_SRC} + ${GENERATED_AVX512_TABLES_SRC} + ${MIDDLE_LAYER_SRC}) + +target_compile_options(middle_layer_lib + PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; + ${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS}; + $<$:-O3;-D_FORTIFY_SOURCE=2>> + PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>) + +target_compile_definitions(middle_layer_lib + PUBLIC QPL_VERSION="${QPL_VERSION}" + PUBLIC $<$:LOG_HW_INIT> + PUBLIC $<$:QPL_EFFICIENT_WAIT> + PUBLIC QPL_BADARG_CHECK) + +set_source_files_properties(${GENERATED_PX_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0) +set_source_files_properties(${GENERATED_AVX512_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2) + +target_include_directories(middle_layer_lib + PRIVATE ${UUID_DIR} + PUBLIC $ + PUBLIC $ + PUBLIC $ + PUBLIC $ + PUBLIC $ + PUBLIC $) + +target_compile_definitions(middle_layer_lib PUBLIC -DQPL_LIB) + +# [SUBDIR]c_api +file(GLOB_RECURSE QPL_C_API_SRC + ${QPL_SRC_DIR}/c_api/*.c + ${QPL_SRC_DIR}/c_api/*.cpp) + +add_library(_qpl STATIC ${QPL_C_API_SRC} + $ + $ + $ + $ + $ + $ + $) + +target_include_directories(_qpl + PUBLIC $ + PRIVATE $ + PRIVATE $) + +target_compile_options(_qpl + PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; + ${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS}; + $<$:-O3;-D_FORTIFY_SOURCE=2>> + PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>) + +target_compile_definitions(_qpl + PRIVATE -DQPL_LIB + PRIVATE -DQPL_BADARG_CHECK + PUBLIC -DENABLE_QPL_COMPRESSION) + +target_link_libraries(_qpl + PRIVATE ${CMAKE_DL_LIBS}) + +add_library (ch_contrib::qpl ALIAS _qpl) +target_include_directories(_qpl SYSTEM BEFORE PUBLIC "${QPL_PROJECT_DIR}/include") diff --git a/contrib/qpl-cmake/uuid/uuid.h b/contrib/qpl-cmake/uuid/uuid.h new file mode 100644 index 00000000000..bf108ba0d29 --- /dev/null +++ b/contrib/qpl-cmake/uuid/uuid.h @@ -0,0 +1,4 @@ +#ifndef _QPL_UUID_UUID_H +#define _QPL_UUID_UUID_H +typedef unsigned char uuid_t[16]; +#endif /* _QPL_UUID_UUID_H */ diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 1dff4b1a2d4..d57c447e2af 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -51,6 +51,7 @@ RUN apt-get update \ rename \ software-properties-common \ tzdata \ + nasm \ --yes --no-install-recommends \ && apt-get clean diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index ca44354620f..a1ae77343cb 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -55,6 +55,7 @@ RUN apt-get update \ 
pkg-config \ tzdata \ pv \ + nasm \ --yes --no-install-recommends # Sanitizer options for services (clickhouse-server) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index aa209f197df..699e2c7ceb9 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -72,6 +72,7 @@ RUN apt-get update \ tzdata \ unixodbc \ file \ + nasm \ --yes --no-install-recommends RUN pip3 install numpy scipy pandas Jinja2 diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 2bbdd978e5e..6b8109a15b2 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -135,6 +135,7 @@ function clone_submodules contrib/replxx contrib/wyhash contrib/hashidsxx + contrib/c-ares ) git submodule sync diff --git a/docker/test/integration/runner/compose/docker_compose_coredns.yml b/docker/test/integration/runner/compose/docker_compose_coredns.yml new file mode 100644 index 00000000000..b329d4e0a46 --- /dev/null +++ b/docker/test/integration/runner/compose/docker_compose_coredns.yml @@ -0,0 +1,9 @@ +version: "2.3" + +services: + coredns: + image: coredns/coredns:latest + restart: always + volumes: + - ${COREDNS_CONFIG_DIR}/example.com:/example.com + - ${COREDNS_CONFIG_DIR}/Corefile:/Corefile diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index c2ed47a1392..0ec3f09ab7f 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -17,7 +17,9 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ python3-pip \ shellcheck \ yamllint \ - && pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff pylint==2.6.2 + && pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff pylint==2.6.2 \ + && apt-get clean \ + && rm -rf /root/.cache/pip # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py index fd544f3e9c1..8c2110d64e5 100755 --- a/docker/test/style/process_style_check_result.py +++ b/docker/test/style/process_style_check_result.py @@ -40,10 +40,10 @@ def process_result(result_folder): def write_results(results_file, status_file, results, status): - with open(results_file, "w") as f: + with open(results_file, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, "w") as f: + with open(status_file, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerow(status) @@ -53,9 +53,10 @@ if __name__ == "__main__": parser = argparse.ArgumentParser( description="ClickHouse script for parsing results of style check" ) - parser.add_argument("--in-results-dir", default="/test_output/") - parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") - parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") + default_dir = "/test_output" + parser.add_argument("--in-results-dir", default=default_dir) + parser.add_argument("--out-results-file", default=f"{default_dir}/test_results.tsv") + parser.add_argument("--out-status-file", default=f"{default_dir}/check_status.tsv") args = parser.parse_args() state, description, test_results = process_result(args.in_results_dir) diff --git a/docs/README.md b/docs/README.md index b328a3ee125..fa8b6bed85c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -38,9 +38,9 @@ Writing the docs is extremely useful for project's users and developers, and gro The documentation 
contains information about all the aspects of the ClickHouse lifecycle: developing, testing, installing, operating, and using. The base language of the documentation is English. The English version is the most actual. All other languages are supported as much as they can by contributors from different countries.
 
-At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, Chinese, Japanese. We store the documentation besides the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs).
+At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, and Chinese. We keep the reference documentation alongside the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs), and the user guides in a separate repository, [ClickHouse/clickhouse-docs](https://github.com/ClickHouse/clickhouse-docs).
 
-Each language lays in the corresponding folder. Files that are not translated from English are the symbolic links to the English ones.
+Each language lies in the corresponding folder. Files that are not translated from English are symbolic links to the English ones.
@@ -48,9 +48,9 @@ Each language lays in the corresponding folder. Files that are not translated fr
 You can contribute to the documentation in many ways, for example:
 
-- Fork the ClickHouse repository, edit, commit, push, and open a pull request.
+- Fork the ClickHouse and ClickHouse-docs repositories, edit, commit, push, and open a pull request.
 
-  Add the `documentation` label to this pull request for proper automatic checks applying. If you have no permissions for adding labels, the reviewer of your PR adds it.
+  Add the `pr-documentation` label to this pull request so that the automatic checks are applied properly. If you do not have permission to add labels, the reviewer of your PR will add it.
 
 - Open a required file in the ClickHouse repository and edit it from the GitHub web interface.
@@ -158,15 +158,15 @@ When everything is ready, we will add the new language to the website.
 
-### Documentation for Different Audience
+### Documentation for Different Audiences
 
-When writing documentation, think about people who read it. Each audience has specific requirements for terms they use in communications.
+When writing documentation, think about the people who read it. Each audience has specific requirements for the terms they use in communications.
 
-ClickHouse documentation can be divided by the audience for the following parts:
+ClickHouse documentation can be divided by audience into the following parts:
 
-- Conceptual topics in [Introduction](https://clickhouse.com/docs/en/), tutorials and overviews, changelog.
+- Conceptual topics like tutorials and overviews.
 
-  These topics are for the most common auditory. When editing text in them, use the most common terms that are comfortable for the audience with basic technical skills.
+  These topics are for the most common audience. When editing text in them, use common terms that are comfortable for an audience with basic technical skills.
 
 - Query language reference and related topics.
diff --git a/docs/changelogs/v22.7.1.2484-stable.md b/docs/changelogs/v22.7.1.2484-stable.md new file mode 100644 index 00000000000..0343568658b --- /dev/null +++ b/docs/changelogs/v22.7.1.2484-stable.md @@ -0,0 +1,468 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.7.1.2484-stable (f4f05ec786a) FIXME as compared to v22.6.1.1985-stable (7000c4e0033) + +#### Backward Incompatible Change +* Enable setting `enable_positional_arguments` by default. It allows queries like `SELECT ... ORDER BY 1, 2` where 1, 2 are the references to the select clause. If you need to return the old behavior, disable this setting. [#38204](https://github.com/ClickHouse/ClickHouse/pull/38204) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `Ordinary` database engine and old storage definition syntax for `*MergeTree` tables are deprecated. By default it's not possible to create new ones. If `system` database has `Ordinary` engine it will be automatically converted to `Atomic` on server startup. There are settings to keep old behavior (`allow_deprecated_database_ordinary` and `allow_deprecated_syntax_for_merge_tree`), but these settings may be removed in future releases. [#38335](https://github.com/ClickHouse/ClickHouse/pull/38335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* * Force rewriting comma join to inner by default (set default value `cross_to_inner_join_rewrite = 2`). To have old behavior set `cross_to_inner_join_rewrite = 1`. [#39326](https://github.com/ClickHouse/ClickHouse/pull/39326) ([Vladimir C](https://github.com/vdimir)). +* Disable format_csv_allow_single_quotes by default. [#37096](https://github.com/ClickHouse/ClickHouse/issues/37096). [#39423](https://github.com/ClickHouse/ClickHouse/pull/39423) ([Kruglov Pavel](https://github.com/Avogar)). + +#### New Feature +* Add new `direct` join algorithm for RocksDB, ref [#33582](https://github.com/ClickHouse/ClickHouse/issues/33582). [#35363](https://github.com/ClickHouse/ClickHouse/pull/35363) ([Vladimir C](https://github.com/vdimir)). +* * Added full sorting merge join algorithm. [#35796](https://github.com/ClickHouse/ClickHouse/pull/35796) ([Vladimir C](https://github.com/vdimir)). +* Add a setting `zstd_window_log_max` to configure max memory usage on zstd decoding when importing external files. Closes [#35693](https://github.com/ClickHouse/ClickHouse/issues/35693). [#37015](https://github.com/ClickHouse/ClickHouse/pull/37015) ([wuxiaobai24](https://github.com/wuxiaobai24)). +* Implement NatsStorage - table engine, which allows to pub/sub to NATS. Closes [#32388](https://github.com/ClickHouse/ClickHouse/issues/32388). [#37171](https://github.com/ClickHouse/ClickHouse/pull/37171) ([tchepavel](https://github.com/tchepavel)). +* Implement table function MongoDB. Allow writes into MongoDB storage / table function. [#37213](https://github.com/ClickHouse/ClickHouse/pull/37213) ([aaapetrenko](https://github.com/aaapetrenko)). +* `clickhouse-keeper` new feature: add support for real-time digest calculation and verification. [#37555](https://github.com/ClickHouse/ClickHouse/pull/37555) ([Antonio Andelic](https://github.com/antonio2368)). +* In [#17202](https://github.com/ClickHouse/ClickHouse/issues/17202) was reported that host_regexp was being tested against only one of the possible PTR responses. This PR makes the necessary changes so that host_regexp is applied against all possible PTR responses and validate if any matches. 
[#37827](https://github.com/ClickHouse/ClickHouse/pull/37827) ([Arthur Passos](https://github.com/arthurpassos)). +* Support hadoop secure rpc transfer(hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#37852](https://github.com/ClickHouse/ClickHouse/pull/37852) ([Peng Liu](https://github.com/michael1589)). +* Add struct type support in `StorageHive`. [#38118](https://github.com/ClickHouse/ClickHouse/pull/38118) ([lgbo](https://github.com/lgbo-ustc)). +* Added Base58 encoding/decoding. [#38159](https://github.com/ClickHouse/ClickHouse/pull/38159) ([Andrey Zvonov](https://github.com/zvonand)). +* Add chart visualization to Play UI. [#38197](https://github.com/ClickHouse/ClickHouse/pull/38197) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* support `alter` command on `StorageHive` table. [#38214](https://github.com/ClickHouse/ClickHouse/pull/38214) ([lgbo](https://github.com/lgbo-ustc)). +* Added `CREATE TABLE ... EMPTY AS SELECT` query. It automatically deduces table structure from the SELECT query, but does not fill the table after creation. Resolves [#38049](https://github.com/ClickHouse/ClickHouse/issues/38049). [#38272](https://github.com/ClickHouse/ClickHouse/pull/38272) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Adds new setting `implicit_transaction` to run standalone queries inside a transaction. It handles both creation and closing (via COMMIT if the query succeeded or ROLLBACK if it didn't) of the transaction automatically. [#38344](https://github.com/ClickHouse/ClickHouse/pull/38344) ([Raúl Marín](https://github.com/Algunenano)). +* Allow trailing comma in columns list. closes [#38425](https://github.com/ClickHouse/ClickHouse/issues/38425). [#38440](https://github.com/ClickHouse/ClickHouse/pull/38440) ([chen](https://github.com/xiedeyantu)). +* Compress clickhouse into self-extracting executable (path programs/self-extracting). New build target 'self-extracting' is added. [#38447](https://github.com/ClickHouse/ClickHouse/pull/38447) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Introduced settings `additional_table_filters`. Using this setting, you can specify additional filtering condition for a table which will be applied directly after reading. Example: `select number, x, y from (select number from system.numbers limit 5) f any left join (select x, y from table_1) s on f.number = s.x settings additional_table_filters={'system.numbers : 'number != 3', 'table_1' : 'x != 2'}`. Introduced setting `additional_result_filter` which specifies additional filtering condition for query result. Closes [#37918](https://github.com/ClickHouse/ClickHouse/issues/37918). [#38475](https://github.com/ClickHouse/ClickHouse/pull/38475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add SQLInsert output format. Closes [#38441](https://github.com/ClickHouse/ClickHouse/issues/38441). [#38477](https://github.com/ClickHouse/ClickHouse/pull/38477) ([Kruglov Pavel](https://github.com/Avogar)). +* Downloadable clickhouse executable is compressed self-extracting. [#38653](https://github.com/ClickHouse/ClickHouse/pull/38653) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support `isNullable` function. This function checks whether it's argument is nullable and return true(1) or false(0). Closes [#38611](https://github.com/ClickHouse/ClickHouse/issues/38611). [#38841](https://github.com/ClickHouse/ClickHouse/pull/38841) ([lokax](https://github.com/lokax)). 
+* Add functions `translate(string, from_string, to_string)` and `translateUTF8(string, from_string, to_string)`. [#38935](https://github.com/ClickHouse/ClickHouse/pull/38935) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add `compatibility` setting and `system.settings_changes` system table that contains information about changes in settings through ClickHouse versions. Closes [#35972](https://github.com/ClickHouse/ClickHouse/issues/35972). [#38957](https://github.com/ClickHouse/ClickHouse/pull/38957) ([Kruglov Pavel](https://github.com/Avogar)). +* Add the 3rd parameter to the tupleElement function and return it if tuple doesn't have a member. Only works if the 2nd parameter is of type String. Closes [#38872](https://github.com/ClickHouse/ClickHouse/issues/38872). [#38989](https://github.com/ClickHouse/ClickHouse/pull/38989) ([lokax](https://github.com/lokax)). +* Support parseTimedelta function. It can be used like ```sql # ' ', ';', '-', '+', ',', ':' can be used as separators, eg. "1yr-2mo", "2m:6s" SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds');. [#39071](https://github.com/ClickHouse/ClickHouse/pull/39071) ([jiahui-97](https://github.com/jiahui-97)). +* Added options to limit IO operations with remote storage: `max_remote_read_network_bandwidth_for_server` and `max_remote_write_network_bandwidth_for_server`. [#39095](https://github.com/ClickHouse/ClickHouse/pull/39095) ([Sergei Trifonov](https://github.com/serxa)). +* Add `send_logs_source_regexp` setting. Send server text logs with specified regexp to match log source name. Empty means all sources. [#39161](https://github.com/ClickHouse/ClickHouse/pull/39161) ([Amos Bird](https://github.com/amosbird)). +* OpenTelemetry now collects traces without Processors spans by default. To enable Processors spans collection `opentelemetry_trace_processors` setting. [#39170](https://github.com/ClickHouse/ClickHouse/pull/39170) ([Ilya Yatsishin](https://github.com/qoega)). + +#### Performance Improvement +* Add new `local_filesystem_read_method` method `io_uring` based on the asynchronous Linux [io_uring](https://kernel.dk/io_uring.pdf) subsystem, improving read performance almost universally compared to the default `pread` method. [#36103](https://github.com/ClickHouse/ClickHouse/pull/36103) ([Saulius Valatka](https://github.com/sauliusvl)). +* Distinct optimization for sorted columns. Use specialized distinct transformation in case input stream is sorted by column(s) in distinct. Optimization can be applied to pre-distinct, final distinct, or both. Initial implementation by @dimarub2000. [#37803](https://github.com/ClickHouse/ClickHouse/pull/37803) ([Igor Nikonov](https://github.com/devcrafter)). +* Add VBMI optimized copyOverlap32Shuffle for LZ4 decompress. [#37891](https://github.com/ClickHouse/ClickHouse/pull/37891) ([Guo Wangyang](https://github.com/guowangy)). +* Improve performance of `ORDER BY`, `MergeTree` merges, window functions using batch version of `BinaryHeap`. [#38022](https://github.com/ClickHouse/ClickHouse/pull/38022) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix significant join performance regression which was introduced in https://github.com/ClickHouse/ClickHouse/pull/35616 . It's interesting that common join queries such as ssb queries have been 10 times slower for almost 3 months while no one complains. [#38052](https://github.com/ClickHouse/ClickHouse/pull/38052) ([Amos Bird](https://github.com/amosbird)). 
+* Migrate from the Intel hyperscan library to vectorscan, this speeds up many string matching on non-x86 platforms. [#38171](https://github.com/ClickHouse/ClickHouse/pull/38171) ([Robert Schulze](https://github.com/rschu1ze)). +* Increased parallelism of query plan steps executed after aggregation. [#38295](https://github.com/ClickHouse/ClickHouse/pull/38295) ([Nikita Taranov](https://github.com/nickitat)). +* Improve performance of insertion to columns of type `JSON`. [#38320](https://github.com/ClickHouse/ClickHouse/pull/38320) ([Anton Popov](https://github.com/CurtizJ)). +* Optimized insertion and lookups in the HashTable. [#38413](https://github.com/ClickHouse/ClickHouse/pull/38413) ([Nikita Taranov](https://github.com/nickitat)). +* Fix performance degradation from [#32493](https://github.com/ClickHouse/ClickHouse/issues/32493). [#38417](https://github.com/ClickHouse/ClickHouse/pull/38417) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of column vector replicate using SIMD instructions. Author @zzachimed. [#38565](https://github.com/ClickHouse/ClickHouse/pull/38565) ([Maksim Kita](https://github.com/kitaisreal)). +* Norm and Distance functions for arrays speed up 1.2-2 times. [#38740](https://github.com/ClickHouse/ClickHouse/pull/38740) ([Alexander Gololobov](https://github.com/davenger)). +* A less efficient execution plan can be generated for query with ORDER BY (a, b) than for ORDER BY a, b. [#38873](https://github.com/ClickHouse/ClickHouse/pull/38873) ([Igor Nikonov](https://github.com/devcrafter)). +* Executable UDF, Executable Dictionary, Executable Storage poll subprocess fix 1 second subprocess wait during subprocess termination. [#38929](https://github.com/ClickHouse/ClickHouse/pull/38929) ([Constantine Peresypkin](https://github.com/pkit)). +* * Pushdown filter to the right side of sorting join. [#39123](https://github.com/ClickHouse/ClickHouse/pull/39123) ([Vladimir C](https://github.com/vdimir)). +* Optimize accesses to system.stack_trace. [#39177](https://github.com/ClickHouse/ClickHouse/pull/39177) ([Azat Khuzhin](https://github.com/azat)). + +#### Improvement +* Optimized processing of ORDER BY in window functions. [#34632](https://github.com/ClickHouse/ClickHouse/pull/34632) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Support SQL standard create index and drop index syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* use simd to re-write the current column replicate funcion and got 2x performance boost in our unit benchmark test. [#37235](https://github.com/ClickHouse/ClickHouse/pull/37235) ([zzachimed](https://github.com/zzachimed)). +* Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)). +* Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)). +* * Bugfixes and performance improvements for `parallel_hash`. [#37648](https://github.com/ClickHouse/ClickHouse/pull/37648) ([Vladimir C](https://github.com/vdimir)). +* Support expressions with window functions. Closes [#19857](https://github.com/ClickHouse/ClickHouse/issues/19857). [#37848](https://github.com/ClickHouse/ClickHouse/pull/37848) ([Dmitry Novik](https://github.com/novikd)). 
+* S3 single objects are now removed with `RemoveObjectRequest` (sic). Fixed a bug with `S3ObjectStorage` on GCP which did not allow to use `removeFileIfExists` effectively breaking approximately half of `remove` functionality. Automatic detection for `DeleteObjects` S3 API, that is not supported by GCS. This will allow to use GCS without explicit `support_batch_delete=0` in configuration. [#37882](https://github.com/ClickHouse/ClickHouse/pull/37882) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix refcnt for unused MergeTree parts in SELECT queries (may defer parts removal). [#37913](https://github.com/ClickHouse/ClickHouse/pull/37913) ([Azat Khuzhin](https://github.com/azat)). +* Expose basic Keeper related monitoring data (via ProfileEvents and CurrentMetrics). [#38072](https://github.com/ClickHouse/ClickHouse/pull/38072) ([lingpeng0314](https://github.com/lingpeng0314)). +* Added kerberosInit function and corresponding KerberosInit class as a replacement for kinit executable. Replaced all calls of kinit in Kafka and HDFS code by call of kerberosInit function. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). +* * Add setting `multiple_joins_try_to_keep_original_names` to not rewrite identifier name on multiple JOINs rewrite, close [#34697](https://github.com/ClickHouse/ClickHouse/issues/34697). [#38149](https://github.com/ClickHouse/ClickHouse/pull/38149) ([Vladimir C](https://github.com/vdimir)). +* improved trace-visualizer UX. [#38169](https://github.com/ClickHouse/ClickHouse/pull/38169) ([Sergei Trifonov](https://github.com/serxa)). +* Add ability to pass headers to url table function / storage via sql. Closes [#37897](https://github.com/ClickHouse/ClickHouse/issues/37897). [#38176](https://github.com/ClickHouse/ClickHouse/pull/38176) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Enable trace collection for AArch64. [#38181](https://github.com/ClickHouse/ClickHouse/pull/38181) ([Maksim Kita](https://github.com/kitaisreal)). +* Do not skip symlinks in `user_defined` directory during SQL user defined functions loading. Closes [#38042](https://github.com/ClickHouse/ClickHouse/issues/38042). [#38184](https://github.com/ClickHouse/ClickHouse/pull/38184) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve the stability for hive storage integration test. Move the data prepare step into test.py. [#38260](https://github.com/ClickHouse/ClickHouse/pull/38260) ([lgbo](https://github.com/lgbo-ustc)). +* Added background cleanup of subdirectories in `store/`. In some cases clickhouse-server might left garbage subdirectories in `store/` (for example, on unsuccessful table creation) and those dirs were never been removed. Fixes [#33710](https://github.com/ClickHouse/ClickHouse/issues/33710). [#38265](https://github.com/ClickHouse/ClickHouse/pull/38265) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add `DESCRIBE CACHE` query to show cache settings from config. Add `SHOW CACHES` query to show available filesystem caches list. [#38279](https://github.com/ClickHouse/ClickHouse/pull/38279) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add access check for system drop fs cache. Support ON CLUSTER. [#38319](https://github.com/ClickHouse/ClickHouse/pull/38319) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support `auto_close` option for postgres engine connection. 
Closes [#31486](https://github.com/ClickHouse/ClickHouse/issues/31486). [#38363](https://github.com/ClickHouse/ClickHouse/pull/38363) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix PostgreSQL database engine incompatibility on upgrade from 21.3 to 22.3. Closes [#36659](https://github.com/ClickHouse/ClickHouse/issues/36659). [#38369](https://github.com/ClickHouse/ClickHouse/pull/38369) ([Kseniia Sumarokova](https://github.com/kssenii)). +* `filesystemAvailable` and similar functions now work in `clickhouse-local`. This closes [#38423](https://github.com/ClickHouse/ClickHouse/issues/38423). [#38424](https://github.com/ClickHouse/ClickHouse/pull/38424) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Hardware benchmark now has support for automatic results uploading. [#38427](https://github.com/ClickHouse/ClickHouse/pull/38427) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The table `system.asynchronous_metric_log` is further optimized for storage space. This closes [#38134](https://github.com/ClickHouse/ClickHouse/issues/38134). See the [YouTube video](https://www.youtube.com/watch?v=0fSp9SF8N8A). [#38428](https://github.com/ClickHouse/ClickHouse/pull/38428) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Functions multiMatchAny(), multiMatchAnyIndex(), multiMatchAllIndices() and their fuzzy variants now accept non-const pattern array argument. [#38485](https://github.com/ClickHouse/ClickHouse/pull/38485) ([Robert Schulze](https://github.com/rschu1ze)). +* Added L2 Squared distance and norm for both arrays and tuples. [#38545](https://github.com/ClickHouse/ClickHouse/pull/38545) ([Julian Gilyadov](https://github.com/israelg99)). +* Add revision() function. [#38555](https://github.com/ClickHouse/ClickHouse/pull/38555) ([Azat Khuzhin](https://github.com/azat)). +* Add `group_by_use_nulls` setting to make aggregation key columns nullable in the case of ROLLUP, CUBE and GROUPING SETS. Closes [#37359](https://github.com/ClickHouse/ClickHouse/issues/37359). [#38642](https://github.com/ClickHouse/ClickHouse/pull/38642) ([Dmitry Novik](https://github.com/novikd)). +* Fix GCS via proxy tunnel usage. [#38726](https://github.com/ClickHouse/ClickHouse/pull/38726) ([Azat Khuzhin](https://github.com/azat)). +* Support `\i file` in clickhouse client / local (similar to psql \i). [#38813](https://github.com/ClickHouse/ClickHouse/pull/38813) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow null modifier in columns declaration for table functions. [#38816](https://github.com/ClickHouse/ClickHouse/pull/38816) ([Kruglov Pavel](https://github.com/Avogar)). +* - Deactivate `mutations_finalizing_task` before shutdown to avoid `TABLE_IS_READ_ONLY` errors. [#38851](https://github.com/ClickHouse/ClickHouse/pull/38851) ([Raúl Marín](https://github.com/Algunenano)). +* Fix waiting of shared lock after exclusive lock failure. [#38864](https://github.com/ClickHouse/ClickHouse/pull/38864) ([Azat Khuzhin](https://github.com/azat)). +* Add the ability to specify compression level during data export. [#38907](https://github.com/ClickHouse/ClickHouse/pull/38907) ([Nikolay Degterinsky](https://github.com/evillique)). +* New option `rewrite` in `EXPLAIN AST`. If enabled, it shows AST after it's rewritten, otherwise AST of original query. Disabled by default. [#38910](https://github.com/ClickHouse/ClickHouse/pull/38910) ([Igor Nikonov](https://github.com/devcrafter)). +* - Stop reporting Zookeeper "Node exists" exceptions in system.errors when they are expected. 
[#38961](https://github.com/ClickHouse/ClickHouse/pull/38961) ([Raúl Marín](https://github.com/Algunenano)). +* Allow to specify globs `* or {expr1, expr2, expr3}` inside a key for `clickhouse-extract-from-config` tool. [#38966](https://github.com/ClickHouse/ClickHouse/pull/38966) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add option enabling that SELECT from the system database requires grant. Details:. [#38970](https://github.com/ClickHouse/ClickHouse/pull/38970) ([Vitaly Baranov](https://github.com/vitlibar)). +* - clearOldLogs: Don't report KEEPER_EXCEPTION on concurrent deletes. [#39016](https://github.com/ClickHouse/ClickHouse/pull/39016) ([Raúl Marín](https://github.com/Algunenano)). +* clickhouse-keeper improvement: persist metainformation about keeper servers to disk. [#39069](https://github.com/ClickHouse/ClickHouse/pull/39069) ([Antonio Andelic](https://github.com/antonio2368)). +* Continue without exception when running out of disk space when using filesystem cache. [#39106](https://github.com/ClickHouse/ClickHouse/pull/39106) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Handling SIGTERM signals from k8s. [#39130](https://github.com/ClickHouse/ClickHouse/pull/39130) ([Timur Solodovnikov](https://github.com/tsolodov)). +* SQL function multiStringAllPositions() now accepts non-const needle arguments. [#39167](https://github.com/ClickHouse/ClickHouse/pull/39167) ([Robert Schulze](https://github.com/rschu1ze)). +* Add merge_algorithm (Undecided, Horizontal, Vertical) to system.part_log. [#39181](https://github.com/ClickHouse/ClickHouse/pull/39181) ([Azat Khuzhin](https://github.com/azat)). +* Improve isNullable/isConstant/isNull/isNotNull performance for LowCardinality argument. [#39192](https://github.com/ClickHouse/ClickHouse/pull/39192) ([Kruglov Pavel](https://github.com/Avogar)). +* - Don't report system.errors when the disk is not rotational. [#39216](https://github.com/ClickHouse/ClickHouse/pull/39216) ([Raúl Marín](https://github.com/Algunenano)). +* Metric `result_bytes` for `INSERT` queries in `system.query_log` shows number of bytes inserted. Previously value was incorrect and stored the same value as `result_rows`. [#39225](https://github.com/ClickHouse/ClickHouse/pull/39225) ([Ilya Yatsishin](https://github.com/qoega)). +* The CPU usage metric in clickhouse-client will be displayed in a better way. Fixes [#38756](https://github.com/ClickHouse/ClickHouse/issues/38756). [#39280](https://github.com/ClickHouse/ClickHouse/pull/39280) ([Sergei Trifonov](https://github.com/serxa)). +* Rethrow exception on filesystem cache initialisation on server startup, better error message. [#39386](https://github.com/ClickHouse/ClickHouse/pull/39386) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support milliseconds, microseconds and nanoseconds in `parseTimeDelta` function. [#39447](https://github.com/ClickHouse/ClickHouse/pull/39447) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Bug Fix +* Fix crash when executing GRANT ALL ON *.* with ON CLUSTER. It was broken in https://github.com/ClickHouse/ClickHouse/pull/35767. This closes [#38618](https://github.com/ClickHouse/ClickHouse/issues/38618). [#38674](https://github.com/ClickHouse/ClickHouse/pull/38674) ([Vitaly Baranov](https://github.com/vitlibar)). +* * Fixed crash caused by IHiveFile be shared among threads. [#38887](https://github.com/ClickHouse/ClickHouse/pull/38887) ([lgbo](https://github.com/lgbo-ustc)). 
+ +#### Build/Testing/Packaging Improvement +* - Apply Clang Thread Safety Analysis (TSA) annotations to ClickHouse. [#38068](https://github.com/ClickHouse/ClickHouse/pull/38068) ([Robert Schulze](https://github.com/rschu1ze)). +* - System table "system.licenses" is now correctly populated on Mac (Darwin). [#38294](https://github.com/ClickHouse/ClickHouse/pull/38294) ([Robert Schulze](https://github.com/rschu1ze)). +* Handle full queue exception in clickhouse-test. If it happened we need to collect debug info to understand what queries didn't finish. [#38490](https://github.com/ClickHouse/ClickHouse/pull/38490) ([Dmitry Novik](https://github.com/novikd)). +* - Change `all|noarch` packages to architecture-dependent - Fix some documentation for it - Push aarch64|arm64 packages to artifactory and release assets - Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add `clickhouse-diagnostics` binary to the packages. [#38647](https://github.com/ClickHouse/ClickHouse/pull/38647) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Always print stacktraces if test queue is full. Follow up [#38490](https://github.com/ClickHouse/ClickHouse/issues/38490) cc @tavplubix. [#38662](https://github.com/ClickHouse/ClickHouse/pull/38662) ([Dmitry Novik](https://github.com/novikd)). +* Align branches within a 32B boundary to make benchmark more stable. [#38988](https://github.com/ClickHouse/ClickHouse/pull/38988) ([Guo Wangyang](https://github.com/guowangy)). +* Fix LSan by fixing getauxval(). [#39299](https://github.com/ClickHouse/ClickHouse/pull/39299) ([Azat Khuzhin](https://github.com/azat)). +* Adapt universal installation script for FreeBSD. [#39302](https://github.com/ClickHouse/ClickHouse/pull/39302) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Fix projection exception when aggregation keys are wrapped inside other functions. This fixes [#37151](https://github.com/ClickHouse/ClickHouse/issues/37151). [#37155](https://github.com/ClickHouse/ClickHouse/pull/37155) ([Amos Bird](https://github.com/amosbird)). +* Fix possible logical error `... with argument with type Nothing and default implementation for Nothing is expected to return result with type Nothing, got ...` in some functions. Closes: [#37610](https://github.com/ClickHouse/ClickHouse/issues/37610) Closes: [#37741](https://github.com/ClickHouse/ClickHouse/issues/37741). [#37759](https://github.com/ClickHouse/ClickHouse/pull/37759) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix incorrect columns order in subqueries of UNION (in case of duplicated columns in subselects may produce incorrect result). [#37887](https://github.com/ClickHouse/ClickHouse/pull/37887) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect work of MODIFY ALTER Column with column names that contain dots. Closes [#37907](https://github.com/ClickHouse/ClickHouse/issues/37907). [#37971](https://github.com/ClickHouse/ClickHouse/pull/37971) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading of sparse columns from `MergeTree` tables that store their data in S3. [#37978](https://github.com/ClickHouse/ClickHouse/pull/37978) ([Anton Popov](https://github.com/CurtizJ)). +* Fix rounding for `Decimal128/Decimal256` with more than 19-digits long scale. 
[#38027](https://github.com/ClickHouse/ClickHouse/pull/38027) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix possible crash in `Distributed` async insert in case of removing a replica from config. [#38029](https://github.com/ClickHouse/ClickHouse/pull/38029) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix "Missing columns" for GLOBAL JOIN with CTE w/o alias. [#38056](https://github.com/ClickHouse/ClickHouse/pull/38056) ([Azat Khuzhin](https://github.com/azat)). +* Rewrite tuple functions as literals in backwards-compatibility mode. [#38096](https://github.com/ClickHouse/ClickHouse/pull/38096) ([Anton Kozlov](https://github.com/tonickkozlov)). +* - Fix redundant memory reservation for output block during `ORDER BY`. [#38127](https://github.com/ClickHouse/ClickHouse/pull/38127) ([iyupeng](https://github.com/iyupeng)). +* Fix possible logical error `Bad cast from type DB::IColumn* to DB::ColumnNullable*` in array mapped functions. Closes [#38006](https://github.com/ClickHouse/ClickHouse/issues/38006). [#38132](https://github.com/ClickHouse/ClickHouse/pull/38132) ([Kruglov Pavel](https://github.com/Avogar)). +* * Fix temporary name clash in partial merge join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#38135](https://github.com/ClickHouse/ClickHouse/pull/38135) ([Vladimir C](https://github.com/vdimir)). +* With table ```SQL CREATE TABLE nested_name_tuples ( `a` Tuple(x String, y Tuple(i Int32, j String)) ) ENGINE = Memory; ```. [#38136](https://github.com/ClickHouse/ClickHouse/pull/38136) ([lgbo](https://github.com/lgbo-ustc)). +* Fix bug with nested short-circuit functions that led to execution of arguments even if condition is false. Closes [#38040](https://github.com/ClickHouse/ClickHouse/issues/38040). [#38173](https://github.com/ClickHouse/ClickHouse/pull/38173) ([Kruglov Pavel](https://github.com/Avogar)). +* (Window View is a experimental feature) Fix LOGICAL_ERROR for WINDOW VIEW with incorrect structure. [#38205](https://github.com/ClickHouse/ClickHouse/pull/38205) ([Azat Khuzhin](https://github.com/azat)). +* Update librdkafka submodule to fix crash when an OAUTHBEARER refresh callback is set. [#38225](https://github.com/ClickHouse/ClickHouse/pull/38225) ([Rafael Acevedo](https://github.com/racevedoo)). +* Do not allow recursive usage of OvercommitTracker during logging. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794) cc @tavplubix @davenger. [#38246](https://github.com/ClickHouse/ClickHouse/pull/38246) ([Dmitry Novik](https://github.com/novikd)). +* Fix INSERT into Distributed hung due to ProfileEvents. [#38307](https://github.com/ClickHouse/ClickHouse/pull/38307) ([Azat Khuzhin](https://github.com/azat)). +* Fix retries in PostgreSQL engine. [#38310](https://github.com/ClickHouse/ClickHouse/pull/38310) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix optimization in PartialSortingTransform (SIGSEGV and possible incorrect result). [#38324](https://github.com/ClickHouse/ClickHouse/pull/38324) ([Azat Khuzhin](https://github.com/azat)). +* Fix RabbitMQ with formats based on PeekableReadBuffer. Closes [#38061](https://github.com/ClickHouse/ClickHouse/issues/38061). [#38356](https://github.com/ClickHouse/ClickHouse/pull/38356) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible `Invalid number of rows in Chunk` in materialised pg. Closes [#37323](https://github.com/ClickHouse/ClickHouse/issues/37323). 
[#38360](https://github.com/ClickHouse/ClickHouse/pull/38360) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix RabbitMQ configuration with connection string setting. Closes [#36531](https://github.com/ClickHouse/ClickHouse/issues/36531). [#38365](https://github.com/ClickHouse/ClickHouse/pull/38365) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix PostgreSQL engine not using PostgreSQL schema when retrieving array dimension size. Closes [#36755](https://github.com/ClickHouse/ClickHouse/issues/36755). Closes [#36772](https://github.com/ClickHouse/ClickHouse/issues/36772). [#38366](https://github.com/ClickHouse/ClickHouse/pull/38366) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix incorrect result of distributed queries with `DISTINCT` and `LIMIT`. Fixes [#38282](https://github.com/ClickHouse/ClickHouse/issues/38282). [#38371](https://github.com/ClickHouse/ClickHouse/pull/38371) ([Anton Popov](https://github.com/CurtizJ)). +* fix: expose new CH keeper port in Dockerfile clickhouse/clickhouse-keeper fix: use correct KEEPER_CONFIG filename in clickhouse/clickhouse-keeper docker image. [#38462](https://github.com/ClickHouse/ClickHouse/pull/38462) ([Evgeny Kruglov](https://github.com/nordluf)). +* Fix parts removal (will be left forever if they had not been removed on server shutdown) after incorrect server shutdown. [#38486](https://github.com/ClickHouse/ClickHouse/pull/38486) ([Azat Khuzhin](https://github.com/azat)). +* Fixes [#38498](https://github.com/ClickHouse/ClickHouse/issues/38498) Current Implementation is similar to what shell does mentiond by @rschu1ze [here](https://github.com/ClickHouse/ClickHouse/pull/38502#issuecomment-1169057723). [#38502](https://github.com/ClickHouse/ClickHouse/pull/38502) ([Heena Bansal](https://github.com/HeenaBansal2009)). +* Fix table creation to avoid replication issues with pre-22.4 replicas. [#38541](https://github.com/ClickHouse/ClickHouse/pull/38541) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash for `mapUpdate`, `mapFilter` functions when using with constant map argument. Closes [#38547](https://github.com/ClickHouse/ClickHouse/issues/38547). [#38553](https://github.com/ClickHouse/ClickHouse/pull/38553) ([hexiaoting](https://github.com/hexiaoting)). +* Fix wrong results of countSubstrings() & position() on patterns with 0-bytes. [#38589](https://github.com/ClickHouse/ClickHouse/pull/38589) ([Robert Schulze](https://github.com/rschu1ze)). +* Now it's possible to start a clickhouse-server and attach/detach tables even for tables with the incorrect values of IPv4/IPv6 representation. Proper fix for issue [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#38590](https://github.com/ClickHouse/ClickHouse/pull/38590) ([alesapin](https://github.com/alesapin)). +* Adapt some more nodes to avoid issues with pre-22.4 replicas. [#38627](https://github.com/ClickHouse/ClickHouse/pull/38627) ([Raúl Marín](https://github.com/Algunenano)). +* Fix toHour() monotonicity which can lead to incorrect query result (incorrect index analysis). This fixes [#38333](https://github.com/ClickHouse/ClickHouse/issues/38333). [#38675](https://github.com/ClickHouse/ClickHouse/pull/38675) ([Amos Bird](https://github.com/amosbird)). +* `rankCorr` function will work correctly if some arguments are NaNs. This closes [#38396](https://github.com/ClickHouse/ClickHouse/issues/38396). [#38722](https://github.com/ClickHouse/ClickHouse/pull/38722) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix `parallel_view_processing=1` with `optimize_trivial_insert_select=1`. Fix `max_insert_threads` while pushing to views. [#38731](https://github.com/ClickHouse/ClickHouse/pull/38731) ([Azat Khuzhin](https://github.com/azat)). +* Fix use-after-free for Map combinator that leads to incorrect result. [#38748](https://github.com/ClickHouse/ClickHouse/pull/38748) ([Azat Khuzhin](https://github.com/azat)). +* Fix throwing exception for seekable read from s3 (exception was not thrown). [#38773](https://github.com/ClickHouse/ClickHouse/pull/38773) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix checking whether s3 storage support parallel writes. It resulted in s3 parallel writes not working. [#38792](https://github.com/ClickHouse/ClickHouse/pull/38792) ([chen](https://github.com/xiedeyantu)). +* Fix s3 seekable reads with parallel read buffer. (Affected memory usage during query). Closes [#38258](https://github.com/ClickHouse/ClickHouse/issues/38258). [#38802](https://github.com/ClickHouse/ClickHouse/pull/38802) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621). [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* MergeTree fix possible logical error for Vertical merges. [#38859](https://github.com/ClickHouse/ClickHouse/pull/38859) ([Maksim Kita](https://github.com/kitaisreal)). +* - Fix settings profile with seconds unit. [#38896](https://github.com/ClickHouse/ClickHouse/pull/38896) ([Raúl Marín](https://github.com/Algunenano)). +* Fix incorrect partition pruning when there is a nullable partition. This fixes [#38941](https://github.com/ClickHouse/ClickHouse/issues/38941). [#38946](https://github.com/ClickHouse/ClickHouse/pull/38946) ([Amos Bird](https://github.com/amosbird)). +* Fix fsync_part_directory for fetches. [#38993](https://github.com/ClickHouse/ClickHouse/pull/38993) ([Azat Khuzhin](https://github.com/azat)). +* Functions multiMatch[Fuzzy](AllIndices/Any/AnyIndex)() no throw a logical error if the needle argument is empty. [#39012](https://github.com/ClickHouse/ClickHouse/pull/39012) ([Robert Schulze](https://github.com/rschu1ze)). +* Any allocations inside OvercommitTracker may lead to deadlock. Logging was not very informative so it's easier just to remove logging. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794). [#39030](https://github.com/ClickHouse/ClickHouse/pull/39030) ([Dmitry Novik](https://github.com/novikd)). +* Fix toHour() monotonicity which can lead to incorrect query result (incorrect index analysis). This fixes [#38333](https://github.com/ClickHouse/ClickHouse/issues/38333). [#39037](https://github.com/ClickHouse/ClickHouse/pull/39037) ([Amos Bird](https://github.com/amosbird)). +* Fix bug in filesystem cache that could happen in some corner case which coincided with cache capacity hitting the limit. Closes [#39066](https://github.com/ClickHouse/ClickHouse/issues/39066). [#39070](https://github.com/ClickHouse/ClickHouse/pull/39070) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix ActionsDAG construction for arguments of window expressions. Fixes [#38538](https://github.com/ClickHouse/ClickHouse/issues/38538) Allow using of higher-order functions in window expressions. [#39112](https://github.com/ClickHouse/ClickHouse/pull/39112) ([Dmitry Novik](https://github.com/novikd)). +* Keep `LowCardinality` type in `tuple()` function. 
Previously `LowCardinality` type was dropped and elements of the created tuple had the underlying type of `LowCardinality`. [#39113](https://github.com/ClickHouse/ClickHouse/pull/39113) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error `Block structure mismatch` which could happen for INSERT into a table with an attached MATERIALIZED VIEW and enabled setting `extremes = 1`. Closes [#29759](https://github.com/ClickHouse/ClickHouse/issues/29759) and [#38729](https://github.com/ClickHouse/ClickHouse/issues/38729). [#39125](https://github.com/ClickHouse/ClickHouse/pull/39125) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix unexpected query result when both `optimize_trivial_count_query` and `empty_result_for_aggregation_by_empty_set` are set to true. This fixes [#39140](https://github.com/ClickHouse/ClickHouse/issues/39140). [#39155](https://github.com/ClickHouse/ClickHouse/pull/39155) ([Amos Bird](https://github.com/amosbird)). +* Fixed error `Not found column Type in block` in selects with `PREWHERE` and read-in-order optimizations. [#39157](https://github.com/ClickHouse/ClickHouse/pull/39157) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix extremely rare race condition during hardlinking for remote fs. The only way to reproduce it is a concurrent run of backups. [#39190](https://github.com/ClickHouse/ClickHouse/pull/39190) ([alesapin](https://github.com/alesapin)). +* Fix fetch of in-memory part with `allow_remote_fs_zero_copy_replication`. [#39214](https://github.com/ClickHouse/ClickHouse/pull/39214) ([Azat Khuzhin](https://github.com/azat)). +* Fix NOEXCEPT_SCOPE (before it calls std::terminate and loses the exception). [#39229](https://github.com/ClickHouse/ClickHouse/pull/39229) ([Azat Khuzhin](https://github.com/azat)). +* Declare RabbitMQ queue without default arguments `x-max-length` and `x-overflow`. [#39259](https://github.com/ClickHouse/ClickHouse/pull/39259) ([rnbondarenko](https://github.com/rnbondarenko)). +* Fix segmentation fault in MaterializedPostgreSQL database engine, which could happen if some exception occurred at replication initialisation. Closes [#36939](https://github.com/ClickHouse/ClickHouse/issues/36939). [#39272](https://github.com/ClickHouse/ClickHouse/pull/39272) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix incorrect query for fetching PostgreSQL tables in the PostgreSQL database engine. Closes [#33502](https://github.com/ClickHouse/ClickHouse/issues/33502). [#39283](https://github.com/ClickHouse/ClickHouse/pull/39283) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible UB in MergeTreeBackgroundExecutor (leads to SIGSEGV on race with DROP/DETACH). [#39342](https://github.com/ClickHouse/ClickHouse/pull/39342) ([Azat Khuzhin](https://github.com/azat)). +* Avoid possible abort() in CapnProto on exception destruction. Closes [#30706](https://github.com/ClickHouse/ClickHouse/issues/30706). [#39365](https://github.com/ClickHouse/ClickHouse/pull/39365) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix behaviour of dictHas for direct dictionaries when multiple lookups to the same key are made in a single action. [#39385](https://github.com/ClickHouse/ClickHouse/pull/39385) ([James Morrison](https://github.com/jawm)). +* Fix crash which may happen while reading from a dictionary with `DateTime64` attribute. Fixes [#38930](https://github.com/ClickHouse/ClickHouse/issues/38930). [#39391](https://github.com/ClickHouse/ClickHouse/pull/39391) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix WriteBuffer finalize in destructor when cancelling a query, which could lead to a stuck query or even termination. Closes [#38199](https://github.com/ClickHouse/ClickHouse/issues/38199). [#39396](https://github.com/ClickHouse/ClickHouse/pull/39396) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix UB (stack-use-after-scope) in extractAll(). [#39397](https://github.com/ClickHouse/ClickHouse/pull/39397) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect query result when trivial count optimization is in effect with array join. This fixes [#39431](https://github.com/ClickHouse/ClickHouse/issues/39431). [#39444](https://github.com/ClickHouse/ClickHouse/pull/39444) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Disable send_logs_level for INSERT into Distributed to avoid a possible hang. [#35075](https://github.com/ClickHouse/ClickHouse/pull/35075) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Add a setting to use more memory for zstd decompression"'. [#38194](https://github.com/ClickHouse/ClickHouse/pull/38194) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Revert "Add a setting to use more memory for zstd decompression""'. [#38196](https://github.com/ClickHouse/ClickHouse/pull/38196) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "ClickHouse's boringssl module updated to the official version of the FIPS compliant."'. [#38201](https://github.com/ClickHouse/ClickHouse/pull/38201) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix optimization in PartialSortingTransform (SIGSEGV and possible incorrect result)"'. [#38361](https://github.com/ClickHouse/ClickHouse/pull/38361) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Add support for io_uring read method"'. [#38377](https://github.com/ClickHouse/ClickHouse/pull/38377) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Revert "Fix optimization in PartialSortingTransform (SIGSEGV and possible incorrect result)""'. [#38449](https://github.com/ClickHouse/ClickHouse/pull/38449) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Don't spoil return code of integration tests runner with redundant tee'. [#38548](https://github.com/ClickHouse/ClickHouse/pull/38548) ([Vladimir Chebotarev](https://github.com/excitoon)). +* NO CL ENTRY: 'Revert "Non Negative Derivative window function"'. [#38551](https://github.com/ClickHouse/ClickHouse/pull/38551) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Upload to S3 compressed self-extracting clickhouse"'. [#38788](https://github.com/ClickHouse/ClickHouse/pull/38788) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* NO CL ENTRY: 'Revert "Smallish updates of dev guide"'. [#38848](https://github.com/ClickHouse/ClickHouse/pull/38848) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Fix toHour() monotonicity which can lead to incorrect query result (incorrect index analysis)"'. [#39001](https://github.com/ClickHouse/ClickHouse/pull/39001) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Fix WriteBuffer finalize in destructor when cacnel query"'. [#39433](https://github.com/ClickHouse/ClickHouse/pull/39433) ([Kruglov Pavel](https://github.com/Avogar)). +* NO CL ENTRY: 'Revert "[RFC] Fix LSan by fixing getauxval()"'.
[#39434](https://github.com/ClickHouse/ClickHouse/pull/39434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Remove broken optimisation in Direct dictionary dictHas implementation"'. [#39461](https://github.com/ClickHouse/ClickHouse/pull/39461) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Fix trivial count optimization with array join"'. [#39466](https://github.com/ClickHouse/ClickHouse/pull/39466) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Separate data storage abstraction for MergeTree [#36555](https://github.com/ClickHouse/ClickHouse/pull/36555) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Randomize settings related to in-order read/aggregation [#36914](https://github.com/ClickHouse/ClickHouse/pull/36914) ([Azat Khuzhin](https://github.com/azat)). +* Merge tree reader support for multiple read/filter steps: row level filter, prewhere, ... [#37165](https://github.com/ClickHouse/ClickHouse/pull/37165) ([Alexander Gololobov](https://github.com/davenger)). +* Backup Improvements 6 [#37358](https://github.com/ClickHouse/ClickHouse/pull/37358) ([Vitaly Baranov](https://github.com/vitlibar)). +* Move `updateInputStream` to `ITransformingStep` [#37393](https://github.com/ClickHouse/ClickHouse/pull/37393) ([Nikita Taranov](https://github.com/nickitat)). +* Proper wait of the clickhouse-server in tests [#37560](https://github.com/ClickHouse/ClickHouse/pull/37560) ([Azat Khuzhin](https://github.com/azat)). +* Upgrade curl to 7.83.1 [#37795](https://github.com/ClickHouse/ClickHouse/pull/37795) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Try fix flaky tests with transactions [#37822](https://github.com/ClickHouse/ClickHouse/pull/37822) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Send perf tests results to ci database [#37841](https://github.com/ClickHouse/ClickHouse/pull/37841) ([Vladimir C](https://github.com/vdimir)). +* Remove duplicate peak mem log [#37860](https://github.com/ClickHouse/ClickHouse/pull/37860) ([Amos Bird](https://github.com/amosbird)). +* tests: fix log_comment (extra quotes) [#37932](https://github.com/ClickHouse/ClickHouse/pull/37932) ([Azat Khuzhin](https://github.com/azat)). +* Throw exception when xml user profile does not exist [#38024](https://github.com/ClickHouse/ClickHouse/pull/38024) ([nvartolomei](https://github.com/nvartolomei)). +* Add `SYNC` command to internal ZooKeeper client [#38047](https://github.com/ClickHouse/ClickHouse/pull/38047) ([Antonio Andelic](https://github.com/antonio2368)). +* Better support of GCP storage [#38069](https://github.com/ClickHouse/ClickHouse/pull/38069) ([Anton Popov](https://github.com/CurtizJ)). +* Build artifacts upload [#38086](https://github.com/ClickHouse/ClickHouse/pull/38086) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Extract some diff from pr [#36171](https://github.com/ClickHouse/ClickHouse/issues/36171) [#38088](https://github.com/ClickHouse/ClickHouse/pull/38088) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Aggregate functions added restrict into batch methods [#38090](https://github.com/ClickHouse/ClickHouse/pull/38090) ([Maksim Kita](https://github.com/kitaisreal)). +* Add perf checkers to all Jepsen tests [#38091](https://github.com/ClickHouse/ClickHouse/pull/38091) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Some fixes for tests with tsan [#38106](https://github.com/ClickHouse/ClickHouse/pull/38106) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Bring back [#36396](https://github.com/ClickHouse/ClickHouse/issues/36396) [#38110](https://github.com/ClickHouse/ClickHouse/pull/38110) ([Nikita Taranov](https://github.com/nickitat)). +* More suppressions for backward compatibility check [#38131](https://github.com/ClickHouse/ClickHouse/pull/38131) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Cherry pick [#38137](https://github.com/ClickHouse/ClickHouse/pull/38137) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable parameters for non direct executable user defined functions [#38142](https://github.com/ClickHouse/ClickHouse/pull/38142) ([Maksim Kita](https://github.com/kitaisreal)). +* SortDescription compile fix typo [#38144](https://github.com/ClickHouse/ClickHouse/pull/38144) ([Maksim Kita](https://github.com/kitaisreal)). +* Update version after release [#38147](https://github.com/ClickHouse/ClickHouse/pull/38147) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* New changelog and versions updated [#38148](https://github.com/ClickHouse/ClickHouse/pull/38148) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Some fixes for clickhouse-disks [#38150](https://github.com/ClickHouse/ClickHouse/pull/38150) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove processor description from span attributes - it is not working [#38157](https://github.com/ClickHouse/ClickHouse/pull/38157) ([Ilya Yatsishin](https://github.com/qoega)). +* Bump minimum / maximum LLVM to 12 / 14 [#38170](https://github.com/ClickHouse/ClickHouse/pull/38170) ([Robert Schulze](https://github.com/rschu1ze)). +* Disk transaction [#38182](https://github.com/ClickHouse/ClickHouse/pull/38182) ([alesapin](https://github.com/alesapin)). +* Check row size to avoid out of bounds access in PostgreSQLSource [#38190](https://github.com/ClickHouse/ClickHouse/pull/38190) ([Alexander Gololobov](https://github.com/davenger)). +* tests: add no-backward-compatibility-check for 02067_lost_part_s3 [#38195](https://github.com/ClickHouse/ClickHouse/pull/38195) ([Azat Khuzhin](https://github.com/azat)). +* tests/stress: fix TSan detection (enables thread fuzzer for non-TSan builds) [#38207](https://github.com/ClickHouse/ClickHouse/pull/38207) ([Azat Khuzhin](https://github.com/azat)). +* tests: disable 01646_system_restart_replicas_smoke under stress tests [#38212](https://github.com/ClickHouse/ClickHouse/pull/38212) ([Azat Khuzhin](https://github.com/azat)). +* tests/stress: fix TSan detection [#38213](https://github.com/ClickHouse/ClickHouse/pull/38213) ([Azat Khuzhin](https://github.com/azat)). +* buffer's getFileSize small changes [#38227](https://github.com/ClickHouse/ClickHouse/pull/38227) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix test for system table count in diag tool [#38236](https://github.com/ClickHouse/ClickHouse/pull/38236) ([Dale McDiarmid](https://github.com/gingerwizard)). +* Update version_date.tsv after v22.3.7.28-lts [#38237](https://github.com/ClickHouse/ClickHouse/pull/38237) ([github-actions[bot]](https://github.com/apps/github-actions)). +* Changelog attrs [#38238](https://github.com/ClickHouse/ClickHouse/pull/38238) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix reading from s3 in some corner cases [#38239](https://github.com/ClickHouse/ClickHouse/pull/38239) ([Anton Popov](https://github.com/CurtizJ)). 
+* use utility methods to access x509 struct fields. [#38251](https://github.com/ClickHouse/ClickHouse/pull/38251) ([larryluogit](https://github.com/larryluogit)). +* Don't try to kill empty list of containers in `integration/runner` II [#38269](https://github.com/ClickHouse/ClickHouse/pull/38269) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Improve runners AMI and init scripts [#38273](https://github.com/ClickHouse/ClickHouse/pull/38273) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update thrift to 0.16.0 [#38280](https://github.com/ClickHouse/ClickHouse/pull/38280) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Extract some diff from [#36171](https://github.com/ClickHouse/ClickHouse/issues/36171) [#38285](https://github.com/ClickHouse/ClickHouse/pull/38285) ([Kseniia Sumarokova](https://github.com/kssenii)). +* fix trace-viz zoom anomalies [#38287](https://github.com/ClickHouse/ClickHouse/pull/38287) ([Sergei Trifonov](https://github.com/serxa)). +* Integration tests volume [#38291](https://github.com/ClickHouse/ClickHouse/pull/38291) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* fix typo in view.md [#38292](https://github.com/ClickHouse/ClickHouse/pull/38292) ([Anton Petrov](https://github.com/gsenseless)). +* Backup improvements 7 [#38299](https://github.com/ClickHouse/ClickHouse/pull/38299) ([Vitaly Baranov](https://github.com/vitlibar)). +* Document why the submodule check does not halt the configuration [#38304](https://github.com/ClickHouse/ClickHouse/pull/38304) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix misleading error message while s3 schema inference [#38306](https://github.com/ClickHouse/ClickHouse/pull/38306) ([Kruglov Pavel](https://github.com/Avogar)). +* Update README.md [#38313](https://github.com/ClickHouse/ClickHouse/pull/38313) ([Yuko Takagi](https://github.com/yukotakagi)). +* Ban projections for zero-copy replication in a right way [#38322](https://github.com/ClickHouse/ClickHouse/pull/38322) ([alesapin](https://github.com/alesapin)). +* Checkout full repositories for performance tests [#38327](https://github.com/ClickHouse/ClickHouse/pull/38327) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fixed comments [#38331](https://github.com/ClickHouse/ClickHouse/pull/38331) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Try to fix 02305_schema_inference_with_globs [#38337](https://github.com/ClickHouse/ClickHouse/pull/38337) ([Kruglov Pavel](https://github.com/Avogar)). +* Extend ZooKeeper list request with support for filtering persistent or ephemeral nodes only [#38338](https://github.com/ClickHouse/ClickHouse/pull/38338) ([Antonio Andelic](https://github.com/antonio2368)). +* Upload logs for getting all tests command [#38343](https://github.com/ClickHouse/ClickHouse/pull/38343) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Followup test fix for ban projections [#38351](https://github.com/ClickHouse/ClickHouse/pull/38351) ([alesapin](https://github.com/alesapin)). +* Added --recursive to clickhouse-disks list [#38354](https://github.com/ClickHouse/ClickHouse/pull/38354) ([Alexander Gololobov](https://github.com/davenger)). +* Adding TLS V13 Test [#38355](https://github.com/ClickHouse/ClickHouse/pull/38355) ([larryluogit](https://github.com/larryluogit)). +* Better exception messages on wrong table engines/functions argument types [#38362](https://github.com/ClickHouse/ClickHouse/pull/38362) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Better error message for failed odbc query [#38364](https://github.com/ClickHouse/ClickHouse/pull/38364) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Simplify parts commit methods [#38380](https://github.com/ClickHouse/ClickHouse/pull/38380) ([alesapin](https://github.com/alesapin)). +* Update docker-compose to try get rid of v1 errors [#38394](https://github.com/ClickHouse/ClickHouse/pull/38394) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Correct submodule after "base-x" commit [#38414](https://github.com/ClickHouse/ClickHouse/pull/38414) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better hardware benchmark [#38419](https://github.com/ClickHouse/ClickHouse/pull/38419) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Refactoring to enable multi-match functions with non-const needles [#38434](https://github.com/ClickHouse/ClickHouse/pull/38434) ([Robert Schulze](https://github.com/rschu1ze)). +* more consistent work with paths in object storages [#38436](https://github.com/ClickHouse/ClickHouse/pull/38436) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Distinct sorted: calculate column positions once [#38438](https://github.com/ClickHouse/ClickHouse/pull/38438) ([Igor Nikonov](https://github.com/devcrafter)). +* Small improvement of the error message to hint at possible issue [#38458](https://github.com/ClickHouse/ClickHouse/pull/38458) ([Miel Donkers](https://github.com/mdonkers)). +* Fix comment [#38465](https://github.com/ClickHouse/ClickHouse/pull/38465) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Follow up for [#38436](https://github.com/ClickHouse/ClickHouse/issues/38436) [#38466](https://github.com/ClickHouse/ClickHouse/pull/38466) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add 22.7 release webinar. [#38481](https://github.com/ClickHouse/ClickHouse/pull/38481) ([Yuko Takagi](https://github.com/yukotakagi)). +* Add some TSA annotations [#38487](https://github.com/ClickHouse/ClickHouse/pull/38487) ([Alexander Tokmakov](https://github.com/tavplubix)). +* tests: cleanup tmp data in 02335_column_ttl_expired_column_optimization [#38488](https://github.com/ClickHouse/ClickHouse/pull/38488) ([Azat Khuzhin](https://github.com/azat)). +* Cleanup: local clang-tidy warnings founded during review [#38489](https://github.com/ClickHouse/ClickHouse/pull/38489) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix some clang-tidy warnings in headers [#38491](https://github.com/ClickHouse/ClickHouse/pull/38491) ([Robert Schulze](https://github.com/rschu1ze)). +* A tiny improvement in report logging [#38507](https://github.com/ClickHouse/ClickHouse/pull/38507) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* tests: fix 02305_schema_inference_with_globs flakiness [#38511](https://github.com/ClickHouse/ClickHouse/pull/38511) ([Azat Khuzhin](https://github.com/azat)). +* Try to fix flaky test [#38516](https://github.com/ClickHouse/ClickHouse/pull/38516) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `_csv.Error: field larger than field limit` [#38518](https://github.com/ClickHouse/ClickHouse/pull/38518) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix application errors grep in stress test [#38520](https://github.com/ClickHouse/ClickHouse/pull/38520) ([Kruglov Pavel](https://github.com/Avogar)). +* Use of disk batch operations in MergeTree [#38531](https://github.com/ClickHouse/ClickHouse/pull/38531) ([alesapin](https://github.com/alesapin)). 
+* Backup Improvements 8 [#38537](https://github.com/ClickHouse/ClickHouse/pull/38537) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update poco [#38540](https://github.com/ClickHouse/ClickHouse/pull/38540) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Don't use std::unique_lock unless we need to [#38542](https://github.com/ClickHouse/ClickHouse/pull/38542) ([Robert Schulze](https://github.com/rschu1ze)). +* Rename slightly weirdly named "BuilderBinTidy" to "BuilderBinClangTidy" [#38546](https://github.com/ClickHouse/ClickHouse/pull/38546) ([Robert Schulze](https://github.com/rschu1ze)). +* Don't rollback SessionID request in Keeper [#38556](https://github.com/ClickHouse/ClickHouse/pull/38556) ([Antonio Andelic](https://github.com/antonio2368)). +* Add logging in Epoll and TimerDescriptor in case of EINTR [#38559](https://github.com/ClickHouse/ClickHouse/pull/38559) ([Kruglov Pavel](https://github.com/Avogar)). +* SQL create drop index minor fixes [#38561](https://github.com/ClickHouse/ClickHouse/pull/38561) ([Maksim Kita](https://github.com/kitaisreal)). +* Update version_date.tsv and changelogs after v22.6.2.12-stable [#38563](https://github.com/ClickHouse/ClickHouse/pull/38563) ([github-actions[bot]](https://github.com/apps/github-actions)). +* Allow Ordinary database in Stress Tests [#38568](https://github.com/ClickHouse/ClickHouse/pull/38568) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Make postgres integration test great again [#38582](https://github.com/ClickHouse/ClickHouse/pull/38582) ([Ilya Yatsishin](https://github.com/qoega)). +* Add check for empty proccessors in AggregatingTransform::expandPipeline [#38584](https://github.com/ClickHouse/ClickHouse/pull/38584) ([filimonov](https://github.com/filimonov)). +* quick fix for 02112_with_fill_interval [#38587](https://github.com/ClickHouse/ClickHouse/pull/38587) ([Nikita Taranov](https://github.com/nickitat)). +* Remove zlib in mariadb-connector-c [#38599](https://github.com/ClickHouse/ClickHouse/pull/38599) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Dictionaries added TSA annotations [#38601](https://github.com/ClickHouse/ClickHouse/pull/38601) ([Maksim Kita](https://github.com/kitaisreal)). +* CacheDictionary simplify update queue [#38602](https://github.com/ClickHouse/ClickHouse/pull/38602) ([Maksim Kita](https://github.com/kitaisreal)). +* Add separate option to omit symbols from heavy contrib [#38617](https://github.com/ClickHouse/ClickHouse/pull/38617) ([Azat Khuzhin](https://github.com/azat)). +* Fix exception messages in clickhouse su [#38619](https://github.com/ClickHouse/ClickHouse/pull/38619) ([filimonov](https://github.com/filimonov)). +* Added Greenplum benchmark [#38622](https://github.com/ClickHouse/ClickHouse/pull/38622) ([Dmitry Pavlov](https://github.com/kapustor)). +* Fix typo [#38623](https://github.com/ClickHouse/ClickHouse/pull/38623) ([tiegen](https://github.com/loyispa)). +* Better diagnostics in ReplicatedMergeTreeQueue [#38641](https://github.com/ClickHouse/ClickHouse/pull/38641) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Clean out randomized integration volumes each run [#38644](https://github.com/ClickHouse/ClickHouse/pull/38644) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update README.md [#38651](https://github.com/ClickHouse/ClickHouse/pull/38651) ([Yuko Takagi](https://github.com/yukotakagi)). 
+* Better naming for stuff related to splitted debug symbols [#38654](https://github.com/ClickHouse/ClickHouse/pull/38654) ([Robert Schulze](https://github.com/rschu1ze)). +* Add test for keeper `mntr` command [#38656](https://github.com/ClickHouse/ClickHouse/pull/38656) ([alesapin](https://github.com/alesapin)). +* Update hardware benchmark script [#38672](https://github.com/ClickHouse/ClickHouse/pull/38672) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Fix strange backport titles issues [#38679](https://github.com/ClickHouse/ClickHouse/pull/38679) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Follow-up to [#38568](https://github.com/ClickHouse/ClickHouse/issues/38568) [#38680](https://github.com/ClickHouse/ClickHouse/pull/38680) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible deadlocks with `MergeTreeData::Transaction` [#38702](https://github.com/ClickHouse/ClickHouse/pull/38702) ([alesapin](https://github.com/alesapin)). +* Fix backports diff [#38703](https://github.com/ClickHouse/ClickHouse/pull/38703) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix FillingTransform [#38705](https://github.com/ClickHouse/ClickHouse/pull/38705) ([Nikita Taranov](https://github.com/nickitat)). +* Try to improve backward compatibility check [#38717](https://github.com/ClickHouse/ClickHouse/pull/38717) ([Kruglov Pavel](https://github.com/Avogar)). +* SQL create drop index fix formatting [#38720](https://github.com/ClickHouse/ClickHouse/pull/38720) ([Maksim Kita](https://github.com/kitaisreal)). +* Provide sort description for output stream in ReadFromMergeTree step [#38721](https://github.com/ClickHouse/ClickHouse/pull/38721) ([Igor Nikonov](https://github.com/devcrafter)). +* Add exp_internal for expect tests [#38728](https://github.com/ClickHouse/ClickHouse/pull/38728) ([Azat Khuzhin](https://github.com/azat)). +* Fix CLICKHOUSE_TMP in tests (fixes broken CI) [#38733](https://github.com/ClickHouse/ClickHouse/pull/38733) ([Azat Khuzhin](https://github.com/azat)). +* Add SimpleCheck [#38744](https://github.com/ClickHouse/ClickHouse/pull/38744) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Tiny tests cleanup [#38749](https://github.com/ClickHouse/ClickHouse/pull/38749) ([Azat Khuzhin](https://github.com/azat)). +* Fix replication after improper merge process [#38752](https://github.com/ClickHouse/ClickHouse/pull/38752) ([Raúl Marín](https://github.com/Algunenano)). +* tests: make aggregate_state_exception_memory_leak deterministic [#38754](https://github.com/ClickHouse/ClickHouse/pull/38754) ([Azat Khuzhin](https://github.com/azat)). +* Bump jemalloc to fix possible assertion [#38757](https://github.com/ClickHouse/ClickHouse/pull/38757) ([Azat Khuzhin](https://github.com/azat)). +* Reintroduce nonNegativeDerivative() [#38774](https://github.com/ClickHouse/ClickHouse/pull/38774) ([Andrey Zvonov](https://github.com/zvonand)). +* Temporarily disable 01710_projection_fetch_long in BC check [#38798](https://github.com/ClickHouse/ClickHouse/pull/38798) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use native Map type for OpenTelemetry attributes [#38814](https://github.com/ClickHouse/ClickHouse/pull/38814) ([Ilya Yatsishin](https://github.com/qoega)). +* Add test for segfault in Map combinator [#38831](https://github.com/ClickHouse/ClickHouse/pull/38831) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Update libprotobuf-mutator + fix build [#38834](https://github.com/ClickHouse/ClickHouse/pull/38834) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Get files changed in master since release is branched [#38836](https://github.com/ClickHouse/ClickHouse/pull/38836) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* update integration tests doc [#38837](https://github.com/ClickHouse/ClickHouse/pull/38837) ([Bharat Nallan](https://github.com/bharatnc)). +* Revert of revert of smallish devguide update [#38850](https://github.com/ClickHouse/ClickHouse/pull/38850) ([Robert Schulze](https://github.com/rschu1ze)). +* Do not override compiler if it had been already set [#38856](https://github.com/ClickHouse/ClickHouse/pull/38856) ([Azat Khuzhin](https://github.com/azat)). +* Move check for denied allocations [#38858](https://github.com/ClickHouse/ClickHouse/pull/38858) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Refactoring of code around object storages, added LocalObjectStorage (extracted this diff from PR [#36171](https://github.com/ClickHouse/ClickHouse/issues/36171)) [#38860](https://github.com/ClickHouse/ClickHouse/pull/38860) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backup Improvements 9 [#38861](https://github.com/ClickHouse/ClickHouse/pull/38861) ([Vitaly Baranov](https://github.com/vitlibar)). +* Simple cleanup: interpreters and parsers [#38876](https://github.com/ClickHouse/ClickHouse/pull/38876) ([Igor Nikonov](https://github.com/devcrafter)). +* Remove unnecessary log [#38892](https://github.com/ClickHouse/ClickHouse/pull/38892) ([Raúl Marín](https://github.com/Algunenano)). +* Update version_date.tsv and changelogs after v22.6.3.35-stable [#38894](https://github.com/ClickHouse/ClickHouse/pull/38894) ([github-actions[bot]](https://github.com/apps/github-actions)). +* Retry docker buildx commands with progressive sleep in between [#38898](https://github.com/ClickHouse/ClickHouse/pull/38898) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Avoid false positive red sanitizer asserts check in stress test [#38901](https://github.com/ClickHouse/ClickHouse/pull/38901) ([Kruglov Pavel](https://github.com/Avogar)). +* Interpreter cleanup: ContextPtr -> const ContextPtr & in parameters [#38902](https://github.com/ClickHouse/ClickHouse/pull/38902) ([Igor Nikonov](https://github.com/devcrafter)). +* Add a test for simdjson [#38933](https://github.com/ClickHouse/ClickHouse/pull/38933) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a typo [#38938](https://github.com/ClickHouse/ClickHouse/pull/38938) ([Nikolay Degterinsky](https://github.com/evillique)). +* Avoid redundant join block transformation during planning. [#38943](https://github.com/ClickHouse/ClickHouse/pull/38943) ([Amos Bird](https://github.com/amosbird)). +* Rename NUMBER_OF_DIMENSIONS_MISMATHED const to NUMBER_OF_DIMENSIONS_MISMATCHED [#38947](https://github.com/ClickHouse/ClickHouse/pull/38947) ([Vladimir Galunshchikov](https://github.com/soyayaos)). +* More careful destructor in BackupImpl [#38949](https://github.com/ClickHouse/ClickHouse/pull/38949) ([Vitaly Baranov](https://github.com/vitlibar)). +* Avoid weird exception in Keeper [#38963](https://github.com/ClickHouse/ClickHouse/pull/38963) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update version_date.tsv after v22.3.8.39-lts [#38969](https://github.com/ClickHouse/ClickHouse/pull/38969) ([github-actions[bot]](https://github.com/apps/github-actions)). 
+* Remove tag no-backward-compatibility-check for specific versions [#38971](https://github.com/ClickHouse/ClickHouse/pull/38971) ([Kruglov Pavel](https://github.com/Avogar)). +* add Hetzner benchmark [#38974](https://github.com/ClickHouse/ClickHouse/pull/38974) ([Tyler Hannan](https://github.com/tylerhannan)). +* Update version_date.tsv after v22.4.6.53-stable [#38975](https://github.com/ClickHouse/ClickHouse/pull/38975) ([github-actions[bot]](https://github.com/apps/github-actions)). +* Disable instrumentation of sanitizer death callback [#38977](https://github.com/ClickHouse/ClickHouse/pull/38977) ([Alexander Tokmakov](https://github.com/tavplubix)). +* add ryzen 9 5950 benchmark [#38979](https://github.com/ClickHouse/ClickHouse/pull/38979) ([Tyler Hannan](https://github.com/tylerhannan)). +* EXPLAIN AST rewrite: rename to optimize [#38980](https://github.com/ClickHouse/ClickHouse/pull/38980) ([Igor Nikonov](https://github.com/devcrafter)). +* add macbook pro core i7 2014 benchmark [#38981](https://github.com/ClickHouse/ClickHouse/pull/38981) ([Tyler Hannan](https://github.com/tylerhannan)). +* add Huawei TaiShan 920 Benchmark [#38982](https://github.com/ClickHouse/ClickHouse/pull/38982) ([Tyler Hannan](https://github.com/tylerhannan)). +* tests: unique ZooKeeper path for Replicated.*MergeTree tables [#38999](https://github.com/ClickHouse/ClickHouse/pull/38999) ([Azat Khuzhin](https://github.com/azat)). +* Try another suppression for [#38629](https://github.com/ClickHouse/ClickHouse/issues/38629) [#39009](https://github.com/ClickHouse/ClickHouse/pull/39009) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add docker_server.py running to backport and release CIs [#39011](https://github.com/ClickHouse/ClickHouse/pull/39011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky `test_system_merges/test.py::test_mutation_simple` [#39013](https://github.com/ClickHouse/ClickHouse/pull/39013) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix assertion in full soring merge join [#39014](https://github.com/ClickHouse/ClickHouse/pull/39014) ([Vladimir C](https://github.com/vdimir)). +* Fix flaky 00620_optimize_on_nonleader_replica_zookeeper [#39019](https://github.com/ClickHouse/ClickHouse/pull/39019) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Suppress [#38643](https://github.com/ClickHouse/ClickHouse/issues/38643) [#39024](https://github.com/ClickHouse/ClickHouse/pull/39024) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update url.md [#39025](https://github.com/ClickHouse/ClickHouse/pull/39025) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix 'Tried to lock part ... for removal second time' [#39036](https://github.com/ClickHouse/ClickHouse/pull/39036) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add more settings for randomization [#39039](https://github.com/ClickHouse/ClickHouse/pull/39039) ([Anton Popov](https://github.com/CurtizJ)). +* add ScaleFlux CSD3000 Benchmark [#39040](https://github.com/ClickHouse/ClickHouse/pull/39040) ([Tyler Hannan](https://github.com/tylerhannan)). +* BACKUP/RESTORE ON CLUSTER use async mode on replicas now. [#39046](https://github.com/ClickHouse/ClickHouse/pull/39046) ([Vitaly Baranov](https://github.com/vitlibar)). +* More stable `test_s3_zero_copy_ttl`, weakened requirement to move data to S3 in 0-5 seconds [#39064](https://github.com/ClickHouse/ClickHouse/pull/39064) ([Vladimir Chebotaryov](https://github.com/quickhouse)). 
+* Parameter --decompressor added to utils/self-extracting-executable/compressor [#39065](https://github.com/ClickHouse/ClickHouse/pull/39065) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Follow-up to [#39036](https://github.com/ClickHouse/ClickHouse/issues/39036) [#39091](https://github.com/ClickHouse/ClickHouse/pull/39091) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update registerDiskS3.cpp [#39092](https://github.com/ClickHouse/ClickHouse/pull/39092) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix code in getLeastSupertype function [#39101](https://github.com/ClickHouse/ClickHouse/pull/39101) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove some debug logging [#39102](https://github.com/ClickHouse/ClickHouse/pull/39102) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Prefix overridden add_executable() command with "clickhouse_" [#39108](https://github.com/ClickHouse/ClickHouse/pull/39108) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix meilisearch tests [#39110](https://github.com/ClickHouse/ClickHouse/pull/39110) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Properly remove projection from part in case it was removed from table metadata. [#39119](https://github.com/ClickHouse/ClickHouse/pull/39119) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update cluster.py [#39120](https://github.com/ClickHouse/ClickHouse/pull/39120) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Tiny updates for tests. [#39127](https://github.com/ClickHouse/ClickHouse/pull/39127) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix data race in CompletedPipelineExecutor. [#39132](https://github.com/ClickHouse/ClickHouse/pull/39132) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix style again [#39133](https://github.com/ClickHouse/ClickHouse/pull/39133) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix path retrieval for Keeper's state [#39148](https://github.com/ClickHouse/ClickHouse/pull/39148) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Slightly better interface of waitForMutation [#39154](https://github.com/ClickHouse/ClickHouse/pull/39154) ([Amos Bird](https://github.com/amosbird)). +* ThreadPool fixes [#39160](https://github.com/ClickHouse/ClickHouse/pull/39160) ([Azat Khuzhin](https://github.com/azat)). +* Add test for [#39132](https://github.com/ClickHouse/ClickHouse/issues/39132) [#39173](https://github.com/ClickHouse/ClickHouse/pull/39173) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Suppression for BC check (`Cannot parse string 'Hello' as UInt64`) [#39176](https://github.com/ClickHouse/ClickHouse/pull/39176) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix 01961_roaring_memory_tracking test [#39187](https://github.com/ClickHouse/ClickHouse/pull/39187) ([Dmitry Novik](https://github.com/novikd)). +* Cleanup: done during [#38719](https://github.com/ClickHouse/ClickHouse/issues/38719) (SortingStep: deduce way to sort based on … [#39191](https://github.com/ClickHouse/ClickHouse/pull/39191) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix exception in AsynchronousMetrics for s390x [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Optimize accesses to system.stack_trace (filter by name before sending signal) [#39212](https://github.com/ClickHouse/ClickHouse/pull/39212) ([Azat Khuzhin](https://github.com/azat)). 
+* Enable warning "-Wdeprecated-dynamic-exception-spec" [#39213](https://github.com/ClickHouse/ClickHouse/pull/39213) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove specialization global lock/unlock from ActionLocksManager [#39215](https://github.com/ClickHouse/ClickHouse/pull/39215) ([Azat Khuzhin](https://github.com/azat)). +* Turn some warnings on [#39223](https://github.com/ClickHouse/ClickHouse/pull/39223) ([Robert Schulze](https://github.com/rschu1ze)). +* Pass const std::string_view by value, not by reference [#39224](https://github.com/ClickHouse/ClickHouse/pull/39224) ([Kruglov Pavel](https://github.com/Avogar)). +* Minor fix for BC check [#39231](https://github.com/ClickHouse/ClickHouse/pull/39231) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backport script [#39235](https://github.com/ClickHouse/ClickHouse/pull/39235) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Do not color logs on client if they are redirected to file [#39243](https://github.com/ClickHouse/ClickHouse/pull/39243) ([Anton Popov](https://github.com/CurtizJ)). +* Remove incorrect assertion [#39245](https://github.com/ClickHouse/ClickHouse/pull/39245) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add X86 prefix to x86 performance tests [#39251](https://github.com/ClickHouse/ClickHouse/pull/39251) ([Robert Schulze](https://github.com/rschu1ze)). +* Check that the destination for a backup is not in use. [#39254](https://github.com/ClickHouse/ClickHouse/pull/39254) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix stacktraces in gdb in BC check [#39256](https://github.com/ClickHouse/ClickHouse/pull/39256) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable flaky test `test_s3_zero_copy_on_hybrid_storage` [#39258](https://github.com/ClickHouse/ClickHouse/pull/39258) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Enabled Wc99-extensions + Wsign-conversion [#39261](https://github.com/ClickHouse/ClickHouse/pull/39261) ([Robert Schulze](https://github.com/rschu1ze)). +* Pass const StringRef by value, not by reference [#39262](https://github.com/ClickHouse/ClickHouse/pull/39262) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix assertion in transactions [#39263](https://github.com/ClickHouse/ClickHouse/pull/39263) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix macosx compilation due to endian.h [#39265](https://github.com/ClickHouse/ClickHouse/pull/39265) ([Jordi Villar](https://github.com/jrdi)). +* Another supression for BC check [#39276](https://github.com/ClickHouse/ClickHouse/pull/39276) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix builder image for releases w/o diagnostics tool [#39281](https://github.com/ClickHouse/ClickHouse/pull/39281) ([Azat Khuzhin](https://github.com/azat)). +* [RFC] Remove superior atomic from MergeTreeBackgroundExecutor and annotations for TSA [#39285](https://github.com/ClickHouse/ClickHouse/pull/39285) ([Azat Khuzhin](https://github.com/azat)). +* Fix clang tidy [#39288](https://github.com/ClickHouse/ClickHouse/pull/39288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix running cmake with predefined cache (for development only) [#39295](https://github.com/ClickHouse/ClickHouse/pull/39295) ([Azat Khuzhin](https://github.com/azat)). +* Fix googletest contrib compilation (due to GTEST_HAS_POSIX_RE=0) [#39298](https://github.com/ClickHouse/ClickHouse/pull/39298) ([Azat Khuzhin](https://github.com/azat)). 
+* First try at reducing the use of StringRef [#39300](https://github.com/ClickHouse/ClickHouse/pull/39300) ([Robert Schulze](https://github.com/rschu1ze)). +* Whitespaces [#39303](https://github.com/ClickHouse/ClickHouse/pull/39303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add test for window function inside CASE [#39305](https://github.com/ClickHouse/ClickHouse/pull/39305) ([Dmitry Novik](https://github.com/novikd)). +* Simple Check should be updated on rerun [#39307](https://github.com/ClickHouse/ClickHouse/pull/39307) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix leaking of logger in clickhouse-disks [#39314](https://github.com/ClickHouse/ClickHouse/pull/39314) ([Azat Khuzhin](https://github.com/azat)). +* Update exception message [#39315](https://github.com/ClickHouse/ClickHouse/pull/39315) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix build clang-13 [#39318](https://github.com/ClickHouse/ClickHouse/pull/39318) ([alesapin](https://github.com/alesapin)). +* Auto set test name in integration tests [#39322](https://github.com/ClickHouse/ClickHouse/pull/39322) ([Vitaly Baranov](https://github.com/vitlibar)). +* Try fix flaky test_store_cleanup [#39334](https://github.com/ClickHouse/ClickHouse/pull/39334) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not start on unexpected Ordinary metadata [#39337](https://github.com/ClickHouse/ClickHouse/pull/39337) ([Alexander Tokmakov](https://github.com/tavplubix)). +* switch from mkdocs to Docusaurus [#39338](https://github.com/ClickHouse/ClickHouse/pull/39338) ([Dan Roscigno](https://github.com/DanRoscigno)). +* Fix flaky 01174_select_insert_isolation [#39339](https://github.com/ClickHouse/ClickHouse/pull/39339) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Better exception messages in schema inference [#39340](https://github.com/ClickHouse/ClickHouse/pull/39340) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix memory exceptions with transactions [#39341](https://github.com/ClickHouse/ClickHouse/pull/39341) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix typo [#39360](https://github.com/ClickHouse/ClickHouse/pull/39360) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix typo [#39361](https://github.com/ClickHouse/ClickHouse/pull/39361) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not enqueue uneeded parts for check [#39366](https://github.com/ClickHouse/ClickHouse/pull/39366) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Avoid loading toolchain file multiple times to avoid confusing ccache [#39387](https://github.com/ClickHouse/ClickHouse/pull/39387) ([Azat Khuzhin](https://github.com/azat)). +* Fix make clean (due to crosscompile of llvm) [#39392](https://github.com/ClickHouse/ClickHouse/pull/39392) ([Azat Khuzhin](https://github.com/azat)). +* Disable real-time digest in Keeper by default [#39393](https://github.com/ClickHouse/ClickHouse/pull/39393) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix interactive client with older server [#39413](https://github.com/ClickHouse/ClickHouse/pull/39413) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix BC check [#39414](https://github.com/ClickHouse/ClickHouse/pull/39414) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky test test_login_as_dropped_user_xml. [#39415](https://github.com/ClickHouse/ClickHouse/pull/39415) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Introduce a dependency to libuv when building NATS [#39427](https://github.com/ClickHouse/ClickHouse/pull/39427) ([ltrk2](https://github.com/ltrk2)). +* Set default value cross_to_inner_join_rewrite = 1 [#39443](https://github.com/ClickHouse/ClickHouse/pull/39443) ([Vladimir C](https://github.com/vdimir)). +* Respect table alias for additional_table_filters. [#39456](https://github.com/ClickHouse/ClickHouse/pull/39456) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Performance optimization and Bug Fix + +* Enabled `pread_threadpool` read method by default. It will increase read performance. Bug fix: if direct IO is enabled and the number of threads is large and `pread_threadpool` is used, it may cause a logical error. [#33653](https://github.com/ClickHouse/ClickHouse/pull/33653) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index d3a50969a39..fe4795d3798 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -45,7 +45,7 @@ Configuration template: - `min_part_size` – The minimum size of a data part. - `min_part_size_ratio` – The ratio of the data part size to the table size. -- `method` – Compression method. Acceptable values: `lz4`, `lz4hc`, `zstd`. +- `method` – Compression method. Acceptable values: `lz4`, `lz4hc`, `zstd`,`deflate_qpl`. - `level` – Compression level. See [Codecs](../../sql-reference/statements/create/table.md#create-query-general-purpose-codecs). You can configure multiple `` sections. diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 2cf57cc2243..0c2e87fbcac 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -248,6 +248,13 @@ ClickHouse supports general purpose codecs and specialized codecs. High compression levels are useful for asymmetric scenarios, like compress once, decompress repeatedly. Higher levels mean better compression and higher CPU usage. +#### DEFLATE_QPL + +`DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library, which has dependency on Intel Hardware: + +- DEFLATE_QPL is only supported on systems with AVX2/AVX512/IAA. +- DEFLATE_QPL-compressed data can only be transferred between nodes with AVX2/AVX512/IAA. + ### Specialized Codecs These codecs are designed to make compression more effective by using specific features of data. Some of these codecs do not compress data themself. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation. diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 680e77dfb6c..0c0c7da330d 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -44,7 +44,7 @@ ClickHouse перезагружает встроенные словари с з - `min_part_size` - Минимальный размер части таблицы. - `min_part_size_ratio` - Отношение размера минимальной части таблицы к полному размеру таблицы. -- `method` - Метод сжатия. Возможные значения: `lz4`, `lz4hc`, `zstd`. +- `method` - Метод сжатия. Возможные значения: `lz4`, `lz4hc`, `zstd`,`deflate_qpl`. 
- `level` – Уровень сжатия. См. [Кодеки](../../sql-reference/statements/create/table/#create-query-common-purpose-codecs). Можно сконфигурировать несколько разделов ``. diff --git a/docs/tools/README.md b/docs/tools/README.md index 163600804c6..7cf3540d108 100644 --- a/docs/tools/README.md +++ b/docs/tools/README.md @@ -1,50 +1,94 @@ -## How ClickHouse documentation is generated? {#how-clickhouse-documentation-is-generated} +## Generating ClickHouse documentation {#how-clickhouse-documentation-is-generated} -ClickHouse documentation is built using [build.py](build.py) script that uses [mkdocs](https://www.mkdocs.org) library and it’s dependencies to separately build all version of documentations (all languages in either single and multi page mode) as static HTMLs for each single page version. The results are then put in the correct directory structure. It is recommended to use Python 3.7 to run this script. +ClickHouse documentation is built using [Docusaurus](https://docusaurus.io). -[release.sh](release.sh) also pulls static files needed for [official ClickHouse website](https://clickhouse.com) from [../../website](../../website) folder then pushes to specified GitHub repo to be served via [GitHub Pages](https://pages.github.com). +## Check the look of your documentation changes {#how-to-check-if-the-documentation-will-look-fine} -## How to check if the documentation will look fine? {#how-to-check-if-the-documentation-will-look-fine} +There are a few options that are all useful depending on how large or complex your edits are. -There are few options that are all useful depending on how large or complex your edits are. +### Use the GitHub web interface to edit -### Use GitHub web interface to edit +Every page in the docs has an **Edit this page** link that opens the page in the GitHub editor. GitHub has Markdown support with a preview feature. The details of GitHub Markdown and the documentation Markdown are a bit different but generally this is close enough, and the person merging your PR will build the docs and check them. -GitHub has Markdown support with preview feature, but the details of GitHub Markdown dialect are a bit different in ClickHouse documentation. +### Install a Markdown editor or plugin for your IDE {#install-markdown-editor-or-plugin-for-your-ide} -### Install Markdown editor or plugin for your IDE {#install-markdown-editor-or-plugin-for-your-ide} +Usually, these plugins provide a preview of how the markdown will render, and they catch basic errors like unclosed tags very early. -Usually those also have some way to preview how Markdown will look like, which allows to catch basic errors like unclosed tags very early. -### Use build.py {#use-build-py} +## Build the docs locally {#use-build-py} -It’ll take some effort to go through, but the result will be very close to production documentation. +You can build the docs locally. It takes a few minutes to set up, but once you have done it the first time, the process is very simple. -For the first time you’ll need to: +### Clone the repos -#### 1. Set up virtualenv +The documentation is in two repos, clone both of them: +- [ClickHouse/ClickHouse](https://github.com/ClickHouse/ClickHouse) +- [ClickHouse/ClickHouse-docs](https://github.com/ClickHouse/clickhouse-docs) -``` bash -$ cd ClickHouse/docs/tools -$ mkdir venv -$ virtualenv -p $(which python3) venv -$ source venv/bin/activate -$ pip3 install -r requirements.txt +### Install Node.js + +The documentation is built with Docusaurus, which requires Node.js. 
We recommend version 16. Install [Node.js](https://nodejs.org/en/download/). + +### Copy files into place + +Docusaurus expects all of the markdown files to be located in the directory tree `clickhouse-docs/docs/`. This is not the way our repos are set up, so some copying of files is needed to build the docs: + +```bash +# from the parent directory of both the ClickHouse/ClickHouse and ClickHouse/clickhouse-docs repos: +cp -r ClickHouse/docs/en/development clickhouse-docs/docs/en/ +cp -r ClickHouse/docs/en/engines clickhouse-docs/docs/en/ +cp -r ClickHouse/docs/en/getting-started clickhouse-docs/docs/en/ +cp -r ClickHouse/docs/en/interfaces clickhouse-docs/docs/en/ +cp -r ClickHouse/docs/en/operations clickhouse-docs/docs/en/ +cp -r ClickHouse/docs/en/sql-reference clickhouse-docs/docs/en/ + +cp -r ClickHouse/docs/ru/* clickhouse-docs/docs/ru/ +cp -r ClickHouse/docs/zh clickhouse-docs/docs/ ``` -#### 2. Run build.py +#### Note: Symlinks will not work. +### Set up Docusaurus -When all prerequisites are installed, running `build.py` without args (there are some, check `build.py --help`) will generate `ClickHouse/docs/build` folder with complete static html website. +There are two commands that you may need to use with Docusaurus: +- `yarn install` +- `yarn start` -The easiest way to see the result is to use `--livereload=8888` argument of build.py. Alternatively, you can manually launch a HTTP server to serve the docs, for example by running `cd ClickHouse/docs/build && python3 -m http.server 8888`. Then go to http://localhost:8888 in browser. Feel free to use any other port instead of 8888. +#### Install Docusaurus and its dependencies: + +```bash +cd clickhouse-docs +yarn install +``` + +#### Start a development Docusaurus environment + +This command will start Docusaurus in development mode, which means that as you edit source files (for example, `.md` files), the changes will be rendered into HTML files and served by the Docusaurus development server. + +```bash +yarn start +``` + +### Make your changes to the markdown files + +Edit your files. Remember that if you are editing files in the `ClickHouse/ClickHouse` repo then you should edit them +in that repo and then copy the edited file into the `ClickHouse/clickhouse-docs/` directory structure so that they are updated in your development environment. + +`yarn start` probably opened a browser for you when you ran it; if not, open a browser to `http://localhost:3000/docs/en/intro` and navigate to the documentation that you are changing. If you have already made the changes, you can verify them here; if not, make them, and you will see the page update as you save the changes. ## How to change code highlighting? {#how-to-change-code-hl} -ClickHouse does not use mkdocs `highlightjs` feature. It uses modified pygments styles instead. -If you want to change code highlighting, edit the `website/css/highlight.css` file. -Currently, an [eighties](https://github.com/idleberg/base16-pygments/blob/master/css/base16-eighties.dark.css) theme -is used. +Code highlighting is based on the language chosen for your code blocks. Specify the language when you start the code block:
+```sql
+SELECT firstname from imdb.actors;
+```
+
+ +```sql +SELECT firstname from imdb.actors; +``` + +If you need a language supported then open an issue in [ClickHouse-docs](https://github.com/ClickHouse/clickhouse-docs/issues). ## How to subscribe on documentation changes? {#how-to-subscribe-on-documentation-changes} At the moment there’s no easy way to do just that, but you can consider: diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index d0fc3528473..fe8debcee27 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -79,6 +79,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) ("block-size,b", po::value()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size") ("hc", "use LZ4HC instead of LZ4") ("zstd", "use ZSTD instead of LZ4") + ("deflate_qpl", "use deflate_qpl instead of LZ4") ("codec", po::value>()->multitoken(), "use codecs combination instead of LZ4") ("level", po::value(), "compression level for codecs specified via flags") ("none", "use no compression instead of LZ4") @@ -103,6 +104,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) bool decompress = options.count("decompress"); bool use_lz4hc = options.count("hc"); bool use_zstd = options.count("zstd"); + bool use_deflate_qpl = options.count("deflate_qpl"); bool stat_mode = options.count("stat"); bool use_none = options.count("none"); unsigned block_size = options["block-size"].as(); @@ -110,7 +112,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) if (options.count("codec")) codecs = options["codec"].as>(); - if ((use_lz4hc || use_zstd || use_none) && !codecs.empty()) + if ((use_lz4hc || use_zstd || use_deflate_qpl || use_none) && !codecs.empty()) throw Exception("Wrong options, codec flags like --zstd and --codec options are mutually exclusive", ErrorCodes::BAD_ARGUMENTS); if (!codecs.empty() && options.count("level")) @@ -122,6 +124,8 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) method_family = "LZ4HC"; else if (use_zstd) method_family = "ZSTD"; + else if (use_deflate_qpl) + method_family = "DEFLATE_QPL"; else if (use_none) method_family = "NONE"; diff --git a/src/Access/Common/AllowedClientHosts.cpp b/src/Access/Common/AllowedClientHosts.cpp index 85d7065d823..efbdf3924e8 100644 --- a/src/Access/Common/AllowedClientHosts.cpp +++ b/src/Access/Common/AllowedClientHosts.cpp @@ -110,18 +110,24 @@ namespace } /// Returns the host name by its address. 
- String getHostByAddress(const IPAddress & address) + Strings getHostsByAddress(const IPAddress & address) { - String host = DNSResolver::instance().reverseResolve(address); + auto hosts = DNSResolver::instance().reverseResolve(address); - /// Check that PTR record is resolved back to client address - if (!isAddressOfHost(address, host)) - throw Exception("Host " + String(host) + " isn't resolved back to " + address.toString(), ErrorCodes::DNS_ERROR); + if (hosts.empty()) + throw Exception(ErrorCodes::DNS_ERROR, "{} could not be resolved", address.toString()); - return host; + + for (const auto & host : hosts) + { + /// Check that PTR record is resolved back to client address + if (!isAddressOfHost(address, host)) + throw Exception(ErrorCodes::DNS_ERROR, "Host {} isn't resolved back to {}", host, address.toString()); + } + + return hosts; } - void parseLikePatternIfIPSubnet(const String & pattern, IPSubnet & subnet, IPAddress::Family address_family) { size_t slash = pattern.find('/'); @@ -520,20 +526,29 @@ bool AllowedClientHosts::contains(const IPAddress & client_address) const return true; /// Check `name_regexps`. - std::optional resolved_host; + std::optional resolved_hosts; auto check_name_regexp = [&](const String & name_regexp_) { try { if (boost::iequals(name_regexp_, "localhost")) return is_client_local(); - if (!resolved_host) - resolved_host = getHostByAddress(client_v6); - if (resolved_host->empty()) - return false; - Poco::RegularExpression re(name_regexp_); - Poco::RegularExpression::Match match; - return re.match(*resolved_host, match) != 0; + if (!resolved_hosts) + { + resolved_hosts = getHostsByAddress(client_address); + } + + for (const auto & host : resolved_hosts.value()) + { + Poco::RegularExpression re(name_regexp_); + Poco::RegularExpression::Match match; + if (re.match(host, match) != 0) + { + return true; + } + } + + return false; } catch (const Exception & e) { diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 3486be1de33..ff1ee6f3609 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -509,7 +509,6 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) if (referrals) { SCOPE_EXIT({ -// ldap_value_free(referrals); ber_memvfree(reinterpret_cast(referrals)); referrals = nullptr; }); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cf8ae28df08..f4d3be14da6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -346,6 +346,12 @@ set_source_files_properties( Columns/ColumnString.cpp PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}") +if (ENABLE_QPL) +set_source_files_properties( + Compression/CompressionCodecDeflateQpl.cpp + PROPERTIES COMPILE_FLAGS "-mwaitpkg") +endif () + target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2_st) target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2) @@ -447,6 +453,9 @@ if (TARGET ch_contrib::avrocpp) dbms_target_link_libraries(PRIVATE ch_contrib::avrocpp) endif () +set_source_files_properties(Common/CaresPTRResolver.cpp PROPERTIES COMPILE_FLAGS -Wno-reserved-identifier) +target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::c-ares) + if (TARGET OpenSSL::Crypto) dbms_target_link_libraries (PRIVATE OpenSSL::Crypto) target_link_libraries (clickhouse_common_io PRIVATE OpenSSL::Crypto) @@ -527,6 +536,10 @@ endif () target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4) +if (TARGET ch_contrib::qpl) +dbms_target_link_libraries(PUBLIC ch_contrib::qpl) +endif () + dbms_target_link_libraries(PRIVATE 
_boost_context) if (ENABLE_NLP) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f0a8794d096..c6f14c7e865 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -152,7 +152,6 @@ static void incrementProfileEventsBlock(Block & dst, const Block & src) auto & dst_column_host_name = typeid_cast(*mutable_columns[name_pos["host_name"]]); auto & dst_array_current_time = typeid_cast(*mutable_columns[name_pos["current_time"]]).getData(); - // auto & dst_array_thread_id = typeid_cast(*mutable_columns[name_pos["thread_id"]]).getData(); auto & dst_array_type = typeid_cast(*mutable_columns[name_pos["type"]]).getData(); auto & dst_column_name = typeid_cast(*mutable_columns[name_pos["name"]]); auto & dst_array_value = typeid_cast(*mutable_columns[name_pos["value"]]).getData(); diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 62fb69a47e1..17e9bd97669 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -132,14 +132,12 @@ namespace ColumnLowCardinality::ColumnLowCardinality(MutableColumnPtr && column_unique_, MutableColumnPtr && indexes_, bool is_shared) : dictionary(std::move(column_unique_), is_shared), idx(std::move(indexes_)) { - // idx.check(getDictionary().size()); } void ColumnLowCardinality::insert(const Field & x) { compactIfSharedDictionary(); idx.insertPosition(dictionary.getColumnUnique().uniqueInsert(x)); - // idx.check(getDictionary().size()); } void ColumnLowCardinality::insertDefault() @@ -167,15 +165,12 @@ void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) const auto & nested = *low_cardinality_src->getDictionary().getNestedColumn(); idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(nested, position)); } - - // idx.check(getDictionary().size()); } void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n) { compactIfSharedDictionary(); idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(src, n)); - // idx.check(getDictionary().size()); } void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -205,7 +200,6 @@ void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, si auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(*used_keys, 0, used_keys->size()); idx.insertPositionsRange(*inserted_indexes->index(*sub_idx, 0), 0, length); } - // idx.check(getDictionary().size()); } void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length) @@ -213,7 +207,6 @@ void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t compactIfSharedDictionary(); auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(src, start, length); idx.insertPositionsRange(*inserted_indexes, 0, length); - // idx.check(getDictionary().size()); } static void checkPositionsAreLimited(const IColumn & positions, UInt64 limit) @@ -254,14 +247,12 @@ void ColumnLowCardinality::insertRangeFromDictionaryEncodedColumn(const IColumn compactIfSharedDictionary(); auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(keys, 0, keys.size()); idx.insertPositionsRange(*inserted_indexes->index(positions, 0), 0, positions.size()); - // idx.check(getDictionary().size()); } void ColumnLowCardinality::insertData(const char * pos, size_t length) { compactIfSharedDictionary(); idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length)); - // 
idx.check(getDictionary().size()); } StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const @@ -276,7 +267,6 @@ const char * ColumnLowCardinality::deserializeAndInsertFromArena(const char * po const char * new_pos; idx.insertPosition(dictionary.getColumnUnique().uniqueDeserializeAndInsertFromArena(pos, new_pos)); - // idx.check(getDictionary().size()); return new_pos; } diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 8d61f6e726a..809024316bf 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -273,14 +273,6 @@ llvm::Value * ColumnNullable::compileComparator(llvm::IRBuilderBase & builder, l b.CreateCondBr(lhs_or_rhs_are_null, lhs_or_rhs_are_null_block, lhs_rhs_are_not_null_block); - // if (unlikely(lval_is_null || rval_is_null)) - // { - // if (lval_is_null && rval_is_null) - // return 0; - // else - // return lval_is_null ? null_direction_hint : -null_direction_hint; - // } - b.SetInsertPoint(lhs_or_rhs_are_null_block); auto * lhs_equals_rhs_result = llvm::ConstantInt::getSigned(b.getInt8Ty(), 0); llvm::Value * lhs_and_rhs_are_null = b.CreateAnd(lhs_is_null_value, rhs_is_null_value); @@ -288,8 +280,6 @@ llvm::Value * ColumnNullable::compileComparator(llvm::IRBuilderBase & builder, l llvm::Value * lhs_or_rhs_are_null_block_result = b.CreateSelect(lhs_and_rhs_are_null, lhs_equals_rhs_result, lhs_is_null_result); b.CreateBr(join_block); - // getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); - b.SetInsertPoint(lhs_rhs_are_not_null_block); llvm::Value * lhs_rhs_are_not_null_block_result = nested_column->compileComparator(builder, lhs_unwrapped_value, rhs_unwrapped_value, nan_direction_hint); diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 3c21a65e404..58891e30e12 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -548,7 +548,6 @@ MutableColumnPtr ColumnUnique::uniqueInsertRangeImpl( } } - // checkIndexes(*positions_column, column->size() + (overflowed_keys ? overflowed_keys->size() : 0)); return std::move(positions_column); } diff --git a/src/Common/ArrayCache.h b/src/Common/ArrayCache.h index 6efa5c92b5b..f01ff94e38b 100644 --- a/src/Common/ArrayCache.h +++ b/src/Common/ArrayCache.h @@ -514,8 +514,6 @@ private: return allocateFromFreeRegion(*free_region, size); } -// std::cerr << "Requested size: " << size << "\n"; - /// Evict something from cache and continue. while (true) { diff --git a/src/Common/CaresPTRResolver.cpp b/src/Common/CaresPTRResolver.cpp new file mode 100644 index 00000000000..f6228e97c02 --- /dev/null +++ b/src/Common/CaresPTRResolver.cpp @@ -0,0 +1,109 @@ +#include "CaresPTRResolver.h" +#include +#include +#include +#include "ares.h" +#include "netdb.h" + +namespace DB +{ + + namespace ErrorCodes + { + extern const int DNS_ERROR; + } + + static void callback(void * arg, int status, int, struct hostent * host) + { + auto * ptr_records = reinterpret_cast*>(arg); + if (status == ARES_SUCCESS && host->h_aliases) + { + int i = 0; + while (auto * ptr_record = host->h_aliases[i]) + { + ptr_records->emplace_back(ptr_record); + i++; + } + } + } + + CaresPTRResolver::CaresPTRResolver(CaresPTRResolver::provider_token) : channel(nullptr) + { + /* + * ares_library_init is not thread safe. Currently, the only other usage of c-ares seems to be in grpc. + * In grpc, ares_library_init seems to be called only in Windows. 
+ * See https://github.com/grpc/grpc/blob/master/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc#L1187 + * That means it's safe to init it here, but we should be cautious when introducing new code that depends on c-ares and even updates + * to grpc. As discussed in https://github.com/ClickHouse/ClickHouse/pull/37827#discussion_r919189085, c-ares should be adapted to be atomic + * */ + if (ares_library_init(ARES_LIB_INIT_ALL) != ARES_SUCCESS || ares_init(&channel) != ARES_SUCCESS) + { + throw DB::Exception("Failed to initialize c-ares", DB::ErrorCodes::DNS_ERROR); + } + } + + CaresPTRResolver::~CaresPTRResolver() + { + ares_destroy(channel); + ares_library_cleanup(); + } + + std::vector CaresPTRResolver::resolve(const std::string & ip) + { + std::vector ptr_records; + + resolve(ip, ptr_records); + wait(); + + return ptr_records; + } + + std::vector CaresPTRResolver::resolve_v6(const std::string & ip) + { + std::vector ptr_records; + + resolve_v6(ip, ptr_records); + wait(); + + return ptr_records; + } + + void CaresPTRResolver::resolve(const std::string & ip, std::vector & response) + { + in_addr addr; + + inet_pton(AF_INET, ip.c_str(), &addr); + + ares_gethostbyaddr(channel, reinterpret_cast(&addr), sizeof(addr), AF_INET, callback, &response); + } + + void CaresPTRResolver::resolve_v6(const std::string & ip, std::vector & response) + { + in6_addr addr; + inet_pton(AF_INET6, ip.c_str(), &addr); + + ares_gethostbyaddr(channel, reinterpret_cast(&addr), sizeof(addr), AF_INET6, callback, &response); + } + + void CaresPTRResolver::wait() + { + timeval * tvp, tv; + fd_set read_fds; + fd_set write_fds; + int nfds; + + for (;;) + { + FD_ZERO(&read_fds); + FD_ZERO(&write_fds); + nfds = ares_fds(channel, &read_fds,&write_fds); + if (nfds == 0) + { + break; + } + tvp = ares_timeout(channel, nullptr, &tv); + select(nfds, &read_fds, &write_fds, nullptr, tvp); + ares_process(channel, &read_fds, &write_fds); + } + } +} diff --git a/src/Common/CaresPTRResolver.h b/src/Common/CaresPTRResolver.h new file mode 100644 index 00000000000..fd6a1cf7bc5 --- /dev/null +++ b/src/Common/CaresPTRResolver.h @@ -0,0 +1,42 @@ +#pragma once + +#include "DNSPTRResolver.h" + +using ares_channel = struct ares_channeldata *; + +namespace DB +{ + + /* + * Implements reverse DNS resolution using c-ares lib. System reverse DNS resolution via + * gethostbyaddr or getnameinfo does not work reliably because in some systems + * it returns all PTR records for a given IP and in others it returns only one. 
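+ * Returning every PTR record also matters for AllowedClientHosts::contains(), which matches each resolved name
+ * against the configured host regexps; a resolver that reports only a single record could wrongly reject a client.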
+ * */ + class CaresPTRResolver : public DNSPTRResolver + { + friend class DNSPTRResolverProvider; + + /* + * Allow only DNSPTRProvider to instantiate this class + * */ + struct provider_token {}; + + public: + explicit CaresPTRResolver(provider_token); + ~CaresPTRResolver() override; + + std::vector resolve(const std::string & ip) override; + + std::vector resolve_v6(const std::string & ip) override; + + private: + void wait(); + + void resolve(const std::string & ip, std::vector & response); + + void resolve_v6(const std::string & ip, std::vector & response); + + ares_channel channel; + }; +} + diff --git a/src/Common/DNSPTRResolver.h b/src/Common/DNSPTRResolver.h new file mode 100644 index 00000000000..e6cce83f79d --- /dev/null +++ b/src/Common/DNSPTRResolver.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ + struct DNSPTRResolver + { + + virtual ~DNSPTRResolver() = default; + + virtual std::vector resolve(const std::string & ip) = 0; + + virtual std::vector resolve_v6(const std::string & ip) = 0; + + }; +} diff --git a/src/Common/DNSPTRResolverProvider.cpp b/src/Common/DNSPTRResolverProvider.cpp new file mode 100644 index 00000000000..41c73f4f36f --- /dev/null +++ b/src/Common/DNSPTRResolverProvider.cpp @@ -0,0 +1,13 @@ +#include "DNSPTRResolverProvider.h" +#include "CaresPTRResolver.h" + +namespace DB +{ + std::shared_ptr DNSPTRResolverProvider::get() + { + static auto cares_resolver = std::make_shared( + CaresPTRResolver::provider_token {} + ); + return cares_resolver; + } +} diff --git a/src/Common/DNSPTRResolverProvider.h b/src/Common/DNSPTRResolverProvider.h new file mode 100644 index 00000000000..a7f534749e3 --- /dev/null +++ b/src/Common/DNSPTRResolverProvider.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include "DNSPTRResolver.h" + +namespace DB +{ + /* + * Provides a ready-to-use DNSPTRResolver instance. + * It hides 3rd party lib dependencies, handles initialization and lifetime. + * Since `get` function is static, it can be called from any context. Including cached static functions. + * */ + class DNSPTRResolverProvider + { + public: + static std::shared_ptr get(); + }; +} diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 0616e324b73..10797b7a809 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -12,6 +12,7 @@ #include #include #include +#include "DNSPTRResolverProvider.h" namespace ProfileEvents { @@ -138,16 +139,17 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) return addresses; } -static String reverseResolveImpl(const Poco::Net::IPAddress & address) +static Strings reverseResolveImpl(const Poco::Net::IPAddress & address) { - Poco::Net::SocketAddress sock_addr(address, 0); + auto ptr_resolver = DB::DNSPTRResolverProvider::get(); - /// Resolve by hand, because Poco::Net::DNS::hostByAddress(...) does getaddrinfo(...) after getnameinfo(...) 
- char host[1024]; - int err = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD); - if (err) - throw Exception("Cannot getnameinfo(" + address.toString() + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR); - return host; + if (address.family() == Poco::Net::IPAddress::Family::IPv4) + { + return ptr_resolver->resolve(address.toString()); + } else + { + return ptr_resolver->resolve_v6(address.toString()); + } } struct DNSResolver::Impl @@ -235,7 +237,7 @@ std::vector DNSResolver::resolveAddressList(const std: return addresses; } -String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address) +Strings DNSResolver::reverseResolve(const Poco::Net::IPAddress & address) { if (impl->disable_cache) return reverseResolveImpl(address); diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index fdd9799f96f..84c88586636 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -36,8 +36,8 @@ public: std::vector resolveAddressList(const std::string & host, UInt16 port); - /// Accepts host IP and resolves its host name - String reverseResolve(const Poco::Net::IPAddress & address); + /// Accepts host IP and resolves its host names + Strings reverseResolve(const Poco::Net::IPAddress & address); /// Get this server host name String getHostName(); diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index 869954bb2ae..31290c53b49 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -122,9 +122,6 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_) values.time_at_offset_change_value = (transition.from - cctz::civil_second(date)) / Values::OffsetChangeFactor; values.amount_of_offset_change_value = (transition.to - transition.from) / Values::OffsetChangeFactor; -// std::cerr << time_zone << ", " << date << ": change from " << transition.from << " to " << transition.to << "\n"; -// std::cerr << time_zone << ", " << date << ": change at " << values.time_at_offset_change() << " with " << values.amount_of_offset_change() << "\n"; - /// We don't support too large changes. if (values.amount_of_offset_change_value > 24 * 4) values.amount_of_offset_change_value = 24 * 4; diff --git a/src/Common/HashTable/SmallTable.h b/src/Common/HashTable/SmallTable.h index ad9537ff94a..b78901b03f6 100644 --- a/src/Common/HashTable/SmallTable.h +++ b/src/Common/HashTable/SmallTable.h @@ -74,7 +74,6 @@ public: using key_type = Key; using mapped_type = typename Cell::mapped_type; using value_type = typename Cell::value_type; - using cell_type = Cell; class Reader final : private Cell::State { @@ -247,39 +246,6 @@ public: } } - - /// Same, but return false if it's full. - bool ALWAYS_INLINE tryEmplace(Key x, iterator & it, bool & inserted) - { - Cell * res = findCell(x); - it = iteratorTo(res); - inserted = res == buf + m_size; - if (inserted) - { - if (res == buf + capacity) - return false; - - new(res) Cell(x, *this); - ++m_size; - } - return true; - } - - - /// Copy the cell from another hash table. It is assumed that there was no such key in the table yet. 
- void ALWAYS_INLINE insertUnique(const Cell * cell) - { - memcpy(&buf[m_size], cell, sizeof(*cell)); - ++m_size; - } - - void ALWAYS_INLINE insertUnique(Key x) - { - new(&buf[m_size]) Cell(x, *this); - ++m_size; - } - - iterator ALWAYS_INLINE find(Key x) { return iteratorTo(findCell(x)); } const_iterator ALWAYS_INLINE find(Key x) const { return iteratorTo(findCell(x)); } @@ -381,36 +347,3 @@ template > using SmallSet = SmallTable, capacity>; - -template -< - typename Key, - typename Cell, - size_t capacity -> -class SmallMapTable : public SmallTable -{ -public: - using key_type = Key; - using mapped_type = typename Cell::mapped_type; - using value_type = typename Cell::value_type; - using cell_type = Cell; - - mapped_type & ALWAYS_INLINE operator[](Key x) - { - typename SmallMapTable::iterator it; - bool inserted; - this->emplace(x, it, inserted); - new (&it->getMapped()) mapped_type(); - return it->getMapped(); - } -}; - - -template -< - typename Key, - typename Mapped, - size_t capacity -> -using SmallMap = SmallMapTable, capacity>; diff --git a/src/Common/RadixSort.h b/src/Common/RadixSort.h index 4bf975c4c7a..9ca43bee30c 100644 --- a/src/Common/RadixSort.h +++ b/src/Common/RadixSort.h @@ -355,8 +355,6 @@ private: template static inline void radixSortMSDInternal(Element * arr, size_t size, size_t limit) { -// std::cerr << PASS << ", " << size << ", " << limit << "\n"; - /// The beginning of every i-1-th bucket. 0th element will be equal to 1st. /// Last element will point to array end. std::unique_ptr prev_buckets{new Element*[HISTOGRAM_SIZE + 1]}; diff --git a/src/Common/TraceSender.cpp b/src/Common/TraceSender.cpp index ce8adb98740..ad88e508d06 100644 --- a/src/Common/TraceSender.cpp +++ b/src/Common/TraceSender.cpp @@ -42,13 +42,14 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Int char buffer[buf_size]; WriteBufferFromFileDescriptorDiscardOnFailure out(pipe.fds_rw[1], buf_size, buffer); - StringRef query_id; + std::string_view query_id; UInt64 thread_id; if (CurrentThread::isInitialized()) { - query_id = StringRef(CurrentThread::getQueryId()); - query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN); + query_id = CurrentThread::getQueryId(); + if (query_id.size() > QUERY_ID_MAX_LEN) + query_id.remove_suffix(query_id.size() - QUERY_ID_MAX_LEN); thread_id = CurrentThread::get().thread_id; } @@ -59,8 +60,8 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Int writeChar(false, out); /// true if requested to stop the collecting thread. - writeBinary(static_cast(query_id.size), out); - out.write(query_id.data, query_id.size); + writeBinary(static_cast(query_id.size()), out); + out.write(query_id.data(), query_id.size()); size_t stack_trace_size = stack_trace.getSize(); size_t stack_trace_offset = stack_trace.getOffset(); diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index 81e49e445a7..2c85dc6d9a9 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -106,21 +106,15 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c throw Exception(message.str(), ErrorCodes::CHECKSUM_DOESNT_MATCH); } - -/// Read compressed data into compressed_buffer. Get size of decompressed data from block header. Checksum if need. -/// Returns number of compressed bytes read. 
-size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy) +static void readHeaderAndGetCodecAndSize( + const char * compressed_buffer, + UInt8 header_size, + CompressionCodecPtr & codec, + size_t & size_decompressed, + size_t & size_compressed_without_checksum, + bool allow_different_codecs) { - if (compressed_in->eof()) - return 0; - - UInt8 header_size = ICompressionCodec::getHeaderSize(); - own_compressed_buffer.resize(header_size + sizeof(Checksum)); - - compressed_in->readStrict(own_compressed_buffer.data(), sizeof(Checksum) + header_size); - char * compressed_header = own_compressed_buffer.data() + sizeof(Checksum); - - uint8_t method = ICompressionCodec::readMethod(compressed_header); + uint8_t method = ICompressionCodec::readMethod(compressed_buffer); if (!codec) { @@ -142,8 +136,8 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, } } - size_compressed_without_checksum = ICompressionCodec::readCompressedBlockSize(compressed_header); - size_decompressed = ICompressionCodec::readDecompressedBlockSize(compressed_header); + size_compressed_without_checksum = ICompressionCodec::readCompressedBlockSize(compressed_buffer); + size_decompressed = ICompressionCodec::readDecompressedBlockSize(compressed_buffer); /// This is for clang static analyzer. assert(size_decompressed > 0); @@ -157,8 +151,27 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, if (size_compressed_without_checksum < header_size) throw Exception("Can't decompress data: the compressed data size (" + toString(size_compressed_without_checksum) + ", this should include header size) is less than the header size (" + toString(header_size) + ")", ErrorCodes::CORRUPTED_DATA); +} - ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed_without_checksum + sizeof(Checksum)); +/// Read compressed data into compressed_buffer. Get size of decompressed data from block header. Checksum if need. +/// Returns number of compressed bytes read. +size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy) +{ + if (compressed_in->eof()) + return 0; + + UInt8 header_size = ICompressionCodec::getHeaderSize(); + own_compressed_buffer.resize(header_size + sizeof(Checksum)); + + compressed_in->readStrict(own_compressed_buffer.data(), sizeof(Checksum) + header_size); + + readHeaderAndGetCodecAndSize( + own_compressed_buffer.data() + sizeof(Checksum), + header_size, + codec, + size_decompressed, + size_compressed_without_checksum, + allow_different_codecs); auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); @@ -184,9 +197,55 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum); } + ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed_without_checksum + sizeof(Checksum)); return size_compressed_without_checksum + sizeof(Checksum); } +/// Read compressed data into compressed_buffer for asynchronous decompression to avoid the situation of "read compressed block across the compressed_in". 
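+/// The block is left in place inside 'compressed_in', so neither it nor the destination buffer may be touched until
+/// flushAsynchronousDecompressRequests() has completed (see CompressedReadBufferBase.h for the full contract).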
+size_t CompressedReadBufferBase::readCompressedDataBlockForAsynchronous(size_t & size_decompressed, size_t & size_compressed_without_checksum) +{ + UInt8 header_size = ICompressionCodec::getHeaderSize(); + /// Make sure the whole header located in 'compressed_in->' buffer. + if (compressed_in->eof() || (compressed_in->available() < (header_size + sizeof(Checksum)))) + return 0; + + own_compressed_buffer.resize(header_size + sizeof(Checksum)); + compressed_in->readStrict(own_compressed_buffer.data(), sizeof(Checksum) + header_size); + + readHeaderAndGetCodecAndSize( + own_compressed_buffer.data() + sizeof(Checksum), + header_size, + codec, + size_decompressed, + size_compressed_without_checksum, + allow_different_codecs); + + auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); + + /// Make sure the whole compressed block located in 'compressed_in->' buffer. + /// Otherwise, abandon header and restore original offset of compressed_in + if (compressed_in->offset() >= header_size + sizeof(Checksum) && + compressed_in->available() >= (size_compressed_without_checksum - header_size) + additional_size_at_the_end_of_buffer + sizeof(Checksum)) + { + compressed_in->position() -= header_size; + compressed_buffer = compressed_in->position(); + compressed_in->position() += size_compressed_without_checksum; + + if (!disable_checksum) + { + Checksum & checksum = *reinterpret_cast(own_compressed_buffer.data()); + validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum); + } + + ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed_without_checksum + sizeof(Checksum)); + return size_compressed_without_checksum + sizeof(Checksum); + } + else + { + compressed_in->position() -= (sizeof(Checksum) + header_size); + return 0; + } +} static void readHeaderAndGetCodec(const char * compressed_buffer, size_t size_decompressed, CompressionCodecPtr & codec, bool allow_different_codecs) { @@ -216,14 +275,12 @@ static void readHeaderAndGetCodec(const char * compressed_buffer, size_t size_de } } - void CompressedReadBufferBase::decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum) { readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs); codec->decompress(compressed_buffer, size_compressed_without_checksum, to); } - void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum) { readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs); @@ -245,6 +302,17 @@ void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_d codec->decompress(compressed_buffer, size_compressed_without_checksum, to.begin()); } +void CompressedReadBufferBase::flushAsynchronousDecompressRequests() const +{ + if (codec) + codec->flushAsynchronousDecompressRequests(); +} + +void CompressedReadBufferBase::setDecompressMode(ICompressionCodec::CodecMode mode) const +{ + if (codec) + codec->setDecompressMode(mode); +} /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_different_codecs_) @@ -253,7 +321,7 @@ CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_d } -CompressedReadBufferBase::~CompressedReadBufferBase() = default; /// Proper destruction of unique_ptr of forward-declared type. 
+CompressedReadBufferBase::~CompressedReadBufferBase() = default; /// Proper destruction of unique_ptr of forward-declared type. } diff --git a/src/Compression/CompressedReadBufferBase.h b/src/Compression/CompressedReadBufferBase.h index 152447c0b64..baea4d2b855 100644 --- a/src/Compression/CompressedReadBufferBase.h +++ b/src/Compression/CompressedReadBufferBase.h @@ -39,6 +39,17 @@ protected: /// Returns number of compressed bytes read. size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy); + /// Read compressed data into compressed_buffer for asynchronous decompression, avoiding the case where a compressed block has to be read across two fills of 'compressed_in'. + /// + /// A compressed block may not be completely contained in the 'compressed_in' buffer, i.e. it may only be obtainable by reading across the 'compressed_in' boundary. + /// For native LZ4/ZSTD this is not a problem because decompression is synchronous. + /// But asynchronous decompression, such as QPL deflate, requires that neither the source nor the target buffer is overwritten until the job completes. + /// + /// Returns number of compressed bytes read. + /// If the returned value is > 0, the address range of the current block is kept inside 'compressed_in', so asynchronous decompression can be used to boost performance. + /// If the returned value is 0, the current block cannot be decompressed asynchronously; meanwhile, asynchronous requests for previous blocks, if any, should be flushed. + size_t readCompressedDataBlockForAsynchronous(size_t & size_decompressed, size_t & size_compressed_without_checksum); + /// Decompress into memory pointed by `to` void decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum); @@ -46,6 +57,14 @@ protected: /// It is more efficient for compression codec NONE but not suitable if you want to decompress into specific location. void decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum); + /// Flush all asynchronous decompression requests. + void flushAsynchronousDecompressRequests() const; + + /// Set decompression mode: Synchronous/Asynchronous/SoftwareFallback. + /// The mode is "Synchronous" by default. + /// flushAsynchronousDecompressRequests must be called after setting "Asynchronous" mode. + void setDecompressMode(ICompressionCodec::CodecMode mode) const; + public: /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. explicit CompressedReadBufferBase(ReadBuffer * in = nullptr, bool allow_different_codecs_ = false); diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 0c347b7ce2c..68f6757e04d 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -91,6 +91,9 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) { size_t bytes_read = 0; + /// The codec mode is only relevant for codecs which support hardware offloading. + ICompressionCodec::CodecMode decompress_mode = ICompressionCodec::CodecMode::Synchronous; + bool read_tail = false; /// If there are unread bytes in the buffer, then we copy needed to `to`.
if (pos < working_buffer.end()) @@ -102,10 +105,28 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) size_t size_decompressed = 0; size_t size_compressed_without_checksum = 0; - size_t new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false); + ///Try to read block which is entirely located in a single 'compressed_in->' buffer. + size_t new_size_compressed = readCompressedDataBlockForAsynchronous(size_decompressed, size_compressed_without_checksum); + + if (new_size_compressed) + { + /// Current block is entirely located in a single 'compressed_in->' buffer. + /// We can set asynchronous decompression mode if supported to boost performance. + decompress_mode = ICompressionCodec::CodecMode::Asynchronous; + } + else + { + /// Current block cannot be decompressed asynchronously, means it probably span across two compressed_in buffers. + /// Meanwhile, asynchronous requests for previous blocks should be flushed if any. + flushAsynchronousDecompressRequests(); + /// Fallback to generic API + new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false); + decompress_mode = ICompressionCodec::CodecMode::Synchronous; + } size_compressed = 0; /// file_in no longer points to the end of the block in working_buffer. + if (!new_size_compressed) - return bytes_read; + break; auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); @@ -113,6 +134,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) /// need to skip some bytes in decompressed data (seek happened before readBig call). if (nextimpl_working_buffer_offset == 0 && size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read) { + setDecompressMode(decompress_mode); decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; bytes += size_decompressed; @@ -127,6 +149,8 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); + /// Synchronous mode must be set since we need read partial data immediately from working buffer to target buffer. + setDecompressMode(ICompressionCodec::CodecMode::Synchronous); decompress(working_buffer, size_decompressed, size_compressed_without_checksum); /// Read partial data from first block. Won't run here at second block. @@ -145,15 +169,25 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); + // Asynchronous mode can be set here because working_buffer wouldn't be overwritten any more since this is the last block. + setDecompressMode(ICompressionCodec::CodecMode::Asynchronous); decompress(working_buffer, size_decompressed, size_compressed_without_checksum); - - ///Read partial data from last block. - pos = working_buffer.begin(); - bytes_read += read(to + bytes_read, n - bytes_read); + read_tail = true; break; } } + /// Here we must make sure all asynchronous requests above are completely done. 
+ flushAsynchronousDecompressRequests(); + + if (read_tail) + { + /// Manually take nextimpl_working_buffer_offset into account, because we don't use + /// nextImpl in this method. + pos = working_buffer.begin(); + bytes_read += read(to + bytes_read, n - bytes_read); + } + return bytes_read; } diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp new file mode 100644 index 00000000000..81ec7ee5dca --- /dev/null +++ b/src/Compression/CompressionCodecDeflateQpl.cpp @@ -0,0 +1,413 @@ +#ifdef ENABLE_QPL_COMPRESSION +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_COMPRESS; + extern const int CANNOT_DECOMPRESS; +} + +std::array DeflateQplJobHWPool::hw_job_ptr_pool; +std::array DeflateQplJobHWPool::hw_job_ptr_locks; +bool DeflateQplJobHWPool::job_pool_ready = false; +std::unique_ptr DeflateQplJobHWPool::hw_jobs_buffer; + +DeflateQplJobHWPool & DeflateQplJobHWPool::instance() +{ + static DeflateQplJobHWPool pool; + return pool; +} + +DeflateQplJobHWPool::DeflateQplJobHWPool() + :random_engine(std::random_device()()) + ,distribution(0, MAX_HW_JOB_NUMBER-1) +{ + Poco::Logger * log = &Poco::Logger::get("DeflateQplJobHWPool"); + UInt32 job_size = 0; + const char * qpl_version = qpl_get_library_version(); + + /// Get size required for saving a single qpl job object + qpl_get_job_size(qpl_path_hardware, &job_size); + /// Allocate entire buffer for storing all job objects + hw_jobs_buffer = std::make_unique(job_size * MAX_HW_JOB_NUMBER); + /// Initialize pool for storing all job object pointers + /// Reallocate buffer by shifting address offset for each job object. + for (UInt32 index = 0; index < MAX_HW_JOB_NUMBER; ++index) + { + qpl_job * qpl_job_ptr = reinterpret_cast(hw_jobs_buffer.get() + index * job_size); + if (qpl_init_job(qpl_path_hardware, qpl_job_ptr) != QPL_STS_OK) + { + job_pool_ready = false; + LOG_WARNING(log, "Initialization of hardware-assisted DeflateQpl codec failed, falling back to software DeflateQpl codec. Please check if Intel In-Memory Analytics Accelerator (IAA) is properly set up. QPL Version: {}.",qpl_version); + return; + } + hw_job_ptr_pool[index] = qpl_job_ptr; + unLockJob(index); + } + + job_pool_ready = true; + LOG_DEBUG(log, "Hardware-assisted DeflateQpl codec is ready! 
QPL Version: {}",qpl_version); +} + +DeflateQplJobHWPool::~DeflateQplJobHWPool() +{ + for (UInt32 i = 0; i < MAX_HW_JOB_NUMBER; ++i) + { + if (hw_job_ptr_pool[i]) + { + while (!tryLockJob(i)); + qpl_fini_job(hw_job_ptr_pool[i]); + unLockJob(i); + hw_job_ptr_pool[i] = nullptr; + } + } + job_pool_ready = false; +} + +qpl_job * DeflateQplJobHWPool::acquireJob(UInt32 &job_id) +{ + if (isJobPoolReady()) + { + UInt32 retry = 0; + auto index = distribution(random_engine); + while (!tryLockJob(index)) + { + index = distribution(random_engine); + retry++; + if (retry > MAX_HW_JOB_NUMBER) + { + return nullptr; + } + } + job_id = MAX_HW_JOB_NUMBER - index; + assert(index < MAX_HW_JOB_NUMBER); + return hw_job_ptr_pool[index]; + } + else + return nullptr; +} + +void DeflateQplJobHWPool::releaseJob(UInt32 job_id) +{ + if (isJobPoolReady()) + unLockJob(MAX_HW_JOB_NUMBER - job_id); +} + +bool DeflateQplJobHWPool::tryLockJob(UInt32 index) +{ + bool expected = false; + assert(index < MAX_HW_JOB_NUMBER); + return hw_job_ptr_locks[index].compare_exchange_strong(expected, true); +} + +void DeflateQplJobHWPool::unLockJob(UInt32 index) +{ + assert(index < MAX_HW_JOB_NUMBER); + hw_job_ptr_locks[index].store(false); +} + +//HardwareCodecDeflateQpl +HardwareCodecDeflateQpl::HardwareCodecDeflateQpl() + :log(&Poco::Logger::get("HardwareCodecDeflateQpl")) +{ +} + +HardwareCodecDeflateQpl::~HardwareCodecDeflateQpl() +{ +#ifndef NDEBUG + assert(decomp_async_job_map.empty()); +#else + if (!decomp_async_job_map.empty()) + { + LOG_WARNING(log, "Find un-released job when HardwareCodecDeflateQpl destroy"); + for (auto it : decomp_async_job_map) + { + DeflateQplJobHWPool::instance().releaseJob(it.first); + } + decomp_async_job_map.clear(); + } +#endif +} + +Int32 HardwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) const +{ + UInt32 job_id = 0; + qpl_job* job_ptr = nullptr; + UInt32 compressed_size = 0; + if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) + { + LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doCompressData->acquireJob fail, probably job pool exhausted)"); + return RET_ERROR; + } + + job_ptr->op = qpl_op_compress; + job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); + job_ptr->next_out_ptr = reinterpret_cast(dest); + job_ptr->available_in = source_size; + job_ptr->level = qpl_default_level; + job_ptr->available_out = dest_size; + job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_DYNAMIC_HUFFMAN | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY; + + if (auto status = qpl_execute_job(job_ptr); status == QPL_STS_OK) + { + compressed_size = job_ptr->total_out; + DeflateQplJobHWPool::instance().releaseJob(job_id); + return compressed_size; + } + else + { + LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doCompressData->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status); + DeflateQplJobHWPool::instance().releaseJob(job_id); + return RET_ERROR; + } +} + +Int32 HardwareCodecDeflateQpl::doDecompressDataSynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) +{ + UInt32 job_id = 0; + qpl_job * job_ptr = nullptr; + UInt32 decompressed_size = 0; + if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) + { + LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataSynchronous->acquireJob fail, probably job pool exhausted)"); + return 
RET_ERROR; + } + + // Performing a decompression operation + job_ptr->op = qpl_op_decompress; + job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); + job_ptr->next_out_ptr = reinterpret_cast(dest); + job_ptr->available_in = source_size; + job_ptr->available_out = uncompressed_size; + job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST; + + if (auto status = qpl_submit_job(job_ptr); status != QPL_STS_OK) + { + DeflateQplJobHWPool::instance().releaseJob(job_id); + LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataSynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status); + return RET_ERROR; + } + /// Busy waiting till job complete. + do + { + _tpause(1, __rdtsc() + 1000); + } while (qpl_check_job(job_ptr) == QPL_STS_BEING_PROCESSED); + + decompressed_size = job_ptr->total_out; + DeflateQplJobHWPool::instance().releaseJob(job_id); + return decompressed_size; +} + +Int32 HardwareCodecDeflateQpl::doDecompressDataAsynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) +{ + UInt32 job_id = 0; + qpl_job * job_ptr = nullptr; + if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) + { + LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataAsynchronous->acquireJob fail, probably job pool exhausted)"); + return RET_ERROR; + } + + // Performing a decompression operation + job_ptr->op = qpl_op_decompress; + job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); + job_ptr->next_out_ptr = reinterpret_cast(dest); + job_ptr->available_in = source_size; + job_ptr->available_out = uncompressed_size; + job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST; + + if (auto status = qpl_submit_job(job_ptr); status == QPL_STS_OK) + { + decomp_async_job_map.insert({job_id, job_ptr}); + return job_id; + } + else + { + DeflateQplJobHWPool::instance().releaseJob(job_id); + LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataAsynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status); + return RET_ERROR; + } +} + +void HardwareCodecDeflateQpl::flushAsynchronousDecompressRequests() +{ + UInt32 n_jobs_processing = decomp_async_job_map.size(); + std::map::iterator it = decomp_async_job_map.begin(); + + while (n_jobs_processing) + { + UInt32 job_id = 0; + qpl_job * job_ptr = nullptr; + job_id = it->first; + job_ptr = it->second; + + if (qpl_check_job(job_ptr) == QPL_STS_BEING_PROCESSED) + { + it++; + } + else + { + it = decomp_async_job_map.erase(it); + DeflateQplJobHWPool::instance().releaseJob(job_id); + n_jobs_processing--; + if (n_jobs_processing <= 0) + break; + } + if (it == decomp_async_job_map.end()) + { + it = decomp_async_job_map.begin(); + _tpause(1, __rdtsc() + 1000); + } + } +} + +SoftwareCodecDeflateQpl::~SoftwareCodecDeflateQpl() +{ + if (!sw_job) + qpl_fini_job(sw_job); +} + +qpl_job * SoftwareCodecDeflateQpl::getJobCodecPtr() +{ + if (!sw_job) + { + UInt32 size = 0; + qpl_get_job_size(qpl_path_software, &size); + + sw_buffer = std::make_unique(size); + sw_job = reinterpret_cast(sw_buffer.get()); + + // Job initialization + if (auto status = qpl_init_job(qpl_path_software, sw_job); status != QPL_STS_OK) + throw Exception(ErrorCodes::CANNOT_COMPRESS, + "Initialization of DeflateQpl software fallback codec failed. 
(Details: qpl_init_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status); + } + return sw_job; +} + +UInt32 SoftwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) +{ + qpl_job * job_ptr = getJobCodecPtr(); + // Performing a compression operation + job_ptr->op = qpl_op_compress; + job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); + job_ptr->next_out_ptr = reinterpret_cast(dest); + job_ptr->available_in = source_size; + job_ptr->available_out = dest_size; + job_ptr->level = qpl_default_level; + job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_DYNAMIC_HUFFMAN | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY; + + if (auto status = qpl_execute_job(job_ptr); status != QPL_STS_OK) + throw Exception(ErrorCodes::CANNOT_COMPRESS, + "Execution of DeflateQpl software fallback codec failed. (Details: qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status); + + return job_ptr->total_out; +} + +void SoftwareCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) +{ + qpl_job * job_ptr = getJobCodecPtr(); + + // Performing a decompression operation + job_ptr->op = qpl_op_decompress; + job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); + job_ptr->next_out_ptr = reinterpret_cast(dest); + job_ptr->available_in = source_size; + job_ptr->available_out = uncompressed_size; + job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST; + + if (auto status = qpl_execute_job(job_ptr); status != QPL_STS_OK) + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, + "Execution of DeflateQpl software fallback codec failed. (Details: qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status); +} + +//CompressionCodecDeflateQpl +CompressionCodecDeflateQpl::CompressionCodecDeflateQpl() + :hw_codec(std::make_unique()) + ,sw_codec(std::make_unique()) +{ + setCodecDescription("DEFLATE_QPL"); +} + +uint8_t CompressionCodecDeflateQpl::getMethodByte() const +{ + return static_cast(CompressionMethodByte::DeflateQpl); +} + +void CompressionCodecDeflateQpl::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); +} + +UInt32 CompressionCodecDeflateQpl::getMaxCompressedDataSize(UInt32 uncompressed_size) const +{ + /// Aligned with ZLIB + return ((uncompressed_size) + ((uncompressed_size) >> 12) + ((uncompressed_size) >> 14) + ((uncompressed_size) >> 25) + 13); +} + +UInt32 CompressionCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest) const +{ + Int32 res = HardwareCodecDeflateQpl::RET_ERROR; + if (DeflateQplJobHWPool::instance().isJobPoolReady()) + res = hw_codec->doCompressData(source, source_size, dest, getMaxCompressedDataSize(source_size)); + if (res == HardwareCodecDeflateQpl::RET_ERROR) + res = sw_codec->doCompressData(source, source_size, dest, getMaxCompressedDataSize(source_size)); + return res; +} + +void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +{ + switch (getDecompressMode()) + { + case CodecMode::Synchronous: + { + Int32 res = HardwareCodecDeflateQpl::RET_ERROR; + if (DeflateQplJobHWPool::instance().isJobPoolReady()) + { + res = hw_codec->doDecompressDataSynchronous(source, source_size, dest, uncompressed_size); + if (res == HardwareCodecDeflateQpl::RET_ERROR) + sw_codec->doDecompressData(source, 
source_size, dest, uncompressed_size); + } + else + sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); + return; + } + case CodecMode::Asynchronous: + { + Int32 res = HardwareCodecDeflateQpl::RET_ERROR; + if (DeflateQplJobHWPool::instance().isJobPoolReady()) + res = hw_codec->doDecompressDataAsynchronous(source, source_size, dest, uncompressed_size); + if (res == HardwareCodecDeflateQpl::RET_ERROR) + sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); + return; + } + case CodecMode::SoftwareFallback: + sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); + return; + } + __builtin_unreachable(); +} + +void CompressionCodecDeflateQpl::flushAsynchronousDecompressRequests() +{ + if (DeflateQplJobHWPool::instance().isJobPoolReady()) + hw_codec->flushAsynchronousDecompressRequests(); + /// After flush previous all async requests, we must restore mode to be synchronous by default. + setDecompressMode(CodecMode::Synchronous); +} +void registerCodecDeflateQpl(CompressionCodecFactory & factory) +{ + factory.registerSimpleCompressionCodec( + "DEFLATE_QPL", static_cast(CompressionMethodByte::DeflateQpl), [&]() { return std::make_shared(); }); +} +} +#endif diff --git a/src/Compression/CompressionCodecDeflateQpl.h b/src/Compression/CompressionCodecDeflateQpl.h new file mode 100644 index 00000000000..c15f537fd3f --- /dev/null +++ b/src/Compression/CompressionCodecDeflateQpl.h @@ -0,0 +1,120 @@ +#pragma once + +#include +#include +#include + +namespace Poco +{ +class Logger; +} + +namespace DB +{ + +/// DeflateQplJobHWPool is resource pool to provide the job objects. +/// Job object is used for storing context information during offloading compression job to HW Accelerator. +class DeflateQplJobHWPool +{ +public: + DeflateQplJobHWPool(); + + ~DeflateQplJobHWPool(); + + qpl_job * acquireJob(UInt32 &job_id); + + static void releaseJob(UInt32 job_id); + + static const bool & isJobPoolReady() { return job_pool_ready; } + + static DeflateQplJobHWPool & instance(); + +private: + static bool tryLockJob(UInt32 index); + + static void unLockJob(UInt32 index); + + /// Maximum jobs running in parallel supported by IAA hardware + static constexpr auto MAX_HW_JOB_NUMBER = 1024; + /// Entire buffer for storing all job objects + static std::unique_ptr hw_jobs_buffer; + /// Job pool for storing all job object pointers + static std::array hw_job_ptr_pool; + /// Locks for accessing each job object pointers + static std::array hw_job_ptr_locks; + static bool job_pool_ready; + std::mt19937 random_engine; + std::uniform_int_distribution distribution; +}; + +class SoftwareCodecDeflateQpl +{ +public: + ~SoftwareCodecDeflateQpl(); + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size); + void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size); + +private: + qpl_job * sw_job = nullptr; + std::unique_ptr sw_buffer; + qpl_job * getJobCodecPtr(); +}; + +class HardwareCodecDeflateQpl +{ +public: + /// RET_ERROR stands for hardware codec fail,need fallback to software codec. + static constexpr Int32 RET_ERROR = -1; + + HardwareCodecDeflateQpl(); + ~HardwareCodecDeflateQpl(); + Int32 doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) const; + + ///Submit job request to the IAA hardware and then busy waiting till it complete. 
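+    /// Returns the decompressed size on success, or RET_ERROR when the hardware path fails and the caller should fall back to the software codec.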
+ Int32 doDecompressDataSynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size); + + ///Submit job request to the IAA hardware and return immediately. IAA hardware will process decompression jobs automatically. + Int32 doDecompressDataAsynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size); + + /// Flush result for all previous requests which means busy waiting till all the jobs in "decomp_async_job_map" are finished. + /// Must be called subsequently after several calls of doDecompressDataReq. + void flushAsynchronousDecompressRequests(); + +private: + /// Asynchronous job map for decompression: job ID - job object. + /// For each submission, push job ID && job object into this map; + /// For flush, pop out job ID && job object from this map. Use job ID to release job lock and use job object to check job status till complete. + std::map decomp_async_job_map; + Poco::Logger * log; +}; + +class CompressionCodecDeflateQpl : public ICompressionCodec +{ +public: + CompressionCodecDeflateQpl(); + uint8_t getMethodByte() const override; + void updateHash(SipHash & hash) const override; + +protected: + bool isCompression() const override + { + return true; + } + + bool isGenericCompression() const override + { + return true; + } + + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; + void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + ///Flush result for previous asynchronous decompression requests on asynchronous mode. + void flushAsynchronousDecompressRequests() override; + +private: + UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; + std::unique_ptr hw_codec; + std::unique_ptr sw_codec; +}; + +} diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index b8a1c5877a4..7291d42f681 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -166,7 +166,7 @@ void registerCodecLZ4(CompressionCodecFactory & factory); void registerCodecLZ4HC(CompressionCodecFactory & factory); void registerCodecZSTD(CompressionCodecFactory & factory); void registerCodecMultiple(CompressionCodecFactory & factory); - +void registerCodecDeflateQpl(CompressionCodecFactory & factory); /// Keeper use only general-purpose codecs, so we don't need these special codecs /// in standalone build @@ -188,7 +188,6 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecZSTD(*this); registerCodecLZ4HC(*this); registerCodecMultiple(*this); - #ifndef KEEPER_STANDALONE_BUILD registerCodecDelta(*this); registerCodecT64(*this); @@ -196,6 +195,9 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecGorilla(*this); registerCodecEncrypted(*this); registerCodecFPC(*this); + #ifdef ENABLE_QPL_COMPRESSION + registerCodecDeflateQpl(*this); + #endif #endif default_codec = get("LZ4", {}); diff --git a/src/Compression/CompressionInfo.h b/src/Compression/CompressionInfo.h index 839fb68e8c3..985d74bbb74 100644 --- a/src/Compression/CompressionInfo.h +++ b/src/Compression/CompressionInfo.h @@ -45,7 +45,8 @@ enum class CompressionMethodByte : uint8_t Gorilla = 0x95, AES_128_GCM_SIV = 0x96, AES_256_GCM_SIV = 0x97, - FPC = 0x98 + FPC = 0x98, + DeflateQpl = 0x99, }; } diff --git a/src/Compression/ICompressionCodec.cpp b/src/Compression/ICompressionCodec.cpp index ba52aee69f8..c48ca99d452 100644 --- a/src/Compression/ICompressionCodec.cpp +++ 
b/src/Compression/ICompressionCodec.cpp @@ -91,7 +91,6 @@ UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char return header_size + compressed_bytes_written; } - UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, char * dest) const { assert(source != nullptr && dest != nullptr); diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index a741e65dfdd..f40404a84f3 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -45,9 +45,37 @@ public: /// Compressed bytes from uncompressed source to dest. Dest should preallocate memory UInt32 compress(const char * source, UInt32 source_size, char * dest) const; - /// Decompress bytes from compressed source to dest. Dest should preallocate memory + /// Decompress bytes from compressed source to dest. Dest should preallocate memory; UInt32 decompress(const char * source, UInt32 source_size, char * dest) const; + /// Three kinds of codec modes: + /// Synchronous mode, which is commonly used by default; + /// --- For a codec with a HW decompressor, it means submitting a request to the HW and busy-waiting until it completes. + /// Asynchronous mode, which requires HW decompressor support; + /// --- For a codec with a HW decompressor, it means submitting a request to the HW and returning immediately. + /// --- Must be paired with flushAsynchronousDecompressRequests. + /// SoftwareFallback mode is defined exclusively for codecs with a HW decompressor and enables their capability to fall back to the SW codec. + enum class CodecMode + { + Synchronous, + Asynchronous, + SoftwareFallback + }; + + /// Get current decompression mode + CodecMode getDecompressMode() const { return decompressMode; } + + /// If the mode is set to CodecMode::Asynchronous, it must be followed by flushAsynchronousDecompressRequests + void setDecompressMode(CodecMode mode) { decompressMode = mode; } + + /// Flush results for previous asynchronous decompression requests. + /// This function must be called after several requests have been offloaded to the HW, + /// to make sure the asynchronous results have been completely flushed into the target buffer. + /// Meanwhile, the source and target buffers for decompression cannot be overwritten until this function completes. + /// Otherwise it would conflict with the HW offloading and cause an exception.
+ /// For QPL deflate, it supports a maximum number of requests equal to DeflateQplJobHWPool::jobPoolSize + virtual void flushAsynchronousDecompressRequests() {} + /// Number of bytes, that will be used to compress uncompressed_size bytes with current codec virtual UInt32 getCompressedReserveSize(UInt32 uncompressed_size) const { @@ -103,6 +131,7 @@ protected: private: ASTPtr full_codec_desc; + CodecMode decompressMode{CodecMode::Synchronous}; }; } diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 864bb477786..8261f5d1e26 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -112,7 +112,7 @@ KeeperServer::KeeperServer( configuration_and_settings_->snapshot_storage_path, coordination_settings, checkAndGetSuperdigest(configuration_and_settings_->super_digest), - config.getBool("keeper_server.digest_enabled", true))) + config.getBool("keeper_server.digest_enabled", false))) , state_manager(nuraft::cs_new( server_id, "keeper_server", configuration_and_settings_->log_storage_path, configuration_and_settings_->state_file_path, config, coordination_settings)) , log(&Poco::Logger::get("KeeperServer")) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ed151eb28b5..17e4d27bbcd 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -663,7 +663,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) #define FORMAT_FACTORY_SETTINGS(M) \ M(Char, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \ - M(Bool, format_csv_allow_single_quotes, true, "If it is set to true, allow strings in single quotes.", 0) \ + M(Bool, format_csv_allow_single_quotes, false, "If it is set to true, allow strings in single quotes.", 0) \ M(Bool, format_csv_allow_double_quotes, true, "If it is set to true, allow strings in double quotes.", 0) \ M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \ M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 6d907395221..ba60fb99308 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -79,7 +79,8 @@ namespace SettingsChangesHistory static std::map settings_changes_history = { {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}}}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}}}, {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}},
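For illustration only (this sketch is not part of the patch): a minimal sketch of the asynchronous decompression flow described in ICompressionCodec.h above, assuming codec is a pointer to a codec backed by a hardware decompressor (e.g. DEFLATE_QPL) and blocks is a hypothetical collection of compressed blocks with preallocated destination buffers:

    codec->setDecompressMode(ICompressionCodec::CodecMode::Asynchronous);
    for (const auto & block : blocks)
        codec->decompress(block.compressed_data, block.compressed_size, block.dest);
    /// Neither the source nor the destination buffers may be touched until the flush below returns;
    /// for DEFLATE_QPL the flush also restores the mode to Synchronous.
    codec->flushAsynchronousDecompressRequests();

diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 558b13927c1..1ef86a8c12f 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -214,6 +214,19 @@ size_t DataTypeTuple::getPositionByName(const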
String & name) const throw Exception("Tuple doesn't have element with name '" + name + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); } +std::optional DataTypeTuple::tryGetPositionByName(const String & name) const +{ + size_t size = elems.size(); + for (size_t i = 0; i < size; ++i) + { + if (names[i] == name) + { + return std::optional(i); + } + } + return std::nullopt; +} + String DataTypeTuple::getNameByPosition(size_t i) const { if (i == 0 || i > names.size()) diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index 009a2284a0a..eed04631528 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -60,6 +61,7 @@ public: const Strings & getElementNames() const { return names; } size_t getPositionByName(const String & name) const; + std::optional tryGetPositionByName(const String & name) const; String getNameByPosition(size_t i) const; bool haveExplicitNames() const { return have_explicit_names; } diff --git a/src/DataTypes/Serializations/SerializationEnum.cpp b/src/DataTypes/Serializations/SerializationEnum.cpp index 39e9885fe17..a1b9c8bf95a 100644 --- a/src/DataTypes/Serializations/SerializationEnum.cpp +++ b/src/DataTypes/Serializations/SerializationEnum.cpp @@ -18,7 +18,7 @@ void SerializationEnum::serializeText(const IColumn & column, size_t row_n template void SerializationEnum::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeEscapedString(this->getNameForValue(assert_cast(column).getData()[row_num]), ostr); + writeEscapedString(this->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); } template @@ -69,13 +69,13 @@ void SerializationEnum::deserializeWholeText(IColumn & column, ReadBuffer template void SerializationEnum::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - writeJSONString(this->getNameForValue(assert_cast(column).getData()[row_num]), ostr, settings); + writeJSONString(this->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr, settings); } template void SerializationEnum::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeXMLStringForTextElement(this->getNameForValue(assert_cast(column).getData()[row_num]), ostr); + writeXMLStringForTextElement(this->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); } template diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index c79f588e46c..8e19c5a740b 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -511,8 +511,6 @@ void SerializationLowCardinality::serializeBinaryBulkWithMultipleStreams( /// Insert used_keys into global dictionary and update sub_index. 
auto indexes_with_overflow = global_dictionary->uniqueInsertRangeWithOverflow(*keys, 0, keys->size(), settings.low_cardinality_max_dictionary_size); - // size_t max_size = settings.low_cardinality_max_dictionary_size + indexes_with_overflow.overflowed_keys->size(); - // ColumnLowCardinality::Index(indexes_with_overflow.indexes->getPtr()).check(max_size); if (global_dictionary->size() > settings.low_cardinality_max_dictionary_size) throw Exception("Got dictionary with size " + toString(global_dictionary->size()) + @@ -656,11 +654,6 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( { auto maps = mapIndexWithAdditionalKeys(*indexes_column, global_dictionary->size()); - // ColumnLowCardinality::Index(maps.additional_keys_map->getPtr()).check(additional_keys->size()); - - // ColumnLowCardinality::Index(indexes_column->getPtr()).check( - // maps.dictionary_map->size() + maps.additional_keys_map->size()); - auto used_keys = IColumn::mutate(global_dictionary->getNestedColumn()->index(*maps.dictionary_map, 0)); if (!maps.additional_keys_map->empty()) diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index 0a3597e86c7..96e3a297d6a 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -78,9 +78,6 @@ private: template void deserializeImpl(IColumn & column, DeserializeFunctionPtr func, Args &&... args) const; - - // template - // static MutableColumnUniquePtr createColumnUniqueImpl(const IDataType & keys_type, const Creator & creator); }; } diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 5614e970315..e07fd4f26cf 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -213,7 +213,7 @@ void SerializationString::serializeText(const IColumn & column, size_t row_num, void SerializationString::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeEscapedString(assert_cast(column).getDataAt(row_num), ostr); + writeEscapedString(assert_cast(column).getDataAt(row_num).toView(), ostr); } @@ -266,7 +266,7 @@ void SerializationString::deserializeTextQuoted(IColumn & column, ReadBuffer & i void SerializationString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - writeJSONString(assert_cast(column).getDataAt(row_num), ostr, settings); + writeJSONString(assert_cast(column).getDataAt(row_num).toView(), ostr, settings); } @@ -278,7 +278,7 @@ void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & ist void SerializationString::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeXMLStringForTextElement(assert_cast(column).getDataAt(row_num), ostr); + writeXMLStringForTextElement(assert_cast(column).getDataAt(row_num).toView(), ostr); } diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 20d8706ca54..6ecc216e370 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -171,6 +171,15 @@ ColumnUInt8::Ptr DirectDictionary::hasKeys( auto requested_keys = requested_keys_extractor.extractAllKeys(); size_t requested_keys_size = requested_keys.size(); + HashMap 
requested_key_to_index; + requested_key_to_index.reserve(requested_keys_size); + + for (size_t i = 0; i < requested_keys.size(); ++i) + { + auto requested_key = requested_keys[i]; + requested_key_to_index[requested_key] = i; + } + auto result = ColumnUInt8::create(requested_keys_size, false); auto & result_data = result->getData(); @@ -196,17 +205,15 @@ ColumnUInt8::Ptr DirectDictionary::hasKeys( { auto block_key = block_keys_extractor.extractCurrentKey(); - size_t index; - for (index = 0; index < requested_keys_size; ++index) - { - if (!result_data[index] && requested_keys[index] == block_key) - { - keys_found++; - result_data[index] = true; + const auto * it = requested_key_to_index.find(block_key); + assert(it); - block_keys_extractor.rollbackCurrentKey(); - } - } + size_t result_data_found_index = it->getMapped(); + /// block_keys_size cannot be used, due to duplicates. + keys_found += !result_data[result_data_found_index]; + result_data[result_data_found_index] = true; + + block_keys_extractor.rollbackCurrentKey(); } block_key_columns.clear(); diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index d813cf1bcc8..459c4c44668 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -27,11 +27,6 @@ #include -namespace CurrentMetrics -{ - extern const Metric Write; -} - namespace ProfileEvents { extern const Event FileOpen; @@ -527,8 +522,6 @@ public: throw Exception(ErrorCodes::CANNOT_IO_SUBMIT, "Cannot submit request for asynchronous IO on file {}", file_path); } - // CurrentMetrics::Increment metric_increment_write{CurrentMetrics::Write}; - io_event event; while (io_getevents(aio_context.ctx, 1, 1, &event, nullptr) < 0) diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 5e9c2f7ac7a..0a097c4faef 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -275,8 +275,6 @@ void registerDictionarySourceJDBC(DictionarySourceFactory & factory) bool /* created_from_ddl */) -> DictionarySourcePtr { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Dictionary source of type `jdbc` is disabled until consistent support for nullable fields."); - // BridgeHelperPtr bridge = std::make_shared>(config, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".connection_string")); - // return std::make_unique(dict_struct, config, config_prefix + ".jdbc", sample_block, context, bridge); }; factory.registerSource("jdbc", create_table_source); } diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 54dce926893..b6cda8288d7 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -74,7 +74,6 @@ void DiskWebServer::initialize(const String & uri_path) const if (file_data.type == FileType::Directory) { directories_to_load.push_back(file_path); - // file_path = fs::path(file_path) / ""; } file_path = file_path.substr(url.size()); diff --git a/src/Functions/CustomWeekTransforms.h b/src/Functions/CustomWeekTransforms.h index 8656f9da927..c296c8228b1 100644 --- a/src/Functions/CustomWeekTransforms.h +++ b/src/Functions/CustomWeekTransforms.h @@ -63,12 +63,10 @@ struct ToStartOfWeekImpl static inline UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); -// return time_zone.toFirstDayNumOfWeek(t, week_mode); } static inline UInt16 execute(UInt32 t, UInt8 
week_mode, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); -// return time_zone.toFirstDayNumOfWeek(t, week_mode); } static inline UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone) { diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index e0c42401207..b666602e366 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1091,8 +1091,6 @@ struct ConvertThroughParsing static constexpr bool to_datetime64 = std::is_same_v; - // using ToFieldType = typename ToDataType::FieldType; - static bool isAllRead(ReadBuffer & in) { /// In case of FixedString, skip zero bytes at end. diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index 940e76df1c0..c6633732aaa 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -87,10 +87,10 @@ public: for (size_t i = 0; i < input_rows_count; ++i) { - StringRef current_row = column_haystack->getDataAt(i); + std::string_view current_row = column_haystack->getDataAt(i).toView(); - if (re2->Match(re2_st::StringPiece(current_row.data, current_row.size), - 0, current_row.size, re2_st::RE2::UNANCHORED, matched_groups.data(), matched_groups.size())) + if (re2->Match(re2_st::StringPiece(current_row.data(), current_row.size()), + 0, current_row.size(), re2_st::RE2::UNANCHORED, matched_groups.data(), matched_groups.size())) { // 1 is to exclude group #0 which is whole re match. for (size_t group = 1; group <= groups_count; ++group) diff --git a/src/Functions/formatReadableTimeDelta.cpp b/src/Functions/formatReadableTimeDelta.cpp index d781d227c64..219c2d95353 100644 --- a/src/Functions/formatReadableTimeDelta.cpp +++ b/src/Functions/formatReadableTimeDelta.cpp @@ -94,19 +94,19 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - StringRef maximum_unit_str; + std::string_view maximum_unit_str; if (arguments.size() == 2) { const ColumnPtr & maximum_unit_column = arguments[1].column; const ColumnConst * maximum_unit_const_col = checkAndGetColumnConstStringOrFixedString(maximum_unit_column.get()); if (maximum_unit_const_col) - maximum_unit_str = maximum_unit_const_col->getDataColumn().getDataAt(0); + maximum_unit_str = maximum_unit_const_col->getDataColumn().getDataAt(0).toView(); } Unit max_unit; /// Default means "use all available units". 
- if (maximum_unit_str.size == 0 || maximum_unit_str == "years") + if (maximum_unit_str.empty() || maximum_unit_str == "years") max_unit = Years; else if (maximum_unit_str == "months") max_unit = Months; @@ -122,7 +122,7 @@ public: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value of maximum unit argument ({}) for function {}, the only allowed values are:" " 'seconds', 'minutes', 'hours', 'days', 'months', 'years'.", - maximum_unit_str.toString(), getName()); + maximum_unit_str, getName()); auto col_to = ColumnString::create(); diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index 199d1a62f1d..b4e5d8e46e9 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -64,8 +64,8 @@ public: for (size_t i = 0; i < count; ++i) { - StringRef encoded_string = encoded->getDataAt(i); - geohashDecode(encoded_string.data, encoded_string.size, &lon_data[i], &lat_data[i]); + std::string_view encoded_string = encoded->getDataAt(i).toView(); + geohashDecode(encoded_string.data(), encoded_string.size(), &lon_data[i], &lat_data[i]); } MutableColumns result; diff --git a/src/Functions/isIPAddressContainedIn.cpp b/src/Functions/isIPAddressContainedIn.cpp index 1ba719cda63..a6f94c77ad1 100644 --- a/src/Functions/isIPAddressContainedIn.cpp +++ b/src/Functions/isIPAddressContainedIn.cpp @@ -27,7 +27,7 @@ class IPAddressVariant { public: - explicit IPAddressVariant(StringRef address_str) + explicit IPAddressVariant(std::string_view address_str) { /// IP address parser functions require that the input is /// NULL-terminated so we need to copy it. @@ -85,7 +85,7 @@ IPAddressCIDR parseIPWithCIDR(std::string_view cidr_str) throw DB::Exception("The text does not contain '/': " + std::string(cidr_str), DB::ErrorCodes::CANNOT_PARSE_TEXT); std::string_view addr_str = cidr_str.substr(0, pos_slash); - IPAddressVariant addr(StringRef{addr_str.data(), addr_str.size()}); + IPAddressVariant addr(addr_str); uint8_t prefix = 0; auto prefix_str = cidr_str.substr(pos_slash+1); @@ -188,7 +188,7 @@ namespace DB const auto & col_addr = col_addr_const.getDataColumn(); const auto & col_cidr = col_cidr_const.getDataColumn(); - const auto addr = IPAddressVariant(col_addr.getDataAt(0)); + const auto addr = IPAddressVariant(col_addr.getDataAt(0).toView()); const auto cidr = parseIPWithCIDR(col_cidr.getDataAt(0).toView()); ColumnUInt8::MutablePtr col_res = ColumnUInt8::create(1); @@ -204,7 +204,7 @@ namespace DB { const auto & col_addr = col_addr_const.getDataColumn(); - const auto addr = IPAddressVariant(col_addr.getDataAt (0)); + const auto addr = IPAddressVariant(col_addr.getDataAt(0).toView()); ColumnUInt8::MutablePtr col_res = ColumnUInt8::create(input_rows_count); ColumnUInt8::Container & vec_res = col_res->getData(); @@ -228,7 +228,7 @@ namespace DB ColumnUInt8::Container & vec_res = col_res->getData(); for (size_t i = 0; i < input_rows_count; ++i) { - const auto addr = IPAddressVariant(col_addr.getDataAt(i)); + const auto addr = IPAddressVariant(col_addr.getDataAt(i).toView()); vec_res[i] = isAddressInRange(addr, cidr) ? 1 : 0; } return col_res; @@ -242,7 +242,7 @@ namespace DB for (size_t i = 0; i < input_rows_count; ++i) { - const auto addr = IPAddressVariant(col_addr.getDataAt(i)); + const auto addr = IPAddressVariant(col_addr.getDataAt(i).toView()); const auto cidr = parseIPWithCIDR(col_cidr.getDataAt(i).toView()); vec_res[i] = isAddressInRange(addr, cidr) ? 
1 : 0; diff --git a/src/Functions/parseTimeDelta.cpp b/src/Functions/parseTimeDelta.cpp index fb5a7621a53..8cb7c229ae8 100644 --- a/src/Functions/parseTimeDelta.cpp +++ b/src/Functions/parseTimeDelta.cpp @@ -18,7 +18,8 @@ namespace ErrorCodes namespace { - const std::unordered_map time_unit_to_float = { + const std::unordered_map time_unit_to_float = + { {"years", 365 * 24 * 3600}, {"year", 365 * 24 * 3600}, {"yr", 365 * 24 * 3600}, @@ -50,6 +51,22 @@ namespace {"second", 1}, {"sec", 1}, {"s", 1}, + + {"milliseconds", 1e-3}, + {"millisecond", 1e-3}, + {"millisec", 1e-3}, + {"ms", 1e-3}, + + {"microseconds", 1e-6}, + {"microsecond", 1e-6}, + {"microsec", 1e-6}, + {"μs", 1e-6}, + {"us", 1e-6}, + + {"nanoseconds", 1e-9}, + {"nanosecond", 1e-9}, + {"nanosec", 1e-9}, + {"ns", 1e-9}, }; /** Prints amount of seconds in form of: @@ -248,7 +265,7 @@ namespace static bool scanUnit(std::string_view & str, Int64 & index, Int64 last_pos) { int64_t begin_index = index; - while (index <= last_pos && isalpha(str[index])) + while (index <= last_pos && !isdigit(str[index]) && !isSeparator(str[index])) { index++; } @@ -271,14 +288,18 @@ namespace scanSpaces(str, index, last_pos); /// ignore separator - if (index <= last_pos - && (str[index] == ';' || str[index] == '-' || str[index] == '+' || str[index] == ',' || str[index] == ':')) + if (index <= last_pos && isSeparator(str[index])) { index++; } scanSpaces(str, index, last_pos); } + + static bool isSeparator(char symbol) + { + return symbol == ';' || symbol == '-' || symbol == '+' || symbol == ',' || symbol == ':' || symbol == ' '; + } }; } diff --git a/src/Functions/tupleElement.cpp b/src/Functions/tupleElement.cpp index 023dc266b43..92ca6b85714 100644 --- a/src/Functions/tupleElement.cpp +++ b/src/Functions/tupleElement.cpp @@ -18,6 +18,10 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_INDEX; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int NOT_FOUND_COLUMN_IN_BLOCK; + extern const int NUMBER_OF_DIMENSIONS_MISMATCHED; + extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; } namespace @@ -40,9 +44,11 @@ public: return name; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { - return 2; + return 0; } bool useDefaultImplementationForConstants() const override @@ -59,8 +65,14 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - size_t count_arrays = 0; + const size_t number_of_arguments = arguments.size(); + if (number_of_arguments < 2 || number_of_arguments > 3) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(number_of_arguments) + ", should be 2 or 3", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + size_t count_arrays = 0; const IDataType * tuple_col = arguments[0].type.get(); while (const DataTypeArray * array = checkAndGetDataType(tuple_col)) { @@ -72,16 +84,34 @@ public: if (!tuple) throw Exception("First argument for function " + getName() + " must be tuple or array of tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - size_t index = getElementNum(arguments[1].column, *tuple); - DataTypePtr out_return_type = tuple->getElements()[index]; + auto index = getElementNum(arguments[1].column, *tuple, number_of_arguments); + if (index.has_value()) + { + DataTypePtr out_return_type = tuple->getElements()[index.value()]; - for (; count_arrays; --count_arrays) - out_return_type = std::make_shared(out_return_type); + for (; count_arrays; 
--count_arrays) + out_return_type = std::make_shared(out_return_type); - return out_return_type; + return out_return_type; + } + else + { + const IDataType * default_col = arguments[2].type.get(); + size_t default_argument_count_arrays = 0; + if (const DataTypeArray * array = checkAndGetDataType(default_col)) + { + default_argument_count_arrays = array->getNumberOfDimensions(); + } + + if (count_arrays != default_argument_count_arrays) + { + throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATCHED, "Dimension of types mismatched between first argument and third argument. Dimension of 1st argument: {}. Dimension of 3rd argument: {}.",count_arrays, default_argument_count_arrays); + } + return arguments[2].type; + } } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { Columns array_offsets; @@ -89,6 +119,12 @@ public: const IDataType * tuple_type = first_arg.type.get(); const IColumn * tuple_col = first_arg.column.get(); + bool first_arg_is_const = false; + if (typeid_cast(tuple_col)) + { + tuple_col = assert_cast(tuple_col)->getDataColumnPtr().get(); + first_arg_is_const = true; + } while (const DataTypeArray * array_type = checkAndGetDataType(tuple_type)) { const ColumnArray * array_col = assert_cast(tuple_col); @@ -103,18 +139,87 @@ public: if (!tuple_type_concrete || !tuple_col_concrete) throw Exception("First argument for function " + getName() + " must be tuple or array of tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - size_t index = getElementNum(arguments[1].column, *tuple_type_concrete); - ColumnPtr res = tuple_col_concrete->getColumns()[index]; + auto index = getElementNum(arguments[1].column, *tuple_type_concrete, arguments.size()); + + if (!index.has_value()) + { + if (!array_offsets.empty()) + { + recursiveCheckArrayOffsets(arguments[0].column, arguments[2].column, array_offsets.size()); + } + return arguments[2].column; + } + + ColumnPtr res = tuple_col_concrete->getColumns()[index.value()]; /// Wrap into Arrays for (auto it = array_offsets.rbegin(); it != array_offsets.rend(); ++it) res = ColumnArray::create(res, *it); + if (first_arg_is_const) + { + res = ColumnConst::create(res, input_rows_count); + } return res; } private: - size_t getElementNum(const ColumnPtr & index_column, const DataTypeTuple & tuple) const + + void recursiveCheckArrayOffsets(ColumnPtr col_x, ColumnPtr col_y, size_t depth) const + { + for (size_t i = 1; i < depth; ++i) + { + checkArrayOffsets(col_x, col_y); + col_x = assert_cast(col_x.get())->getDataPtr(); + col_y = assert_cast(col_y.get())->getDataPtr(); + } + checkArrayOffsets(col_x, col_y); + } + + void checkArrayOffsets(ColumnPtr col_x, ColumnPtr col_y) const + { + if (isColumnConst(*col_x)) + { + checkArrayOffsetsWithFirstArgConst(col_x, col_y); + } + else if (isColumnConst(*col_y)) + { + checkArrayOffsetsWithFirstArgConst(col_y, col_x); + } + else + { + const auto & array_x = *assert_cast(col_x.get()); + const auto & array_y = *assert_cast(col_y.get()); + if (!array_x.hasEqualOffsets(array_y)) + { + throw Exception("The argument 1 and argument 3 of function " + getName() + " have different array sizes", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + } + } + } + + void checkArrayOffsetsWithFirstArgConst(ColumnPtr col_x, ColumnPtr col_y) const + { + col_x = assert_cast(col_x.get())->getDataColumnPtr(); + col_y = 
col_y->convertToFullColumnIfConst(); + const auto & array_x = *assert_cast(col_x.get()); + const auto & array_y = *assert_cast(col_y.get()); + + const auto & offsets_x = array_x.getOffsets(); + const auto & offsets_y = array_y.getOffsets(); + + ColumnArray::Offset prev_offset = 0; + size_t row_size = offsets_y.size(); + for (size_t row = 0; row < row_size; ++row) + { + if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset)) + { + throw Exception("The argument 1 and argument 3 of function " + getName() + " have different array sizes", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + } + prev_offset = offsets_y[row]; + } + } + + std::optional getElementNum(const ColumnPtr & index_column, const DataTypeTuple & tuple, const size_t argument_size) const { if ( checkAndGetColumnConst(index_column.get()) @@ -131,11 +236,21 @@ private: if (index > tuple.getElements().size()) throw Exception("Index for tuple element is out of range.", ErrorCodes::ILLEGAL_INDEX); - return index - 1; + return std::optional(index - 1); } else if (const auto * name_col = checkAndGetColumnConst(index_column.get())) { - return tuple.getPositionByName(name_col->getValue()); + auto index = tuple.tryGetPositionByName(name_col->getValue()); + if (index.has_value()) + { + return index; + } + + if (argument_size == 2) + { + throw Exception("Tuple doesn't have element with name '" + name_col->getValue() + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + } + return std::nullopt; } else throw Exception("Second argument to " + getName() + " must be a constant UInt or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/IO/CascadeWriteBuffer.cpp b/src/IO/CascadeWriteBuffer.cpp index 616fbe9b789..ca11290c71b 100644 --- a/src/IO/CascadeWriteBuffer.cpp +++ b/src/IO/CascadeWriteBuffer.cpp @@ -50,8 +50,6 @@ void CascadeWriteBuffer::nextImpl() } set(curr_buffer->position(), curr_buffer->buffer().end() - curr_buffer->position()); -// std::cerr << "CascadeWriteBuffer a count=" << count() << " bytes=" << bytes << " offset=" << offset() -// << " bytes+size=" << bytes + buffer().size() << "\n"; } diff --git a/src/IO/Operators.h b/src/IO/Operators.h index 114ab692dc3..06ff20c43e8 100644 --- a/src/IO/Operators.h +++ b/src/IO/Operators.h @@ -56,7 +56,7 @@ template WriteBuffer & operator<< (BinaryManipWriteBuffer buf, inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const String & x) { writeEscapedString(x, buf); return buf; } inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, std::string_view x) { writeEscapedString(x, buf); return buf; } -inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, StringRef x) { writeEscapedString(x, buf); return buf; } +inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, StringRef x) { writeEscapedString(x.toView(), buf); return buf; } inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const char * x) { writeEscapedString(x, strlen(x), buf); return buf; } inline WriteBuffer & operator<< (QuoteManipWriteBuffer buf, const char * x) { writeAnyQuotedString<'\''>(x, x + strlen(x), buf.get()); return buf; } diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 6f35dae8300..2903a70b61a 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -360,19 +360,9 @@ void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & b } -inline void writeJSONString(StringRef s, WriteBuffer & buf, const FormatSettings & settings) -{ - writeJSONString(s.data, s.data + s.size, buf, settings); -} - inline void writeJSONString(std::string_view s, 
WriteBuffer & buf, const FormatSettings & settings) { - writeJSONString(StringRef{s}, buf, settings); -} - -inline void writeJSONString(const String & s, WriteBuffer & buf, const FormatSettings & settings) -{ - writeJSONString(StringRef{s}, buf, settings); + writeJSONString(s.data(), s.data() + s.size(), buf, settings); } template @@ -417,7 +407,7 @@ void writeJSONNumber(T x, WriteBuffer & ostr, const FormatSettings & settings) template -void writeAnyEscapedString(const String & s, WriteBuffer & buf) +void writeAnyEscapedString(std::string_view s, WriteBuffer & buf) { writeAnyEscapedString(s.data(), s.data() + s.size(), buf); } @@ -428,18 +418,6 @@ inline void writeEscapedString(const char * str, size_t size, WriteBuffer & buf) writeAnyEscapedString<'\''>(str, str + size, buf); } - -inline void writeEscapedString(const String & s, WriteBuffer & buf) -{ - writeEscapedString(s.data(), s.size(), buf); -} - - -inline void writeEscapedString(StringRef ref, WriteBuffer & buf) -{ - writeEscapedString(ref.data, ref.size, buf); -} - inline void writeEscapedString(std::string_view ref, WriteBuffer & buf) { writeEscapedString(ref.data(), ref.size(), buf); @@ -455,16 +433,9 @@ void writeAnyQuotedString(const char * begin, const char * end, WriteBuffer & bu template -void writeAnyQuotedString(const String & s, WriteBuffer & buf) +void writeAnyQuotedString(std::string_view ref, WriteBuffer & buf) { - writeAnyQuotedString(s.data(), s.data() + s.size(), buf); -} - - -template -void writeAnyQuotedString(StringRef ref, WriteBuffer & buf) -{ - writeAnyQuotedString(ref.data, ref.data + ref.size, buf); + writeAnyQuotedString(ref.data(), ref.data() + ref.size(), buf); } @@ -475,7 +446,7 @@ inline void writeQuotedString(const String & s, WriteBuffer & buf) inline void writeQuotedString(StringRef ref, WriteBuffer & buf) { - writeAnyQuotedString<'\''>(ref, buf); + writeAnyQuotedString<'\''>(ref.toView(), buf); } inline void writeQuotedString(std::string_view ref, WriteBuffer & buf) @@ -490,7 +461,7 @@ inline void writeDoubleQuotedString(const String & s, WriteBuffer & buf) inline void writeDoubleQuotedString(StringRef s, WriteBuffer & buf) { - writeAnyQuotedString<'"'>(s, buf); + writeAnyQuotedString<'"'>(s.toView(), buf); } inline void writeDoubleQuotedString(std::string_view s, WriteBuffer & buf) @@ -501,7 +472,7 @@ inline void writeDoubleQuotedString(std::string_view s, WriteBuffer & buf) /// Outputs a string in backquotes. inline void writeBackQuotedString(StringRef s, WriteBuffer & buf) { - writeAnyQuotedString<'`'>(s, buf); + writeAnyQuotedString<'`'>(s.toView(), buf); } /// Outputs a string in backquotes for MySQL. @@ -611,16 +582,11 @@ inline void writeXMLStringForTextElementOrAttributeValue(const char * begin, con } } -inline void writeXMLStringForTextElementOrAttributeValue(const String & s, WriteBuffer & buf) +inline void writeXMLStringForTextElementOrAttributeValue(std::string_view s, WriteBuffer & buf) { writeXMLStringForTextElementOrAttributeValue(s.data(), s.data() + s.size(), buf); } -inline void writeXMLStringForTextElementOrAttributeValue(StringRef s, WriteBuffer & buf) -{ - writeXMLStringForTextElementOrAttributeValue(s.data, s.data + s.size, buf); -} - /// Writing a string to a text node in XML (not into an attribute - otherwise you need more escaping). 
inline void writeXMLStringForTextElement(const char * begin, const char * end, WriteBuffer & buf) { @@ -652,16 +618,11 @@ inline void writeXMLStringForTextElement(const char * begin, const char * end, W } } -inline void writeXMLStringForTextElement(const String & s, WriteBuffer & buf) +inline void writeXMLStringForTextElement(std::string_view s, WriteBuffer & buf) { writeXMLStringForTextElement(s.data(), s.data() + s.size(), buf); } -inline void writeXMLStringForTextElement(StringRef s, WriteBuffer & buf) -{ - writeXMLStringForTextElement(s.data, s.data + s.size, buf); -} - template void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes); void formatUUID(const UInt8 * src16, UInt8 * dst36); diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 2703773f464..b91fd7ac5cf 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -448,17 +448,7 @@ static ColumnWithTypeAndName executeActionForHeader(const ActionsDAG::Node * nod { case ActionsDAG::ActionType::FUNCTION: { - // bool all_args_are_const = true; - - // for (const auto & argument : arguments) - // if (typeid_cast(argument.column.get()) == nullptr) - // all_args_are_const = false; - res_column.column = node->function->execute(arguments, res_column.type, 0, true); - - // if (!all_args_are_const) - // res_column.column = res_column.column->convertToFullColumnIfConst(); - break; } diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index b54c77b385f..722ba81451a 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -2100,7 +2100,6 @@ std::shared_ptr HashJoin::getNonJoinedBlocks(const Block & left if (multiple_disjuncts) { /// ... calculate `left_columns_count` ... - // throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO"); size_t left_columns_count = left_sample_block.columns(); auto non_joined = std::make_unique>(*this, max_block_size); return std::make_shared(std::move(non_joined), result_sample_block, left_columns_count, table_join->leftToRightKeyRemap()); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 56b6cd3c136..24bbaea7dcf 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -297,7 +297,8 @@ static ASTPtr parseAdditionalFilterConditionForTable( auto & table = tuple.at(0).safeGet(); auto & filter = tuple.at(1).safeGet(); - if ((table == target.table && context.getCurrentDatabase() == target.database) || + if (table == target.alias || + (table == target.table && context.getCurrentDatabase() == target.database) || (table == target.database + '.' 
+ target.table)) { /// Try to parse expression diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index bdec44b74f7..b2622607760 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -280,7 +280,6 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan) { - // auto num_distinct_union = optimizeUnionList(); size_t num_plans = nested_interpreters.size(); const Settings & settings = context->getSettingsRef(); diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 9eec82b4179..c2f3fc7c27d 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -244,28 +244,6 @@ private: } }; -// class AssemblyPrinter -// { -// public: - -// explicit AssemblyPrinter(llvm::TargetMachine &target_machine_) -// : target_machine(target_machine_) -// { -// } - -// void print(llvm::Module & module) -// { -// llvm::legacy::PassManager pass_manager; -// target_machine.Options.MCOptions.AsmVerbose = true; -// if (target_machine.addPassesToEmitFile(pass_manager, llvm::errs(), nullptr, llvm::CodeGenFileType::CGFT_AssemblyFile)) -// throw Exception(ErrorCodes::CANNOT_COMPILE_CODE, "MachineCode cannot be printed"); - -// pass_manager.run(module); -// } -// private: -// llvm::TargetMachine & target_machine; -// }; - /** MemoryManager for module. * Keep total allocated size during RuntimeDyld linker execution. */ diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index ce79ccf708a..275f3bc75cc 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -584,7 +584,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe auto storage_parse_result = storage_p.parse(pos, storage, expected); - if (storage_parse_result && need_parse_as_select()) + if ((storage_parse_result || is_temporary) && need_parse_as_select()) { if (!select_p.parse(pos, select, expected)) return false; diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 68225d73ff1..29c57e08573 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -205,7 +205,6 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie Stopwatch total_time_watch; #endif - // auto & node = tasks.getNode(thread_num); auto & context = tasks.getThreadContext(thread_num); bool yield = false; diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index 423fd483712..a8e2ddf95e4 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -74,15 +74,6 @@ protected: if (chunk) { auto block = getPort(PortKind::Main).getHeader(); - - // const auto & info = chunk.getChunkInfo(); - // const auto * agg_info = typeid_cast(info.get()); - // if (agg_info) - // { - // block.info.bucket_num = agg_info->bucket_num; - // block.info.is_overflows = agg_info->is_overflows; - // } - block.setColumns(chunk.detachColumns()); writer.write(block); } diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 6c4ca5ef85b..2448a1e2a94 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ 
b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -71,11 +71,11 @@ static const Graphite::Pattern undef_pattern = .type = undef_pattern.TypeUndef, }; -inline static const Patterns & selectPatternsForMetricType(const Graphite::Params & params, StringRef path) +inline static const Patterns & selectPatternsForMetricType(const Graphite::Params & params, std::string_view path) { if (params.patterns_typed) { - std::string_view path_view = path.toView(); + std::string_view path_view = path; if (path_view.find("?"sv) == path_view.npos) return params.patterns_plain; else @@ -89,7 +89,7 @@ inline static const Patterns & selectPatternsForMetricType(const Graphite::Param Graphite::RollupRule selectPatternForPath( const Graphite::Params & params, - StringRef path) + std::string_view path) { const Graphite::Pattern * first_match = &undef_pattern; @@ -119,7 +119,7 @@ Graphite::RollupRule selectPatternForPath( } else { - if (pattern.regexp->match(path.data, path.size)) + if (pattern.regexp->match(path.data(), path.size())) { /// General pattern with matched path if (pattern.type == pattern.TypeAll) diff --git a/src/Processors/Merges/Algorithms/Graphite.h b/src/Processors/Merges/Algorithms/Graphite.h index 05306ebe30f..46b1bbbfcad 100644 --- a/src/Processors/Merges/Algorithms/Graphite.h +++ b/src/Processors/Merges/Algorithms/Graphite.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -147,7 +146,7 @@ struct Params using RollupRule = std::pair; -Graphite::RollupRule selectPatternForPath(const Graphite::Params & params, StringRef path); +Graphite::RollupRule selectPatternForPath(const Graphite::Params & params, std::string_view path); void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params); diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index eff62d73f50..467ded19f4d 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -120,7 +120,7 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() return Status(current.impl->order); } - StringRef next_path = current->all_columns[columns_definition.path_column_num]->getDataAt(current->getRow()); + std::string_view next_path = current->all_columns[columns_definition.path_column_num]->getDataAt(current->getRow()).toView(); bool new_path = is_first || next_path != current_group_path; is_first = false; @@ -190,7 +190,7 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() current_subgroup_newest_row.set(current, sources[current.impl->order].chunk); /// Small hack: group and subgroups have the same path, so we can set current_group_path here instead of startNextGroup - /// But since we keep in memory current_subgroup_newest_row's block, we could use StringRef for current_group_path and don't + /// But since we keep in memory current_subgroup_newest_row's block, we could use string_view for current_group_path and don't /// make deep copy of the path. 
current_group_path = next_path; } diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h index 4968cbfc470..d6d2f66fb82 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h @@ -92,7 +92,7 @@ private: */ /// Path name of current bucket - StringRef current_group_path; + std::string_view current_group_path; static constexpr size_t max_row_refs = 2; /// current_subgroup_newest_row, current_row. /// Last row with maximum version for current primary key (time bucket). diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 9b01e048391..9f29ad9ad07 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -126,7 +126,6 @@ void RemoteSource::onCancel() { was_query_canceled = true; query_executor->cancel(&read_context); - // is_async_state = false; } void RemoteSource::onUpdatePorts() @@ -135,7 +134,6 @@ void RemoteSource::onUpdatePorts() { was_query_canceled = true; query_executor->finish(&read_context); - // is_async_state = false; } } diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 88a52defa1e..340b85efae9 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -323,7 +323,6 @@ QueryPipelineBuilderPtr QueryPipelineBuilder::mergePipelines( left->pipe.processors.emplace_back(transform); left->pipe.processors.insert(left->pipe.processors.end(), right->pipe.processors.begin(), right->pipe.processors.end()); - // left->pipe.holder = std::move(right->pipe.holder); left->pipe.header = left->pipe.output_ports.front()->getHeader(); left->pipe.max_parallel_streams = std::max(left->pipe.max_parallel_streams, right->pipe.max_parallel_streams); return left; diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 5e9da48fc68..bf2638f7bc3 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/Hive/HiveCommon.cpp b/src/Storages/Hive/HiveCommon.cpp index 7b2f04f7073..609adcf65c9 100644 --- a/src/Storages/Hive/HiveCommon.cpp +++ b/src/Storages/Hive/HiveCommon.cpp @@ -66,7 +66,6 @@ HiveMetastoreClient::HiveTableMetadataPtr HiveMetastoreClient::getTableMetadata( }; tryCallHiveClient(client_call); - // bool update_cache = shouldUpdateTableMetadata(db_name, table_name, partitions); String cache_key = getCacheKey(db_name, table_name); HiveTableMetadataPtr metadata = table_metadata_cache.get(cache_key); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 60941108f00..95f25aa1955 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -603,22 +603,6 @@ String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize( return *minimum_size_column; } -// String IMergeTreeDataPart::getFullPath() const -// { -// if (relative_path.empty()) -// throw Exception("Part relative_path cannot be empty. It's bug.", ErrorCodes::LOGICAL_ERROR); - -// return fs::path(storage.getFullPathOnDisk(volume->getDisk())) / (parent_part ? 
parent_part->relative_path : "") / relative_path / ""; -// } - -// String IMergeTreeDataPart::getRelativePath() const -// { -// if (relative_path.empty()) -// throw Exception("Part relative_path cannot be empty. It's bug.", ErrorCodes::LOGICAL_ERROR); - -// return fs::path(storage.relative_data_path) / (parent_part ? parent_part->relative_path : "") / relative_path / ""; -// } - void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency) { assertOnDisk(); diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index 3b94b85607a..dbcca1443b5 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -30,9 +30,6 @@ public: } protected: - // using SerializationState = ISerialization::SerializeBinaryBulkStatePtr; - - // ISerialization::OutputStreamGetter createStreamGetter(const String & name, WrittenOffsetColumns & offset_columns); /// Remove all columns marked expired in data_part. Also, clears checksums /// and columns array. Return set of removed files names. diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 7426b384394..dc468174dfa 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -596,7 +596,6 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c const auto & projections = global_ctx->metadata_snapshot->getProjections(); - // tasks_for_projections.reserve(projections.size()); for (const auto & projection : projections) { diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 1bc4c26e40e..219093e8d75 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1584,8 +1584,6 @@ bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry) return true; /// NOTE Deletion from `virtual_parts` is not done, but it is only necessary for merge. 
} - // bool do_fetch = false; - switch (entry.type) { case LogEntry::ATTACH_PART: @@ -1593,7 +1591,6 @@ bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry) [[fallthrough]]; case LogEntry::GET_PART: return executeFetch(entry); - // do_fetch = true; case LogEntry::MERGE_PARTS: throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge has to be executed by another function"); case LogEntry::MUTATE_PART: @@ -1609,8 +1606,6 @@ bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry) default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected log entry type: {}", static_cast(entry.type)); } - - // return true; } diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 5f5a7887e80..d86a0d4f5df 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -131,6 +131,7 @@ const char * auto_contributors[] { "Anton Okhitin", "Anton Okulov", "Anton Patsev", + "Anton Petrov", "Anton Popov", "Anton Tihonov", "Anton Tikhonov", @@ -149,6 +150,7 @@ const char * auto_contributors[] { "Artem Zuikov", "Artemeey", "Artemkin Pavel", + "Arthur Passos", "Arthur Petukhovsky", "Arthur Tokarchuk", "Arthur Wong", @@ -193,7 +195,9 @@ const char * auto_contributors[] { "Chao Ma", "Chao Wang", "CheSema", + "Chebarykov Pavel", "Chen Yufei", + "Cheng Pan", "Chienlung Cheung", "Christian", "Christoph Wurm", @@ -248,6 +252,7 @@ const char * auto_contributors[] { "Dmitry Moskowski", "Dmitry Muzyka", "Dmitry Novik", + "Dmitry Pavlov", "Dmitry Petukhov", "Dmitry Rubashkin", "Dmitry S..ky / skype: dvska-at-skype", @@ -280,6 +285,7 @@ const char * auto_contributors[] { "Evgeniy Udodov", "Evgeny", "Evgeny Konkov", + "Evgeny Kruglov", "Evgeny Markov", "Ewout", "FArthur-cmd", @@ -323,6 +329,7 @@ const char * auto_contributors[] { "Grigory", "Grigory Buteyko", "Grigory Pervakov", + "GruffGemini", "Guillaume Tassery", "Guo Wei (William)", "Haavard Kvaalen", @@ -330,6 +337,7 @@ const char * auto_contributors[] { "HaiBo Li", "Hamoon", "Han Fei", + "Harry Lee", "Harry-Lee", "HarryLeeIBM", "Hasitha Kanchana", @@ -386,6 +394,7 @@ const char * auto_contributors[] { "Jake Liu", "Jakub Kuklis", "James Maidment", + "James Morrison", "JaosnHsieh", "Jason", "Jason Keirstead", @@ -402,6 +411,7 @@ const char * auto_contributors[] { "John Hummel", "John Skopis", "Jonatas Freitas", + "Jordi Villar", "João Figueiredo", "Julian Gilyadov", "Julian Zhou", @@ -444,6 +454,7 @@ const char * auto_contributors[] { "Larry Luo", "Lars Eidnes", "Latysheva Alexandra", + "Laurie Li", "Lemore", "Leonardo Cecchi", "Leonid Krylov", @@ -516,6 +527,7 @@ const char * auto_contributors[] { "Michael Monashev", "Michael Nutt", "Michael Razuvaev", + "Michael Schnerring", "Michael Smitasin", "Michail Safronov", "Michal Lisowski", @@ -632,6 +644,7 @@ const char * auto_contributors[] { "Pawel Rog", "Peignon Melvyn", "Peng Jian", + "Peng Liu", "Persiyanov Dmitriy Andreevich", "Pervakov Grigorii", "Pervakov Grigory", @@ -643,6 +656,7 @@ const char * auto_contributors[] { "Pxl", "Pysaoke", "Quid37", + "Rafael Acevedo", "Rafael David Tinoco", "Rajkumar", "Rajkumar Varada", @@ -670,6 +684,7 @@ const char * auto_contributors[] { "Roman Nozdrin", "Roman Peshkurov", "Roman Tsisyk", + "Roman Vasin", "Roman Zhukov", "Roy Bellingan", "Ruslan", @@ -685,6 +700,7 @@ const char * auto_contributors[] { "SaltTan", "Sami Kerola", "Samuel Chou", + "San", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", @@ 
-760,6 +776,7 @@ const char * auto_contributors[] { "Tiaonmmn", "Tigran Khudaverdyan", "Timur Magomedov", + "Timur Solodovnikov", "TiunovNN", "Tobias Adamson", "Tobias Lins", @@ -814,6 +831,8 @@ const char * auto_contributors[] { "Vladimir C", "Vladimir Ch", "Vladimir Chebotarev", + "Vladimir Chebotaryov", + "Vladimir Galunshchikov", "Vladimir Golovchenko", "Vladimir Goncharov", "Vladimir Klimontovich", @@ -823,6 +842,7 @@ const char * auto_contributors[] { "Vladimir Smirnov", "Vladislav Rassokhin", "Vladislav Smirnov", + "Vladislav V", "Vojtech Splichal", "Volodymyr Kuznetsov", "Vsevolod Orlov", @@ -831,6 +851,7 @@ const char * auto_contributors[] { "W", "Wang Fenjin", "WangZengrui", + "Wangyang Guo", "Weiqing Xu", "William Shallum", "Winter Zhang", @@ -838,6 +859,7 @@ const char * auto_contributors[] { "Xianda Ke", "Xiang Zhou", "Xin Wang", + "Xoel Lopez Barata", "Xudong Zhang", "Y Lu", "Yakko Majuri", @@ -855,6 +877,8 @@ const char * auto_contributors[] { "Yong Wang", "Yong-Hao Zou", "Youenn Lebras", + "Yu, Peng", + "Yuko Takagi", "Yuntao Wu", "Yuri Dyachenko", "Yurii Vlasenko", @@ -871,6 +895,7 @@ const char * auto_contributors[] { "Zijie Lu", "Zoran Pandovski", "a.palagashvili", + "aaapetrenko", "abdrakhmanov", "abel-wang", "abyss7", @@ -933,6 +958,7 @@ const char * auto_contributors[] { "chang.chen", "changvvb", "chasingegg", + "chen", "chen9t", "chengy8934", "chenjian", @@ -1110,6 +1136,8 @@ const char * auto_contributors[] { "linceyou", "lincion", "lingo-xp", + "lingpeng0314", + "lirulei", "listar", "litao91", "liu-bov", @@ -1119,10 +1147,13 @@ const char * auto_contributors[] { "liuyimin", "liyang", "liyang830", + "lokax", "lomberts", "loneylee", "long2ice", + "loyispa", "lthaooo", + "ltrk2", "ltybc-coder", "luc1ph3r", "lulichao", @@ -1213,6 +1244,7 @@ const char * auto_contributors[] { "redclusive", "rfraposa", "ritaank", + "rnbondarenko", "robert", "robot-clickhouse", "robot-metrika-test", @@ -1225,6 +1257,7 @@ const char * auto_contributors[] { "ryzuo", "s-kat", "santaux", + "santrancisco", "satanson", "save-my-heart", "sdk2", @@ -1327,6 +1360,7 @@ const char * auto_contributors[] { "zhangxiao871", "zhangyifan27", "zhangyuli1", + "zhao zhou", "zhen ni", "zhifeng", "zhongyuankai", diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 8fa4d02e8e1..8acd7434d51 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -123,6 +123,17 @@ ColumnsDescription getStructureOfRemoteTable( std::string fail_messages; + /// Use local shard as first priority, as it needs no network communication + for (const auto & shard_info : shards_info) + { + if (shard_info.isLocal()) + { + const auto & res = getStructureOfRemoteTableInShard(cluster, shard_info, table_id, context, table_func_ptr); + chassert(!res.empty()); + return res; + } + } + for (const auto & shard_info : shards_info) { try diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index a53ce6715d5..420ca7a0ff7 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -3,8 +3,9 @@ import time import os import csv -from env_helper import GITHUB_REPOSITORY +from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL from ci_config import CI_CONFIG +from pr_info import SKIP_SIMPLE_CHECK_LABEL RETRY = 5 @@ -73,3 +74,28 @@ def post_labels(gh, pr_info, labels_names): pull_request = repo.get_pull(pr_info.number) for label in labels_names: pull_request.add_to_labels(label) + + +def 
fail_simple_check(gh, pr_info, description): + if SKIP_SIMPLE_CHECK_LABEL in pr_info.labels: + return + commit = get_commit(gh, pr_info.sha) + commit.create_status( + context="Simple Check", + description=description, + state="failure", + target_url=GITHUB_RUN_URL, + ) + + +def create_simple_check(gh, pr_info): + commit = get_commit(gh, pr_info.sha) + for status in commit.get_statuses(): + if "Simple Check" in status.context: + return + commit.create_status( + context="Simple Check", + description="Skipped", + state="success", + target_url=GITHUB_RUN_URL, + ) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index ce5a4195ceb..2e4d54f34c2 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -8,13 +8,16 @@ import sys from github import Github -from env_helper import CACHES_PATH, TEMP_PATH, GITHUB_SERVER_URL, GITHUB_REPOSITORY -from pr_info import FORCE_TESTS_LABEL, PRInfo, SKIP_SIMPLE_CHECK_LABEL +from env_helper import CACHES_PATH, TEMP_PATH +from pr_info import FORCE_TESTS_LABEL, PRInfo from s3_helper import S3Helper from get_robot_token import get_best_robot_token from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status, get_commit +from commit_status_helper import ( + post_commit_status, + fail_simple_check, +) from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, @@ -219,16 +222,5 @@ if __name__ == "__main__": if FORCE_TESTS_LABEL in pr_info.labels and state != "error": print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") else: - if SKIP_SIMPLE_CHECK_LABEL not in pr_info.labels: - url = ( - f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/" - "blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1" - ) - commit = get_commit(gh, pr_info.sha) - commit.create_status( - context="Simple Check", - description=f"{NAME} failed", - state="failed", - target_url=url, - ) + fail_simple_check(gh, pr_info, f"{NAME} failed") sys.exit(1) diff --git a/tests/ci/rerun_helper.py b/tests/ci/rerun_helper.py index 35363593db6..0d523640f56 100644 --- a/tests/ci/rerun_helper.py +++ b/tests/ci/rerun_helper.py @@ -36,3 +36,9 @@ class RerunHelper: ): return True return False + + def get_finished_status(self): + for status in self.statuses: + if self.check_name in status.context: + return status + return None diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index b6d654c7bed..a39d97ce81d 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -6,7 +6,12 @@ from typing import Tuple from github import Github -from commit_status_helper import get_commit, post_labels, remove_labels +from commit_status_helper import ( + get_commit, + post_labels, + remove_labels, + create_simple_check, +) from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo @@ -223,12 +228,7 @@ if __name__ == "__main__": if pr_labels_to_remove: remove_labels(gh, pr_info, pr_labels_to_remove) - commit.create_status( - context="Simple Check", - description="Skipped", - state="success", - target_url=GITHUB_RUN_URL, - ) + create_simple_check(gh, pr_info) if description_error: print( diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 84ed9e5a124..dd63909ad39 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -1,31 +1,29 @@ #!/usr/bin/env python3 -import logging -import subprocess -import os +import argparse import csv +import 
logging +import os +import subprocess import sys -from github import Github -from env_helper import ( - RUNNER_TEMP, - GITHUB_WORKSPACE, - GITHUB_REPOSITORY, - GITHUB_SERVER_URL, -) -from s3_helper import S3Helper -from pr_info import PRInfo, SKIP_SIMPLE_CHECK_LABEL -from get_robot_token import get_best_robot_token -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status, get_commit from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import fail_simple_check, post_commit_status +from docker_pull_helper import get_image_with_version +from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP +from get_robot_token import get_best_robot_token +from github_helper import GitHub +from git_helper import git_runner +from pr_info import PRInfo from rerun_helper import RerunHelper +from s3_helper import S3Helper +from ssh import SSHKey +from stopwatch import Stopwatch +from upload_result_helper import upload_results NAME = "Style Check (actions)" @@ -57,7 +55,8 @@ def process_result(result_folder): try: results_path = os.path.join(result_folder, "test_results.tsv") - test_results = list(csv.reader(open(results_path, "r"), delimiter="\t")) + with open(results_path, "r", encoding="utf-8") as fd: + test_results = list(csv.reader(fd, delimiter="\t")) if len(test_results) == 0: raise Exception("Empty results") @@ -68,8 +67,77 @@ def process_result(result_folder): return state, description, test_results, additional_files +def parse_args(): + parser = argparse.ArgumentParser("Check and report style issues in the repository") + parser.add_argument("--push", default=True, help=argparse.SUPPRESS) + parser.add_argument( + "--no-push", + action="store_false", + dest="push", + help="do not commit and push automatic fixes", + default=argparse.SUPPRESS, + ) + return parser.parse_args() + + +def checkout_head(pr_info: PRInfo): + # It works ONLY for PRs, and only over ssh, so either + # ROBOT_CLICKHOUSE_SSH_KEY should be set or ssh-agent should work + assert pr_info.number + if not pr_info.head_name == pr_info.base_name: + # We can't push to forks, sorry folks + return + remote_url = pr_info.event["pull_request"]["base"]["repo"]["ssh_url"] + git_prefix = ( # All commits to remote are done as robot-clickhouse + "git -c user.email=robot-clickhouse@clickhouse.com " + "-c user.name=robot-clickhouse -c commit.gpgsign=false " + "-c core.sshCommand=" + "'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no'" + ) + fetch_cmd = ( + f"{git_prefix} fetch --depth=1 " + f"{remote_url} {pr_info.head_ref}:head-{pr_info.head_ref}" + ) + if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""): + with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"): + git_runner(fetch_cmd) + else: + git_runner(fetch_cmd) + git_runner(f"git checkout -f head-{pr_info.head_ref}") + + +def commit_push_staged(pr_info: PRInfo): + # It works ONLY for PRs, and only over ssh, so either + # ROBOT_CLICKHOUSE_SSH_KEY should be set or ssh-agent should work + assert pr_info.number + if not pr_info.head_name == pr_info.base_name: + # We can't push to forks, sorry folks + return + git_staged = git_runner("git diff --cached --name-only") + if not git_staged: + return + remote_url = pr_info.event["pull_request"]["base"]["repo"]["ssh_url"] + git_prefix = ( # All commits to remote are done as robot-clickhouse + "git -c user.email=robot-clickhouse@clickhouse.com " + "-c 
user.name=robot-clickhouse -c commit.gpgsign=false " + "-c core.sshCommand=" + "'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no'" + ) + git_runner(f"{git_prefix} commit -m 'Automatic style fix'") + push_cmd = ( + f"{git_prefix} push {remote_url} head-{pr_info.head_ref}:{pr_info.head_ref}" + ) + if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""): + with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"): + git_runner(push_cmd) + else: + git_runner(push_cmd) + + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) + logging.getLogger("git_helper").setLevel(logging.DEBUG) + args = parse_args() stopwatch = Stopwatch() @@ -77,8 +145,10 @@ if __name__ == "__main__": temp_path = os.path.join(RUNNER_TEMP, "style_check") pr_info = PRInfo() + if args.push: + checkout_head(pr_info) - gh = Github(get_best_robot_token()) + gh = GitHub(get_best_robot_token()) rerun_helper = RerunHelper(gh, pr_info, NAME) if rerun_helper.is_already_finished_by_status(): @@ -103,6 +173,9 @@ if __name__ == "__main__": shell=True, ) + if args.push: + commit_push_staged(pr_info) + state, description, test_results, additional_files = process_result(temp_path) ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, NAME, test_results) @@ -110,7 +183,7 @@ if __name__ == "__main__": report_url = upload_results( s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME ) - print("::notice ::Report url: {}".format(report_url)) + print(f"::notice ::Report url: {report_url}") post_commit_status(gh, pr_info.sha, NAME, description, state, report_url) prepared_events = prepare_tests_results_for_clickhouse( @@ -124,17 +197,6 @@ if __name__ == "__main__": ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": - if SKIP_SIMPLE_CHECK_LABEL not in pr_info.labels: - url = ( - f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/" - "blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1" - ) - commit = get_commit(gh, pr_info.sha) - commit.create_status( - context="Simple Check", - description=f"{NAME} failed", - state="failed", - target_url=url, - ) + if state in ["error", "failure"]: + fail_simple_check(gh, pr_info, f"{NAME} failed") sys.exit(1) diff --git a/tests/integration/README.md b/tests/integration/README.md index 2d44ff70861..18d46908524 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -44,7 +44,9 @@ sudo -H pip install \ dict2xml \ hypothesis \ pyhdfs \ - pika + pika \ + meilisearch \ + nats-py ``` (highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python3-pytest python3-dicttoxml python3-docker python3-pymysql python3-protobuf python3-pymongo python3-tzlocal python3-kazoo python3-psycopg2 kafka-python python3-pytest-timeout python3-minio` diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 5f1d1a32588..7700fc2dffd 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -385,6 +385,7 @@ class ClickHouseCluster: self.with_jdbc_bridge = False self.with_nginx = False self.with_hive = False + self.with_coredns = False self.with_minio = False self.minio_dir = os.path.join(self.instances_dir, "minio") @@ -428,6 +429,8 @@ class ClickHouseCluster: self.schema_registry_port = get_free_port() self.kafka_docker_id = self.get_instance_docker_id(self.kafka_host) + self.coredns_host = "coredns" + # available when with_kerberozed_kafka == True self.kerberized_kafka_host = 
"kerberized_kafka1" self.kerberized_kafka_port = get_free_port() @@ -1102,6 +1105,25 @@ class ClickHouseCluster: ] return self.base_mongo_cmd + def setup_coredns_cmd(self, instance, env_variables, docker_compose_yml_dir): + self.with_coredns = True + env_variables["COREDNS_CONFIG_DIR"] = instance.path + "/" + "coredns_config" + self.base_cmd.extend( + ["--file", p.join(docker_compose_yml_dir, "docker_compose_coredns.yml")] + ) + + self.base_coredns_cmd = [ + "docker-compose", + "--env-file", + instance.env_file, + "--project-name", + self.project_name, + "--file", + p.join(docker_compose_yml_dir, "docker_compose_coredns.yml"), + ] + + return self.base_coredns_cmd + def setup_meili_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_meili = True env_variables["MEILI_HOST"] = self.meili_host @@ -1265,6 +1287,7 @@ class ClickHouseCluster: with_cassandra=False, with_jdbc_bridge=False, with_hive=False, + with_coredns=False, hostname=None, env_variables=None, image="clickhouse/integration-test", @@ -1349,6 +1372,7 @@ class ClickHouseCluster: with_cassandra=with_cassandra, with_jdbc_bridge=with_jdbc_bridge, with_hive=with_hive, + with_coredns=with_coredns, server_bin_path=self.server_bin_path, odbc_bridge_bin_path=self.odbc_bridge_bin_path, library_bridge_bin_path=self.library_bridge_bin_path, @@ -1513,6 +1537,11 @@ class ClickHouseCluster: ) ) + if with_coredns and not self.with_coredns: + cmds.append( + self.setup_coredns_cmd(instance, env_variables, docker_compose_yml_dir) + ) + if with_meili and not self.with_meili: cmds.append( self.setup_meili_cmd(instance, env_variables, docker_compose_yml_dir) @@ -1629,6 +1658,16 @@ class ClickHouseCluster: "IPAddress" ] + def get_instance_global_ipv6(self, instance_name): + logging.debug("get_instance_ip instance_name={}".format(instance_name)) + docker_id = self.get_instance_docker_id(instance_name) + # for cont in self.docker_client.containers.list(): + # logging.debug("CONTAINERS LIST: ID={} NAME={} STATUS={}".format(cont.id, cont.name, cont.status)) + handle = self.docker_client.containers.get(docker_id) + return list(handle.attrs["NetworkSettings"]["Networks"].values())[0][ + "GlobalIPv6Address" + ] + def get_container_id(self, instance_name): return self.get_instance_docker_id(instance_name) # docker_id = self.get_instance_docker_id(instance_name) @@ -2453,6 +2492,12 @@ class ClickHouseCluster: self.up_called = True self.wait_mongo_to_start(30, secure=self.with_mongo_secure) + if self.with_coredns and self.base_coredns_cmd: + logging.debug("Setup coredns") + run_and_check(self.base_coredns_cmd + common_opts) + self.up_called = True + time.sleep(10) + if self.with_meili and self.base_meili_cmd: logging.debug("Setup MeiliSearch") run_and_check(self.base_meili_cmd + common_opts) @@ -2791,6 +2836,7 @@ class ClickHouseInstance: with_azurite, with_jdbc_bridge, with_hive, + with_coredns, with_cassandra, server_bin_path, odbc_bridge_bin_path, @@ -2874,6 +2920,8 @@ class ClickHouseInstance: self.with_cassandra = with_cassandra self.with_jdbc_bridge = with_jdbc_bridge self.with_hive = with_hive + self.with_coredns = with_coredns + self.coredns_config_dir = p.abspath(p.join(base_path, "coredns_config")) self.main_config_name = main_config_name self.users_config_name = users_config_name @@ -3783,6 +3831,11 @@ class ClickHouseInstance: self.kerberos_secrets_dir, p.abspath(p.join(self.path, "secrets")) ) + if self.with_coredns: + shutil.copytree( + self.coredns_config_dir, p.abspath(p.join(self.path, "coredns_config")) + ) + # Copy config.d 
configs logging.debug( f"Copy custom test config files {self.custom_main_config_paths} to {self.config_d_dir}" diff --git a/tests/integration/test_distributed_respect_user_timeouts/test.py b/tests/integration/test_distributed_respect_user_timeouts/test.py index ea79a9544d5..593843b4e4a 100644 --- a/tests/integration/test_distributed_respect_user_timeouts/test.py +++ b/tests/integration/test_distributed_respect_user_timeouts/test.py @@ -129,15 +129,7 @@ def started_cluster(request): def _check_timeout_and_exception(node, user, query_base, query): repeats = EXPECTED_BEHAVIOR[user]["times"] - extra_repeats = 1 - # Table function remote() are executed two times. - # It tries to get table structure from remote shards. - # On 'node2' it will firstly try to get structure from 'node1' (which is not available), - # so there are 1 extra connection attempts for 'node2' and 'remote' - if node.name == "node2" and query_base == "remote": - extra_repeats = 2 - - expected_timeout = EXPECTED_BEHAVIOR[user]["timeout"] * repeats * extra_repeats + expected_timeout = EXPECTED_BEHAVIOR[user]["timeout"] * repeats start = timeit.default_timer() exception = node.query_and_get_error(query, user=user) diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/__init__.py b/tests/integration/test_host_regexp_multiple_ptr_records/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml new file mode 100644 index 00000000000..7a2141e6c7e --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml @@ -0,0 +1,11 @@ + + + + + + test1\.example\.com$ + + default + + + \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml new file mode 100644 index 00000000000..58ef55cd3f3 --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml @@ -0,0 +1,5 @@ + + :: + 0.0.0.0 + 1 + diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/Corefile b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/Corefile new file mode 100644 index 00000000000..0dd198441dc --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/Corefile @@ -0,0 +1,8 @@ +. { + hosts /example.com { + reload "200ms" + fallthrough + } + forward . 
127.0.0.11 + log +} diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com new file mode 100644 index 00000000000..9beb415c290 --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com @@ -0,0 +1 @@ +filled in runtime, but needs to exist in order to be volume mapped in docker \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/test.py b/tests/integration/test_host_regexp_multiple_ptr_records/test.py new file mode 100644 index 00000000000..fa2917411e4 --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records/test.py @@ -0,0 +1,91 @@ +import pytest +from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check +import os + +DOCKER_COMPOSE_PATH = get_docker_compose_path() +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +cluster = ClickHouseCluster(__file__) + +ch_server = cluster.add_instance( + "clickhouse-server", + with_coredns=True, + main_configs=["configs/listen_host.xml"], + user_configs=["configs/host_regexp.xml"], + ipv6_address="2001:3984:3989::1:1111", +) + +client = cluster.add_instance( + "clickhouse-client", + ipv6_address="2001:3984:3989::1:1112", +) + + +@pytest.fixture(scope="module") +def started_cluster(): + global cluster + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def setup_dns_server(ip): + domains_string = "test3.example.com test2.example.com test1.example.com" + example_file_path = f'{ch_server.env_variables["COREDNS_CONFIG_DIR"]}/example.com' + run_and_check(f"echo '{ip} {domains_string}' > {example_file_path}", shell=True) + + +def setup_ch_server(dns_server_ip): + ch_server.exec_in_container( + (["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"]) + ) + ch_server.exec_in_container( + (["bash", "-c", "echo 'options ndots:0' >> /etc/resolv.conf"]) + ) + ch_server.query("SYSTEM DROP DNS CACHE") + + +def build_endpoint_v4(ip): + return f"'http://{ip}:8123/?query=SELECT+1&user=test_dns'" + + +def build_endpoint_v6(ip): + return build_endpoint_v4(f"[{ip}]") + + +def test_host_regexp_multiple_ptr_v4_fails_with_wrong_resolution(started_cluster): + server_ip = cluster.get_instance_ip("clickhouse-server") + random_ip = "9.9.9.9" + dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) + + setup_dns_server(random_ip) + setup_ch_server(dns_server_ip) + + endpoint = build_endpoint_v4(server_ip) + + assert "1\n" != client.exec_in_container((["bash", "-c", f"curl {endpoint}"])) + + +def test_host_regexp_multiple_ptr_v4(started_cluster): + server_ip = cluster.get_instance_ip("clickhouse-server") + client_ip = cluster.get_instance_ip("clickhouse-client") + dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) + + setup_dns_server(client_ip) + setup_ch_server(dns_server_ip) + + endpoint = build_endpoint_v4(server_ip) + + assert "1\n" == client.exec_in_container((["bash", "-c", f"curl {endpoint}"])) + + +def test_host_regexp_multiple_ptr_v6(started_cluster): + setup_dns_server(client.ipv6_address) + setup_ch_server(cluster.get_instance_global_ipv6(cluster.coredns_host)) + + endpoint = build_endpoint_v6(ch_server.ipv6_address) + + assert "1\n" == client.exec_in_container((["bash", "-c", f"curl -6 {endpoint}"])) diff --git a/tests/queries/0_stateless/02098_with_types_use_header.sh b/tests/queries/0_stateless/02098_with_types_use_header.sh index 
5d88a994052..457182a08f2 100755 --- a/tests/queries/0_stateless/02098_with_types_use_header.sh +++ b/tests/queries/0_stateless/02098_with_types_use_header.sh @@ -19,9 +19,9 @@ echo -e "y\tz\tx\nString\tDate\tUInt32\ntext\t2020-01-01\t1" | $CLICKHOUSE_CLIEN echo -e "x\tz\ty\nUInt32\tString\tDate\n1\ttext\t2020-01-01" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CustomSeparatedWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "CSVWithNamesAndTypes" -echo -e "'x','y','z'\n'String','Date','UInt32'\n'text','2020-01-01',1" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' -echo -e "'y','z','x'\n'String','Date','UInt32'\n'text','2020-01-01',1" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" && echo 'OK' || echo 'FAIL' -echo -e "'x','z','y'\n'UInt32','String',Date'\n1,'text','2020-01-01'" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +echo -e "'x','y','z'\n'String','Date','UInt32'\n'text','2020-01-01',1" | $CLICKHOUSE_CLIENT --format_csv_allow_single_quotes=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +echo -e "'y','z','x'\n'String','Date','UInt32'\n'text','2020-01-01',1" | $CLICKHOUSE_CLIENT --format_csv_allow_single_quotes=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" && echo 'OK' || echo 'FAIL' +echo -e "'x','z','y'\n'UInt32','String',Date'\n1,'text','2020-01-01'" | $CLICKHOUSE_CLIENT --format_csv_allow_single_quotes=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "JSONCompactEachRowWithNamesAndTypes" diff --git a/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh b/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh index 08d380bf559..4f38d662590 100755 --- a/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh +++ b/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh @@ -10,13 +10,13 @@ ${CLICKHOUSE_CLIENT} --query="create table test_02155_csv (A Int64, S String, D echo "input_format_null_as_default = 1" -cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv SETTINGS input_format_null_as_default = 1 FORMAT CSV" +cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv SETTINGS input_format_null_as_default = 1, format_csv_allow_single_quotes=1 FORMAT CSV" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test_02155_csv" ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test_02155_csv" echo "input_format_null_as_default = 0" -cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv SETTINGS input_format_null_as_default = 0 FORMAT CSV" +cat $CUR_DIR/data_csv/csv_with_slash.csv | 
${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv SETTINGS format_csv_allow_single_quotes = 1, input_format_null_as_default = 0 FORMAT CSV" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test_02155_csv" diff --git a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh index 6589765f739..e8aa5914912 100755 --- a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh +++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh @@ -158,8 +158,10 @@ echo "CSV" echo -e "42,Some string,'[1, 2, 3, 4]','[(1, 2, 3)]' 42\,abcd,'[]','[(4, 5, 6)]'" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +CLIENT_CMD="$CLICKHOUSE_CLIENT --format_csv_allow_single_quotes=1" + +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]\" '[]' @@ -168,8 +170,8 @@ echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'k \"[({}, ['String3'], NULL)]\" \"[({'key3': NULL}, []), NULL]\""> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]\" '[]' @@ -178,43 +180,43 @@ echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'k \"[({}, ['String3'], NULL)]\" \"[({'key3': NULL}, [], NULL)]\""> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "true false \N" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "'[true, NULL]' '[]' '[NULL]' '[false]'" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "'(1, 2, 3)'"> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" -echo -e "'123.123'"> $DATA_FILE +echo -e '"123.123"'> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "'[(1, 2, 3)]'"> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from 
file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "\"[(1, 2, 3)]\""> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" diff --git a/tests/queries/0_stateless/02346_additional_filters.reference b/tests/queries/0_stateless/02346_additional_filters.reference index 0cd345f71cd..22d53173e71 100644 --- a/tests/queries/0_stateless/02346_additional_filters.reference +++ b/tests/queries/0_stateless/02346_additional_filters.reference @@ -66,6 +66,12 @@ select * from system.numbers limit 5; 2 3 4 +select * from system.numbers as t limit 5 settings additional_table_filters={'t' : 'number % 2 != 0'}; +1 +3 +5 +7 +9 select * from system.numbers limit 5 settings additional_table_filters={'system.numbers' : 'number != 3'}; 0 1 diff --git a/tests/queries/0_stateless/02346_additional_filters.sql b/tests/queries/0_stateless/02346_additional_filters.sql index 24e04b9dc8b..9e0bee4549b 100644 --- a/tests/queries/0_stateless/02346_additional_filters.sql +++ b/tests/queries/0_stateless/02346_additional_filters.sql @@ -30,6 +30,7 @@ select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filt select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filters={'system.one' : 'dummy != 0'}; select * from system.numbers limit 5; +select * from system.numbers as t limit 5 settings additional_table_filters={'t' : 'number % 2 != 0'}; select * from system.numbers limit 5 settings additional_table_filters={'system.numbers' : 'number != 3'}; select * from system.numbers limit 5 settings additional_table_filters={'system.numbers':'number != 3','table_1':'x!=2'}; select * from (select number from system.numbers limit 5 union all select x from table_1) order by number settings additional_table_filters={'system.numbers':'number != 3','table_1':'x!=2'}; diff --git a/tests/queries/0_stateless/02354_parse_timedelta.reference b/tests/queries/0_stateless/02354_parse_timedelta.reference index 9bc208ada9d..f9dd7879057 100644 --- a/tests/queries/0_stateless/02354_parse_timedelta.reference +++ b/tests/queries/0_stateless/02354_parse_timedelta.reference @@ -5,3 +5,7 @@ 36806400 1331 40273293 +1.001001001 +1.001001001 +1.001001001 +1.11111111111 diff --git a/tests/queries/0_stateless/02354_parse_timedelta.sql b/tests/queries/0_stateless/02354_parse_timedelta.sql index 3cf282d715a..29f2bf9fdfc 100644 --- a/tests/queries/0_stateless/02354_parse_timedelta.sql +++ b/tests/queries/0_stateless/02354_parse_timedelta.sql @@ -5,6 +5,10 @@ SELECT parseTimeDelta('0.00123 seconds'); SELECT parseTimeDelta('1yr2mo'); SELECT parseTimeDelta('11s+22min'); SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds'); +SELECT parseTimeDelta('1s1ms1us1ns'); +SELECT parseTimeDelta('1s1ms1μs1ns'); +SELECT parseTimeDelta('1s - 1ms : 1μs ; 1ns'); +SELECT parseTimeDelta('1.11s1.11ms1.11us1.11ns'); -- invalid expressions SELECT parseTimeDelta(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} diff --git a/tests/queries/0_stateless/02354_tuple_element_with_default.reference b/tests/queries/0_stateless/02354_tuple_element_with_default.reference new file mode 100644 index 00000000000..d5dfff17ef1 --- /dev/null +++ b/tests/queries/0_stateless/02354_tuple_element_with_default.reference @@ -0,0 +1,26 @@ +z +SELECT tupleElement(t1, \'z\', 
\'z\') +FROM t_tuple_element_default +0 +SELECT tupleElement(t1, \'z\', 0) +FROM t_tuple_element_default +z +SELECT tupleElement(t2, \'z\', \'z\') +FROM t_tuple_element_default +-------------------- +[(3,4)] +SELECT tupleElement([(1, 2)], \'a\', [(3, 4)]) +-------------------- +SELECT tupleElement(t1, \'a\', [tuple(1)]) +FROM t_tuple_element_default +-------------------- +[(0)] +SELECT tupleElement(t1, \'a\', [tuple(0)]) +FROM t_tuple_element_default +[0] +SELECT tupleElement(t1, \'a\', [0]) +FROM t_tuple_element_default +[0] +[0] +SELECT tupleElement(t1, \'a\', [0]) +FROM t_tuple_element_default diff --git a/tests/queries/0_stateless/02354_tuple_element_with_default.sql b/tests/queries/0_stateless/02354_tuple_element_with_default.sql new file mode 100644 index 00000000000..908a869885b --- /dev/null +++ b/tests/queries/0_stateless/02354_tuple_element_with_default.sql @@ -0,0 +1,50 @@ +DROP TABLE IF EXISTS t_tuple_element_default; + +CREATE TABLE t_tuple_element_default(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO t_tuple_element_default VALUES ((1, 'a'), (2, 'b')); + +SELECT tupleElement(t1, 'z', 'z') FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'z', 'z') FROM t_tuple_element_default; +SELECT tupleElement(t1, 'z', 0) FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'z', 0) FROM t_tuple_element_default; +SELECT tupleElement(t2, 'z', 'z') FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t2, 'z', 'z') FROM t_tuple_element_default; + +SELECT tupleElement(t1, 3, 'z') FROM t_tuple_element_default; -- { serverError 127 } +SELECT tupleElement(t1, 0, 'z') FROM t_tuple_element_default; -- { serverError 127 } + +DROP TABLE t_tuple_element_default; + +SELECT '--------------------'; + +SELECT tupleElement(array(tuple(1, 2)), 'a', 0); -- { serverError 645 } +SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(1, 2), tuple(3, 4))); -- { serverError 190 } +SELECT tupleElement(array(array(tuple(1))), 'a', array(array(1, 2, 3))); -- { serverError 190 } + +SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(3, 4))); +EXPLAIN SYNTAX SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(3, 4))); + +SELECT '--------------------'; + +CREATE TABLE t_tuple_element_default(t1 Array(Tuple(UInt32)), t2 UInt32) ENGINE = Memory; + +SELECT tupleElement(t1, 'a', array(tuple(1))) FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(tuple(1))) FROM t_tuple_element_default; + +SELECT '--------------------'; + +INSERT INTO t_tuple_element_default VALUES ([(1)], 100); + +SELECT tupleElement(t1, 'a', array(tuple(0))) FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(tuple(0))) FROM t_tuple_element_default; + +SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; + +INSERT INTO t_tuple_element_default VALUES ([(2)], 200); + +SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; + +DROP TABLE t_tuple_element_default; + diff --git a/tests/queries/0_stateless/02366_direct_dictionary_dicthas.reference b/tests/queries/0_stateless/02366_direct_dictionary_dicthas.reference deleted file mode 100644 index 49b34f828cd..00000000000 --- a/tests/queries/0_stateless/02366_direct_dictionary_dicthas.reference +++ /dev/null @@ -1,62 +0,0 @@ -0 -0 -0 -1 -0 -1 -0 -2 -1 -0 -0 0 
1 -1 0 1 -2 0 1 -3 1 0 -4 0 1 -5 1 0 -6 0 1 -7 2 0 -8 1 0 -9 0 1 -1 -1 -1 -0 -1 -0 -1 -0 -0 -1 -1 -1 -1 -0 -1 -0 -1 -0 -0 -1 -1 -1 -1 -0 -1 -0 -1 -0 -0 -1 -value_0 -value_0 -value_0 -UNKNOWN -value_0 -UNKNOWN -value_0 -UNKNOWN -UNKNOWN -value_0 -4 0 -6 1 diff --git a/tests/queries/0_stateless/02366_direct_dictionary_dicthas.sql b/tests/queries/0_stateless/02366_direct_dictionary_dicthas.sql deleted file mode 100644 index b111415b56c..00000000000 --- a/tests/queries/0_stateless/02366_direct_dictionary_dicthas.sql +++ /dev/null @@ -1,56 +0,0 @@ --- Tags: no-backward-compatibility-check -DROP DATABASE IF EXISTS 02366_dictionary_db; -CREATE DATABASE 02366_dictionary_db; - -CREATE TABLE 02366_dictionary_db.dict_data -( - id UInt64, - val String -) -ENGINE = Memory; - -CREATE TABLE 02366_dictionary_db.lookup_data -( - id UInt64, - lookup_key UInt64, -) -ENGINE = Memory; - -INSERT INTO 02366_dictionary_db.dict_data VALUES(0, 'value_0'); - -INSERT INTO 02366_dictionary_db.lookup_data VALUES(0, 0); -INSERT INTO 02366_dictionary_db.lookup_data VALUES(1, 0); -INSERT INTO 02366_dictionary_db.lookup_data VALUES(2, 0); -INSERT INTO 02366_dictionary_db.lookup_data VALUES(3, 1); -INSERT INTO 02366_dictionary_db.lookup_data VALUES(4, 0); -INSERT INTO 02366_dictionary_db.lookup_data VALUES(5, 1); -INSERT INTO 02366_dictionary_db.lookup_data VALUES(6, 0); -INSERT INTO 02366_dictionary_db.lookup_data VALUES(7, 2); -INSERT INTO 02366_dictionary_db.lookup_data VALUES(8, 1); -INSERT INTO 02366_dictionary_db.lookup_data VALUES(9, 0); - -CREATE DICTIONARY 02366_dictionary_db.dict0 -( - id UInt64, - val String -) -PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data')) -LAYOUT(DIRECT()); - -SELECT lookup_key FROM 02366_dictionary_db.lookup_data ORDER BY id ASC; -SELECT id, lookup_key, dictHas(02366_dictionary_db.dict0, lookup_key) FROM 02366_dictionary_db.lookup_data ORDER BY id ASC; - --- Nesting this way seems to help it make all the lookups as a single block, although even then it isn't guaranteed -SELECT dictHas(02366_dictionary_db.dict0, lk) FROM (SELECT any(lookup_key) as lk FROM 02366_dictionary_db.lookup_data group by id ORDER BY id ASC); --- Same with this group by -SELECT dictHas(02366_dictionary_db.dict0, any(lookup_key)) FROM 02366_dictionary_db.lookup_data GROUP BY id ORDER BY id ASC; - - -SELECT dictHas(02366_dictionary_db.dict0, lookup_key) FROM 02366_dictionary_db.lookup_data ORDER BY id ASC; -SELECT dictGetOrDefault(02366_dictionary_db.dict0, 'val', lookup_key, 'UNKNOWN') FROM 02366_dictionary_db.lookup_data ORDER BY id ASC; -SELECT count(), has FROM 02366_dictionary_db.lookup_data group by dictHas(02366_dictionary_db.dict0, lookup_key) as has; - -DROP DICTIONARY 02366_dictionary_db.dict0; -DROP TABLE 02366_dictionary_db.lookup_data; -DROP TABLE 02366_dictionary_db.dict_data; diff --git a/tests/queries/0_stateless/02371_create_temporary_table_as_with_columns_list.reference b/tests/queries/0_stateless/02371_create_temporary_table_as_with_columns_list.reference new file mode 100644 index 00000000000..6fc56adcb1c --- /dev/null +++ b/tests/queries/0_stateless/02371_create_temporary_table_as_with_columns_list.reference @@ -0,0 +1,2 @@ +Vasya +Petya diff --git a/tests/queries/0_stateless/02371_create_temporary_table_as_with_columns_list.sql b/tests/queries/0_stateless/02371_create_temporary_table_as_with_columns_list.sql new file mode 100644 index 00000000000..7d8f297b505 --- /dev/null +++ 
b/tests/queries/0_stateless/02371_create_temporary_table_as_with_columns_list.sql @@ -0,0 +1,3 @@ +CREATE TEMPORARY TABLE test_02327 (name String) AS SELECT * FROM VALUES(('Vasya'), ('Petya')); +SELECT * FROM test_02327; +DROP TABLE test_02327; diff --git a/utils/check-style/check-black b/utils/check-style/check-black index 45e7820469b..141dcd1b406 100755 --- a/utils/check-style/check-black +++ b/utils/check-style/check-black @@ -6,8 +6,14 @@ set -e GIT_ROOT=$(git rev-parse --show-cdup) GIT_ROOT=${GIT_ROOT:-.} tmp=$(mktemp) -if ! find "$GIT_ROOT" -name '*.py' -not -path "$GIT_ROOT/contrib/*" -exec black --check --diff {} + 1>"$tmp" 2>&1; then +# Find all *.py files in the repo except the contrib directory +find_cmd=(find "$GIT_ROOT" -name '*.py' -not -path "$GIT_ROOT/contrib/*") +if ! "${find_cmd[@]}" -exec black --check --diff {} + 1>"$tmp" 2>&1; then # Show the result only if some files need formatting cat "$tmp" + # Apply formatting + "${find_cmd[@]}" -exec black {} + 1>/dev/null 2>&1 + # Automatically add changed files to stage + "${find_cmd[@]}" -exec git add -u {} + 1>/dev/null 2>&1 fi rm "$tmp" diff --git a/utils/graphite-rollup/graphite-rollup-bench.cpp b/utils/graphite-rollup/graphite-rollup-bench.cpp index 4c11f90b3ff..49a3d509be6 100644 --- a/utils/graphite-rollup/graphite-rollup-bench.cpp +++ b/utils/graphite-rollup/graphite-rollup-bench.cpp @@ -20,9 +20,9 @@ using namespace DB; static SharedContextHolder shared_context = Context::createShared(); -std::vector loadMetrics(const std::string & metrics_file) +std::vector loadMetrics(const std::string & metrics_file) { - std::vector metrics; + std::vector metrics; FILE * stream; char * line = nullptr; @@ -47,7 +47,7 @@ std::vector loadMetrics(const std::string & metrics_file) } if (l > 0) { - metrics.push_back(StringRef(strdup(line), l)); + metrics.emplace_back(std::string_view(strdup(line), l)); } } } @@ -80,7 +80,7 @@ void bench(const std::string & config_path, const std::string & metrics_file, si Graphite::Params params; setGraphitePatternsFromConfig(context, "graphite_rollup", params); - std::vector metrics = loadMetrics(metrics_file); + std::vector metrics = loadMetrics(metrics_file); std::vector durations(metrics.size()); size_t j, i; @@ -99,15 +99,15 @@ void bench(const std::string & config_path, const std::string & metrics_file, si if (j == 0 && verbose) { - std::cout << metrics[i].data << ": rule with regexp '" << rule.second->regexp_str << "' found\n"; + std::cout << metrics[i].data() << ": rule with regexp '" << rule.second->regexp_str << "' found\n"; } } } for (i = 0; i < metrics.size(); i++) { - std::cout << metrics[i].data << " " << durations[i] / n << " ns\n"; - free(const_cast(static_cast(metrics[i].data))); + std::cout << metrics[i].data() << " " << durations[i] / n << " ns\n"; + free(const_cast(static_cast(metrics[i].data()))); } } diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index de9e4cfa0af..e4c7aae8b25 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v22.7.1.2484-stable 2022-07-21 v22.6.3.35-stable 2022-07-06 v22.6.2.12-stable 2022-06-29 v22.6.1.1985-stable 2022-06-16 diff --git a/utils/security-generator/SECURITY.md.sh b/utils/security-generator/SECURITY.md.sh index 381f5b4eaa6..15933da7942 100755 --- a/utils/security-generator/SECURITY.md.sh +++ b/utils/security-generator/SECURITY.md.sh @@ -33,7 +33,7 @@ FROM FROM ( WITH - extractGroups(version, 'v(\\d+).(\\d+)') AS v, + extractGroups(version, 
'v(\\d+)\\.(\\d+)') AS v, v[1]::UInt8 AS y, v[2]::UInt8 AS m SELECT