Merge branch 'master' of github.com:ClickHouse/ClickHouse into urlCluster

This commit is contained in:
avogar 2023-05-15 16:35:24 +00:00
commit f9e0eb47d7
327 changed files with 5586 additions and 1858 deletions


@ -23,7 +23,7 @@ Checks: '*,
-bugprone-implicit-widening-of-multiplication-result, -bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions, -bugprone-narrowing-conversions,
-bugprone-not-null-terminated-result, -bugprone-not-null-terminated-result,
-bugprone-reserved-identifier, -bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
-bugprone-unchecked-optional-access, -bugprone-unchecked-optional-access,
-cert-dcl16-c, -cert-dcl16-c,
@ -111,6 +111,7 @@ Checks: '*,
-misc-no-recursion, -misc-no-recursion,
-misc-non-private-member-variables-in-classes, -misc-non-private-member-variables-in-classes,
-misc-confusable-identifiers, # useful but slooow -misc-confusable-identifiers, # useful but slooow
-misc-use-anonymous-namespace,
-modernize-avoid-c-arrays, -modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces, -modernize-concat-nested-namespaces,
@ -136,7 +137,7 @@ Checks: '*,
-readability-function-cognitive-complexity, -readability-function-cognitive-complexity,
-readability-function-size, -readability-function-size,
-readability-identifier-length, -readability-identifier-length,
-readability-identifier-naming, -readability-identifier-naming, # useful but too slow
-readability-implicit-bool-conversion, -readability-implicit-bool-conversion,
-readability-isolate-declaration, -readability-isolate-declaration,
-readability-magic-numbers, -readability-magic-numbers,
@ -148,7 +149,7 @@ Checks: '*,
-readability-uppercase-literal-suffix, -readability-uppercase-literal-suffix,
-readability-use-anyofallof, -readability-use-anyofallof,
-zirkon-*, -zircon-*,
' '
WarningsAsErrors: '*' WarningsAsErrors: '*'
@ -168,11 +169,10 @@ CheckOptions:
readability-identifier-naming.ParameterPackCase: lower_case readability-identifier-naming.ParameterPackCase: lower_case
readability-identifier-naming.StructCase: CamelCase readability-identifier-naming.StructCase: CamelCase
readability-identifier-naming.TemplateTemplateParameterCase: CamelCase readability-identifier-naming.TemplateTemplateParameterCase: CamelCase
readability-identifier-naming.TemplateUsingCase: lower_case readability-identifier-naming.TemplateParameterCase: lower_case
readability-identifier-naming.TypeTemplateParameterCase: CamelCase readability-identifier-naming.TypeTemplateParameterCase: CamelCase
readability-identifier-naming.TypedefCase: CamelCase readability-identifier-naming.TypedefCase: CamelCase
readability-identifier-naming.UnionCase: CamelCase readability-identifier-naming.UnionCase: CamelCase
readability-identifier-naming.UsingCase: CamelCase
modernize-loop-convert.UseCxx20ReverseRanges: false modernize-loop-convert.UseCxx20ReverseRanges: false
performance-move-const-arg.CheckTriviallyCopyableMove: false performance-move-const-arg.CheckTriviallyCopyableMove: false
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097 # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
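For anyone reproducing these naming rules locally, a minimal sketch (the clang-tidy binary name, build directory, and file path are assumptions, not part of this commit):

``` bash
# Run only the identifier-naming check from .clang-tidy against one translation unit,
# using the compile_commands.json produced by CMake in ./build.
clang-tidy-16 -p build --checks='-*,readability-identifier-naming' src/Common/Exception.cpp
```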

.clangd Normal file

@ -0,0 +1,16 @@
Diagnostics:
# clangd does parse .clang-tidy, but some checks are too slow to run in
# clang-tidy build, so let's enable them explicitly for clangd at least.
ClangTidy:
# The following checks have been disabled due to slowness with C++23,
# for more details see [1].
#
# [1]: https://github.com/llvm/llvm-project/issues/61418
#
# But the code base had been written in a style that had been checked
# by this check, so at least, let's enable it for clangd.
Add: [
# configured in .clang-tidy
readability-identifier-naming,
bugprone-reserved-identifier,
]
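To confirm that clangd actually picks these checks up, one quick sanity check is to run clangd on a single file (a sketch; the file path and build directory are assumptions):

``` bash
# Ask clangd to parse one file and print the diagnostics it would show in the editor.
clangd-16 --compile-commands-dir=build --check=src/Common/Exception.cpp
```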


@ -72,6 +72,9 @@ jobs:
with: with:
name: changed_images name: changed_images
path: ${{ runner.temp }}/changed_images.json path: ${{ runner.temp }}/changed_images.json
Codebrowser:
needs: [DockerHubPush]
uses: ./.github/workflows/woboq.yml
BuilderCoverity: BuilderCoverity:
needs: DockerHubPush needs: DockerHubPush
runs-on: [self-hosted, builder] runs-on: [self-hosted, builder]
@ -125,8 +128,8 @@ jobs:
SONAR_SCANNER_VERSION: 4.8.0.2856 SONAR_SCANNER_VERSION: 4.8.0.2856
SONAR_SERVER_URL: "https://sonarcloud.io" SONAR_SERVER_URL: "https://sonarcloud.io"
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
CC: clang-15 CC: clang-16
CXX: clang++-15 CXX: clang++-16
steps: steps:
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1


@ -6,9 +6,8 @@ env:
concurrency: concurrency:
group: woboq group: woboq
on: # yamllint disable-line rule:truthy on: # yamllint disable-line rule:truthy
schedule:
- cron: '0 */18 * * *'
workflow_dispatch: workflow_dispatch:
workflow_call:
jobs: jobs:
# don't use dockerhub push because this image updates so rarely # don't use dockerhub push because this image updates so rarely
WoboqCodebrowser: WoboqCodebrowser:
@ -26,6 +25,10 @@ jobs:
with: with:
clear-repository: true clear-repository: true
submodules: 'true' submodules: 'true'
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.IMAGES_PATH }}
- name: Codebrowser - name: Codebrowser
run: | run: |
sudo rm -fr "$TEMP_PATH" sudo rm -fr "$TEMP_PATH"

.gitignore vendored

@ -129,7 +129,6 @@ website/package-lock.json
/.ccls-cache /.ccls-cache
# clangd cache # clangd cache
/.clangd
/.cache /.cache
/compile_commands.json /compile_commands.json

.gitmodules vendored

@ -267,7 +267,7 @@
url = https://github.com/ClickHouse/nats.c url = https://github.com/ClickHouse/nats.c
[submodule "contrib/vectorscan"] [submodule "contrib/vectorscan"]
path = contrib/vectorscan path = contrib/vectorscan
url = https://github.com/VectorCamp/vectorscan url = https://github.com/ClickHouse/vectorscan.git
[submodule "contrib/c-ares"] [submodule "contrib/c-ares"]
path = contrib/c-ares path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares url = https://github.com/ClickHouse/c-ares


@ -342,13 +342,6 @@ if (COMPILER_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
# Set new experimental pass manager, it's a performance, build time and binary size win.
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
endif ()
# We cannot afford to use LTO when compiling unit tests, and it's not enough # We cannot afford to use LTO when compiling unit tests, and it's not enough
# to only supply -fno-lto at the final linking stage. So we disable it # to only supply -fno-lto at the final linking stage. So we disable it
# completely. # completely.


@ -22,7 +22,7 @@ curl https://clickhouse.com/ | sh
## Upcoming Events ## Upcoming Events
* [**v23.4 Release Webinar**](https://clickhouse.com/company/events/v23-4-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-04) - April 26 - 23.4 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. * [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - May 31 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16 * [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16
* [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25 * [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25
* [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25 * [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25
@ -33,5 +33,14 @@ Also, keep an eye out for upcoming meetups in Amsterdam, Boston, NYC, Beijing, a
## Recent Recordings ## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
* **Recording available**: [**v23.4 Release Webinar**]([https://www.youtube.com/watch?v=ISaGUjvBNao](https://www.youtube.com/watch?v=4rrf6bk_mOg)) UNDROP TABLE, server settings introspection, nested dynamic disks, MySQL compatibility, parseDate Time, Lightweight Deletes, Parallel Replicas, integrations updates, and so much more! Watch it now! * **Recording available**: [**v23.4 Release Webinar**](https://www.youtube.com/watch?v=4rrf6bk_mOg) Faster Parquet Reading, Asynchronous Connections to Replicas, Trailing Comma before FROM, extractKeyValuePairs, integrations updates, and so much more! Watch it now!
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU) * **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)
## Interested in joining ClickHouse and making it your full-time job?
We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics. Here, you will have an opportunity to solve some of the most cutting-edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker as well as a doer - we'll definitely click!
Check out our **current openings** here: https://clickhouse.com/company/careers
Can't find what you are looking for, but want to let us know you are interested in joining ClickHouse? Email careers@clickhouse.com!


@ -10,9 +10,16 @@ set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
if (SANITIZE) if (SANITIZE)
if (SANITIZE STREQUAL "address") if (SANITIZE STREQUAL "address")
# LLVM-15 has a bug in Address Sanitizer, preventing the usage of 'sanitize-address-use-after-scope', set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
# see https://github.com/llvm/llvm-project/issues/58633 if (COMPILER_CLANG)
set (ASAN_FLAGS "-fsanitize=address -fno-sanitize-address-use-after-scope") if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 15 AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 16)
# LLVM-15 has a bug in Address Sanitizer, preventing the usage
# of 'sanitize-address-use-after-scope', see [1].
#
# [1]: https://github.com/llvm/llvm-project/issues/58633
set (ASAN_FLAGS "${ASAN_FLAGS} -fno-sanitize-address-use-after-scope")
endif()
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
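A sanitized build that exercises this branch can be configured roughly as follows (a sketch; everything except the SANITIZE cache variable is an assumption):

``` bash
# Configure an AddressSanitizer build in a separate build directory, then build it.
cmake -S . -B build-asan -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DSANITIZE=address
cmake --build build-asan
```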


@ -70,13 +70,15 @@ if (LINKER_NAME)
if (NOT LLD_PATH) if (NOT LLD_PATH)
message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.") message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
endif () endif ()
if (COMPILER_CLANG) # This a temporary quirk to emit .debug_aranges with ThinLTO, it is only the case clang/llvm <16
# This a temporary quirk to emit .debug_aranges with ThinLTO, can be removed after upgrade to clang-16 if (COMPILER_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld") set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld")
configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY) configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
endif () else ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
endif()
endif () endif ()


@ -179,10 +179,10 @@ add_contrib (sqlite-cmake sqlite-amalgamation)
add_contrib (s2geometry-cmake s2geometry) add_contrib (s2geometry-cmake s2geometry)
add_contrib (c-ares-cmake c-ares) add_contrib (c-ares-cmake c-ares)
if (OS_LINUX AND ARCH_AMD64 AND (ENABLE_AVX2 OR ENABLE_AVX512)) if (OS_LINUX AND ARCH_AMD64 AND ENABLE_SSE42)
option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES}) option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES})
elseif(ENABLE_QPL) elseif(ENABLE_QPL)
message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with avx2/avx512 support") message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with SSE 4.2 or higher")
endif() endif()
if (ENABLE_QPL) if (ENABLE_QPL)
add_contrib (idxd-config-cmake idxd-config) add_contrib (idxd-config-cmake idxd-config)

contrib/qpl vendored

@ -1 +1 @@
Subproject commit 0bce2b03423f6fbeb8bce66cc8be0bf558058848 Subproject commit 3f8f5cea27739f5261e8fd577dc233ffe88bf679


@ -22,8 +22,8 @@ GetLibraryVersion("${HEADER_CONTENT}" QPL_VERSION)
message(STATUS "Intel QPL version: ${QPL_VERSION}") message(STATUS "Intel QPL version: ${QPL_VERSION}")
# There are 5 source subdirectories under $QPL_SRC_DIR: isal, c_api, core-sw, middle-layer, c_api. # There are 5 source subdirectories under $QPL_SRC_DIR: isal, c_api, core-sw, middle-layer, c_api.
# Generate 7 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, core_iaa, middle_layer_lib. # Generate 8 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, core_iaa, middle_layer_lib.
# Output ch_contrib::qpl by linking with 7 library targets. # Output ch_contrib::qpl by linking with 8 library targets.
include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake") include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake")
@ -88,31 +88,36 @@ set(ISAL_ASM_SRC ${QPL_SRC_DIR}/isal/igzip/igzip_body.asm
add_library(isal OBJECT ${ISAL_C_SRC}) add_library(isal OBJECT ${ISAL_C_SRC})
add_library(isal_asm OBJECT ${ISAL_ASM_SRC}) add_library(isal_asm OBJECT ${ISAL_ASM_SRC})
set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:isal>)
set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:isal_asm>)
# Setting external and internal interfaces for ISA-L library # Setting external and internal interfaces for ISA-L library
target_include_directories(isal target_include_directories(isal
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/isal/include> PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/isal/include>
PRIVATE ${QPL_SRC_DIR}/isal/include PRIVATE ${QPL_SRC_DIR}/isal/include
PUBLIC ${QPL_SRC_DIR}/isal/igzip) PUBLIC ${QPL_SRC_DIR}/isal/igzip)
set_target_properties(isal PROPERTIES
CXX_STANDARD 11
C_STANDARD 99)
target_compile_options(isal PRIVATE target_compile_options(isal PRIVATE
"$<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}>" "$<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}>"
"$<$<CONFIG:Debug>:>" "$<$<CONFIG:Debug>:>"
"$<$<CONFIG:Release>:>") "$<$<CONFIG:Release>:>")
# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available".
# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system.
target_compile_options(isal_asm PRIVATE "-I${QPL_SRC_DIR}/isal/include/" target_compile_options(isal_asm PRIVATE "-I${QPL_SRC_DIR}/isal/include/"
PRIVATE "-I${QPL_SRC_DIR}/isal/igzip/" PRIVATE "-I${QPL_SRC_DIR}/isal/igzip/"
PRIVATE "-I${QPL_SRC_DIR}/isal/crc/" PRIVATE "-I${QPL_SRC_DIR}/isal/crc/"
PRIVATE "-DHAVE_AS_KNOWS_AVX512"
PRIVATE "-DAS_FEATURE_LEVEL=10"
PRIVATE "-DQPL_LIB") PRIVATE "-DQPL_LIB")
# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available".
# AS_FEATURE_LEVEL=5 means "Check SIMD capabilities of the target system at runtime and use up to AVX2 if available".
# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system.
if (ENABLE_AVX512)
target_compile_options(isal_asm PRIVATE "-DHAVE_AS_KNOWS_AVX512" "-DAS_FEATURE_LEVEL=10")
else()
target_compile_options(isal_asm PRIVATE "-DAS_FEATURE_LEVEL=5")
endif()
# Here must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS. # Here must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS.
# Otherwise nasm compiler would fail to proceed due to unrecognition of "-fno-sanitize=undefined" # Otherwise nasm compiler would fail to proceed due to unrecognition of "-fno-sanitize=undefined"
if (SANITIZE STREQUAL "undefined") if (SANITIZE STREQUAL "undefined")
@ -126,78 +131,97 @@ target_compile_definitions(isal PUBLIC
NDEBUG) NDEBUG)
# [SUBDIR]core-sw # [SUBDIR]core-sw
# Two libraries:qplcore_avx512/qplcore_px for SW fallback will be created which are implemented by AVX512 and non-AVX512 instructions respectively. # Create set of libraries corresponding to supported platforms for SW fallback which are implemented by AVX512 and non-AVX512 instructions respectively.
# The upper level QPL API will check SIMD capabilities of the target system at runtime and decide to call AVX512 function or non-AVX512 function. # The upper level QPL API will check SIMD capabilities of the target system at runtime and decide to call AVX512 function or non-AVX512 function.
# Hence, here we don't need put qplcore_avx512 under an ENABLE_AVX512 CMake switch. # Hence, here we don't need put ENABLE_AVX512 CMake switch.
# Actually, if we do that, some undefined symbols errors would happen because both of AVX512 function and non-AVX512 function are referenced by QPL API.
# PLATFORM=2 means AVX512 implementation; PLATFORM=0 means non-AVX512 implementation.
# Find Core Sources get_list_of_supported_optimizations(PLATFORMS_LIST)
file(GLOB SOURCES
${QPL_SRC_DIR}/core-sw/src/checksums/*.c
${QPL_SRC_DIR}/core-sw/src/filtering/*.c
${QPL_SRC_DIR}/core-sw/src/other/*.c
${QPL_SRC_DIR}/core-sw/src/compression/*.c)
file(GLOB DATA_SOURCES foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST)
${QPL_SRC_DIR}/core-sw/src/data/*.c) # Find Core Sources
file(GLOB SOURCES
${QPL_SRC_DIR}/core-sw/src/checksums/*.c
${QPL_SRC_DIR}/core-sw/src/filtering/*.c
${QPL_SRC_DIR}/core-sw/src/other/*.c
${QPL_SRC_DIR}/core-sw/src/compression/*.c)
# Create avx512 library file(GLOB DATA_SOURCES
add_library(qplcore_avx512 OBJECT ${SOURCES}) ${QPL_SRC_DIR}/core-sw/src/data/*.c)
target_compile_definitions(qplcore_avx512 PRIVATE PLATFORM=2) # Create library
add_library(qplcore_${PLATFORM_ID} OBJECT ${SOURCES})
target_include_directories(qplcore_avx512 set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/include> $<TARGET_OBJECTS:qplcore_${PLATFORM_ID}>)
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/compression/include>
PRIVATE $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(qplcore_avx512 PROPERTIES target_include_directories(qplcore_${PLATFORM_ID}
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>) PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/compression/include>
PRIVATE $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>)
target_link_libraries(qplcore_avx512 set_target_properties(qplcore_${PLATFORM_ID} PROPERTIES
PRIVATE isal $<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)
PRIVATE ${CMAKE_DL_LIBS})
target_compile_options(qplcore_avx512 target_compile_options(qplcore_${PLATFORM_ID}
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS} PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
PRIVATE -march=skylake-avx512 PRIVATE "$<$<CONFIG:Debug>:>"
PRIVATE "$<$<CONFIG:Debug>:>" PRIVATE "$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>")
PRIVATE "$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>")
# Set specific compiler options and/or definitions based on a platform
if (${PLATFORM_ID} MATCHES "avx512")
target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=2)
target_compile_options(qplcore_${PLATFORM_ID} PRIVATE -march=skylake-avx512)
else() # Create default px library
target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=0)
endif()
target_compile_definitions(qplcore_avx512 PUBLIC QPL_BADARG_CHECK) target_link_libraries(qplcore_${PLATFORM_ID} isal)
endforeach()
# #
# Create px library # Create dispatcher between platforms and auto-generated wrappers
# #
#set(CMAKE_INCLUDE_CURRENT_DIR ON) file(GLOB SW_DISPATCHER_SOURCES ${QPL_SRC_DIR}/core-sw/dispatcher/*.cpp)
# Create library add_library(qplcore_sw_dispatcher OBJECT ${SW_DISPATCHER_SOURCES})
add_library(qplcore_px OBJECT ${SOURCES} ${DATA_SOURCES})
target_compile_definitions(qplcore_px PRIVATE PLATFORM=0) set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:qplcore_sw_dispatcher>)
target_include_directories(qplcore_px target_include_directories(qplcore_sw_dispatcher
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/include> PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/dispatcher>)
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/compression/include>
PRIVATE $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(qplcore_px PROPERTIES # Generate kernel wrappers
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>) generate_unpack_kernel_arrays(${QPL_BINARY_DIR} "${PLATFORMS_LIST}")
target_link_libraries(qplcore_px foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST)
PRIVATE isal file(GLOB GENERATED_${PLATFORM_ID}_TABLES_SRC ${QPL_BINARY_DIR}/generated/${PLATFORM_ID}_*.cpp)
PRIVATE ${CMAKE_DL_LIBS})
target_compile_options(qplcore_px target_sources(qplcore_sw_dispatcher PRIVATE ${GENERATED_${PLATFORM_ID}_TABLES_SRC})
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
PRIVATE "$<$<CONFIG:Debug>:>"
PRIVATE "$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>")
target_compile_definitions(qplcore_px PUBLIC QPL_BADARG_CHECK) # Set specific compiler options and/or definitions based on a platform
if (${PLATFORM_ID} MATCHES "avx512")
set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2)
else()
set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0)
endif()
target_include_directories(qplcore_sw_dispatcher
PUBLIC $<TARGET_PROPERTY:qplcore_${PLATFORM_ID},INTERFACE_INCLUDE_DIRECTORIES>)
endforeach()
set_target_properties(qplcore_sw_dispatcher PROPERTIES CXX_STANDARD 17)
# w/a for build compatibility with ISAL codebase
target_compile_definitions(qplcore_sw_dispatcher PUBLIC -DQPL_LIB)
target_compile_options(qplcore_sw_dispatcher
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>
PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>)
# [SUBDIR]core-iaa # [SUBDIR]core-iaa
file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c
@ -211,13 +235,20 @@ file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c
# Create library # Create library
add_library(core_iaa OBJECT ${HW_PATH_SRC}) add_library(core_iaa OBJECT ${HW_PATH_SRC})
set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:core_iaa>)
target_include_directories(core_iaa target_include_directories(core_iaa
PRIVATE ${UUID_DIR} PRIVATE ${UUID_DIR}
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/include> PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/sources/include> PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/sources/include>
PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include> # status.h in own_checkers.h PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include> # status.h in own_checkers.h
PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/sources/c_api> # own_checkers.h PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/sources/c_api> # own_checkers.h
PRIVATE $<TARGET_PROPERTY:qplcore_avx512,INTERFACE_INCLUDE_DIRECTORIES>) PRIVATE $<TARGET_PROPERTY:qplcore_sw_dispatcher,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(core_iaa PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>
CXX_STANDARD 17)
target_compile_options(core_iaa target_compile_options(core_iaa
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
@ -227,11 +258,10 @@ target_compile_features(core_iaa PRIVATE c_std_11)
target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK
PRIVATE $<$<BOOL:${BLOCK_ON_FAULT}>: BLOCK_ON_FAULT_ENABLED> PRIVATE $<$<BOOL:${BLOCK_ON_FAULT}>: BLOCK_ON_FAULT_ENABLED>
PRIVATE $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>) PRIVATE $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>
PRIVATE $<$<BOOL:${DYNAMIC_LOADING_LIBACCEL_CONFIG}>:DYNAMIC_LOADING_LIBACCEL_CONFIG>)
# [SUBDIR]middle-layer # [SUBDIR]middle-layer
generate_unpack_kernel_arrays(${QPL_BINARY_DIR})
file(GLOB MIDDLE_LAYER_SRC file(GLOB MIDDLE_LAYER_SRC
${QPL_SRC_DIR}/middle-layer/analytics/*.cpp ${QPL_SRC_DIR}/middle-layer/analytics/*.cpp
${QPL_SRC_DIR}/middle-layer/c_wrapper/*.cpp ${QPL_SRC_DIR}/middle-layer/c_wrapper/*.cpp
@ -246,14 +276,12 @@ file(GLOB MIDDLE_LAYER_SRC
${QPL_SRC_DIR}/middle-layer/inflate/*.cpp ${QPL_SRC_DIR}/middle-layer/inflate/*.cpp
${QPL_SRC_DIR}/core-iaa/sources/accelerator/*.cpp) # todo ${QPL_SRC_DIR}/core-iaa/sources/accelerator/*.cpp) # todo
file(GLOB GENERATED_PX_TABLES_SRC ${QPL_BINARY_DIR}/generated/px_*.cpp)
file(GLOB GENERATED_AVX512_TABLES_SRC ${QPL_BINARY_DIR}/generated/avx512_*.cpp)
add_library(middle_layer_lib OBJECT add_library(middle_layer_lib OBJECT
${GENERATED_PX_TABLES_SRC}
${GENERATED_AVX512_TABLES_SRC}
${MIDDLE_LAYER_SRC}) ${MIDDLE_LAYER_SRC})
set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:middle_layer_lib>)
target_compile_options(middle_layer_lib target_compile_options(middle_layer_lib
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS}; ${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
@ -264,17 +292,16 @@ target_compile_definitions(middle_layer_lib
PUBLIC QPL_VERSION="${QPL_VERSION}" PUBLIC QPL_VERSION="${QPL_VERSION}"
PUBLIC $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT> PUBLIC $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>
PUBLIC $<$<BOOL:${EFFICIENT_WAIT}>:QPL_EFFICIENT_WAIT> PUBLIC $<$<BOOL:${EFFICIENT_WAIT}>:QPL_EFFICIENT_WAIT>
PUBLIC QPL_BADARG_CHECK) PUBLIC QPL_BADARG_CHECK
PUBLIC $<$<BOOL:${DYNAMIC_LOADING_LIBACCEL_CONFIG}>:DYNAMIC_LOADING_LIBACCEL_CONFIG>)
set_source_files_properties(${GENERATED_PX_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0) set_target_properties(middle_layer_lib PROPERTIES CXX_STANDARD 17)
set_source_files_properties(${GENERATED_AVX512_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2)
target_include_directories(middle_layer_lib target_include_directories(middle_layer_lib
PRIVATE ${UUID_DIR} PRIVATE ${UUID_DIR}
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/middle-layer> PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/middle-layer>
PUBLIC $<TARGET_PROPERTY:_qpl,INTERFACE_INCLUDE_DIRECTORIES> PUBLIC $<TARGET_PROPERTY:_qpl,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:qplcore_px,INTERFACE_INCLUDE_DIRECTORIES> PUBLIC $<TARGET_PROPERTY:qplcore_sw_dispatcher,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:qplcore_avx512,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES> PUBLIC $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:core_iaa,INTERFACE_INCLUDE_DIRECTORIES>) PUBLIC $<TARGET_PROPERTY:core_iaa,INTERFACE_INCLUDE_DIRECTORIES>)
@ -285,20 +312,19 @@ file(GLOB_RECURSE QPL_C_API_SRC
${QPL_SRC_DIR}/c_api/*.c ${QPL_SRC_DIR}/c_api/*.c
${QPL_SRC_DIR}/c_api/*.cpp) ${QPL_SRC_DIR}/c_api/*.cpp)
add_library(_qpl STATIC ${QPL_C_API_SRC} get_property(LIB_DEPS GLOBAL PROPERTY QPL_LIB_DEPS)
$<TARGET_OBJECTS:middle_layer_lib>
$<TARGET_OBJECTS:isal> add_library(_qpl STATIC ${QPL_C_API_SRC} ${LIB_DEPS})
$<TARGET_OBJECTS:isal_asm>
$<TARGET_OBJECTS:qplcore_px>
$<TARGET_OBJECTS:qplcore_avx512>
$<TARGET_OBJECTS:core_iaa>
$<TARGET_OBJECTS:middle_layer_lib>)
target_include_directories(_qpl target_include_directories(_qpl
PUBLIC $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include/> PUBLIC $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include/> $<INSTALL_INTERFACE:include>
PRIVATE $<TARGET_PROPERTY:middle_layer_lib,INTERFACE_INCLUDE_DIRECTORIES> PRIVATE $<TARGET_PROPERTY:middle_layer_lib,INTERFACE_INCLUDE_DIRECTORIES>
PRIVATE $<BUILD_INTERFACE:${QPL_SRC_DIR}/c_api>) PRIVATE $<BUILD_INTERFACE:${QPL_SRC_DIR}/c_api>)
set_target_properties(_qpl PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>
CXX_STANDARD 17)
target_compile_options(_qpl target_compile_options(_qpl
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS}; ${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
@ -308,12 +334,12 @@ target_compile_options(_qpl
target_compile_definitions(_qpl target_compile_definitions(_qpl
PRIVATE -DQPL_LIB PRIVATE -DQPL_LIB
PRIVATE -DQPL_BADARG_CHECK PRIVATE -DQPL_BADARG_CHECK
PRIVATE $<$<BOOL:${DYNAMIC_LOADING_LIBACCEL_CONFIG}>:DYNAMIC_LOADING_LIBACCEL_CONFIG>
PUBLIC -DENABLE_QPL_COMPRESSION) PUBLIC -DENABLE_QPL_COMPRESSION)
target_link_libraries(_qpl target_link_libraries(_qpl
PRIVATE ch_contrib::accel-config PRIVATE ch_contrib::accel-config
PRIVATE ch_contrib::isal PRIVATE ch_contrib::isal)
PRIVATE ${CMAKE_DL_LIBS})
target_include_directories(_qpl SYSTEM BEFORE target_include_directories(_qpl SYSTEM BEFORE
PUBLIC "${QPL_PROJECT_DIR}/include" PUBLIC "${QPL_PROJECT_DIR}/include"

contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit b4bba94b1a250603b0b198e0394946e32f6c3f30 Subproject commit 1f4d448314e581473103187765e4c949d01b4259


@ -362,17 +362,16 @@ def parse_args() -> argparse.Namespace:
parser.add_argument( parser.add_argument(
"--compiler", "--compiler",
choices=( choices=(
"clang-15", "clang-16",
"clang-15-darwin", "clang-16-darwin",
"clang-15-darwin-aarch64", "clang-16-darwin-aarch64",
"clang-15-aarch64", "clang-16-aarch64",
"clang-15-aarch64-v80compat", "clang-16-aarch64-v80compat",
"clang-15-ppc64le", "clang-16-ppc64le",
"clang-15-amd64-compat", "clang-16-amd64-compat",
"clang-15-freebsd", "clang-16-freebsd",
"gcc-11",
), ),
default="clang-15", default="clang-16",
help="a compiler to use", help="a compiler to use",
) )
parser.add_argument( parser.add_argument(


@ -10,51 +10,21 @@ RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev
# libclang-15-dev does not contain proper symlink:
#
# This is what cmake will search for:
#
# # readlink -f /usr/lib/llvm-15/lib/libclang-15.so.1
# /usr/lib/x86_64-linux-gnu/libclang-15.so.1
#
# This is what exists:
#
# # ls -l /usr/lib/x86_64-linux-gnu/libclang-15*
# lrwxrwxrwx 1 root root 16 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so -> libclang-15.so.1
# lrwxrwxrwx 1 root root 21 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15 -> libclang-15.so.15.0.0
# -rw-r--r-- 1 root root 31835760 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15.0.0
#
ARG TARGETARCH ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \ RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \ && case $arch in \
amd64) rarch=x86_64 ;; \ amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \ arm64) rarch=aarch64 ;; \
*) exit 1 ;; \ *) exit 1 ;; \
esac \ esac
&& ln -rsf /usr/lib/$rarch-linux-gnu/libclang-15.so.15 /usr/lib/$rarch-linux-gnu/libclang-15.so.1
# repo versions doesn't work correctly with C++17 # repo versions doesn't work correctly with C++17
# also we push reports to s3, so we add index.html to subfolder urls # also we push reports to s3, so we add index.html to subfolder urls
# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b # https://github.com/ClickHouse/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
RUN git clone --depth=1 https://github.com/ClickHouse/woboq_codebrowser /woboq_codebrowser \ RUN git clone --branch=master --depth=1 https://github.com/ClickHouse/woboq_codebrowser /woboq_codebrowser \
&& cd /woboq_codebrowser \ && cd /woboq_codebrowser \
&& cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \ && cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} -DCLANG_BUILTIN_HEADERS_DIR=/usr/lib/llvm-${LLVM_VERSION}/lib/clang/${LLVM_VERSION}/include \
&& ninja && ninja
ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator COPY build.sh /
ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator CMD ["bash", "-c", "/build.sh 2>&1"]
ENV STATIC_DATA=/woboq_codebrowser/data
ENV SOURCE_DIRECTORY=/repo_folder
ENV BUILD_DIRECTORY=/build
ENV HTML_RESULT_DIRECTORY=$BUILD_DIRECTORY/html_report
ENV SHA=nosha
ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data"
CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=/usr/bin/clang-${LLVM_VERSION} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
mkdir -p $HTML_RESULT_DIRECTORY && \
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\
$CODEINDEX $HTML_RESULT_DIRECTORY -d "$DATA" | ts '%Y-%m-%d %H:%M:%S' && \
mv $HTML_RESULT_DIRECTORY /test_output


@ -0,0 +1,29 @@
#!/usr/bin/env bash
set -x -e
STATIC_DATA=${STATIC_DATA:-/woboq_codebrowser/data}
SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-/build}
BUILD_DIRECTORY=${BUILD_DIRECTORY:-/workdir/build}
OUTPUT_DIRECTORY=${OUTPUT_DIRECTORY:-/workdir/output}
HTML_RESULT_DIRECTORY=${HTML_RESULT_DIRECTORY:-$OUTPUT_DIRECTORY/html_report}
SHA=${SHA:-nosha}
DATA=${DATA:-https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data}
nproc=$(($(nproc) + 2)) # increase parallelism
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
mkdir -p "$BUILD_DIRECTORY" && cd "$BUILD_DIRECTORY"
cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 "${CMAKE_FLAGS[@]}"
mkdir -p "$HTML_RESULT_DIRECTORY"
echo 'Filter out too noisy "Error: filename" lines and keep them in full codebrowser_generator.log'
/woboq_codebrowser/generator/codebrowser_generator -b "$BUILD_DIRECTORY" -a \
-o "$HTML_RESULT_DIRECTORY" --execute-concurrency="$nproc" -p "ClickHouse:$SOURCE_DIRECTORY:$SHA" \
-d "$DATA" \
|& ts '%Y-%m-%d %H:%M:%S' \
| tee "$OUTPUT_DIRECTORY/codebrowser_generator.log" \
| grep --line-buffered -v ':[0-9]* Error: '
cp -r "$STATIC_DATA" "$HTML_RESULT_DIRECTORY/"
/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator "$HTML_RESULT_DIRECTORY" \
-d "$DATA" |& ts '%Y-%m-%d %H:%M:%S'
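For manual runs inside the codebrowser image, invoking the script might look like this (a sketch; the mounted paths are assumptions, and LLVM_VERSION is normally provided by the image):

``` bash
# Point the script at a checked-out source tree and a writable output directory.
SOURCE_DIRECTORY=/repo \
BUILD_DIRECTORY=/tmp/codebrowser/build \
OUTPUT_DIRECTORY=/test_output \
SHA=$(git -C /repo rev-parse HEAD) \
/build.sh
```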


@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT
stage=${stage:-} stage=${stage:-}
# Compiler version, normally set by Dockerfile # Compiler version, normally set by Dockerfile
export LLVM_VERSION=${LLVM_VERSION:-13} export LLVM_VERSION=${LLVM_VERSION:-16}
# A variable to pass additional flags to CMake. # A variable to pass additional flags to CMake.
# Here we explicitly default it to nothing so that bash doesn't complain about # Here we explicitly default it to nothing so that bash doesn't complain about


@ -15,7 +15,7 @@ stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir" echo "$script_dir"
repo_dir=ch repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"} BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"}
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
function git_clone_with_retry function git_clone_with_retry


@ -2,7 +2,7 @@
set -euo pipefail set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}


@ -2,7 +2,7 @@
set -euo pipefail set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}


@ -20,31 +20,27 @@ install_packages package_folder
# Thread Fuzzer allows to check more permutations of possible thread scheduling # Thread Fuzzer allows to check more permutations of possible thread scheduling
# and find more potential issues. # and find more potential issues.
# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540 export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'") export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
if [ "$is_tsan_build" -eq "0" ]; then export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01 export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01
export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01 export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01
fi
export ZOOKEEPER_FAULT_INJECTION=1 export ZOOKEEPER_FAULT_INJECTION=1
# Initial run without S3 to create system.*_log on local file system to make it # Initial run without S3 to create system.*_log on local file system to make it


@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
# 15.0.2 # 15.0.2
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=16
RUN apt-get update \ RUN apt-get update \
&& apt-get install \ && apt-get install \
@ -52,6 +52,7 @@ RUN apt-get update \
lld-${LLVM_VERSION} \ lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \ llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \ llvm-${LLVM_VERSION}-dev \
libclang-${LLVM_VERSION}-dev \
moreutils \ moreutils \
nasm \ nasm \
ninja-build \ ninja-build \


@ -11,14 +11,14 @@ This is intended for continuous integration checks that run on Linux servers. If
The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first. The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first.
## Install Clang-15 ## Install Clang-16
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup. Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example the commands for Bionic are like: For example the commands for Bionic are like:
``` bash ``` bash
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-15 main" >> /etc/apt/sources.list sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-16 main" >> /etc/apt/sources.list
sudo apt-get install clang-15 sudo apt-get install clang-16
``` ```
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset} ## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
@ -55,7 +55,7 @@ curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX1
cd ClickHouse cd ClickHouse
mkdir build-darwin mkdir build-darwin
cd build-darwin cd build-darwin
CC=clang-15 CXX=clang++-15 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake .. CC=clang-16 CXX=clang++-16 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
ninja ninja
``` ```


@ -11,7 +11,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first. The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first.
## Install Clang-13 ## Install Clang-16
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do
``` ```
@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
``` bash ``` bash
cd ClickHouse cd ClickHouse
mkdir build-riscv64 mkdir build-riscv64
CC=clang-14 CXX=clang++-14 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF
ninja -C build-riscv64 ninja -C build-riscv64
``` ```


@ -47,8 +47,8 @@ GCC as a compiler is not supported
To build with a specific Clang version: To build with a specific Clang version:
``` bash ``` bash
export CC=clang-15 export CC=clang-16
export CXX=clang++-15 export CXX=clang++-16
``` ```
### Checkout ClickHouse Sources {#checkout-clickhouse-sources} ### Checkout ClickHouse Sources {#checkout-clickhouse-sources}


@ -4,20 +4,22 @@ sidebar_position: 73
sidebar_label: Building and Benchmarking DEFLATE_QPL sidebar_label: Building and Benchmarking DEFLATE_QPL
description: How to build Clickhouse and run benchmark with DEFLATE_QPL Codec description: How to build Clickhouse and run benchmark with DEFLATE_QPL Codec
--- ---
# Build Clickhouse with DEFLATE_QPL # Build Clickhouse with DEFLATE_QPL
- Make sure your target machine meet the QPL required [Prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
- Pass the following flag to CMake when building ClickHouse, depending on the capabilities of your target machine: - Make sure your target machine meet the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
- Pass the following flag to CMake when building ClickHouse:
``` bash ``` bash
cmake -DENABLE_AVX2=1 -DENABLE_QPL=1 .. cmake -DENABLE_QPL=1 ..
```
or
``` bash
cmake -DENABLE_AVX512=1 -DENABLE_QPL=1 ..
``` ```
- For generic requirements, please refer to Clickhouse generic [build instructions](/docs/en/development/build.md) - For generic requirements, please refer to Clickhouse generic [build instructions](/docs/en/development/build.md)
# Run Benchmark with DEFLATE_QPL # Run Benchmark with DEFLATE_QPL
## Files list ## Files list
The folders `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/ClickHouse/tree/master/contrib/qpl-cmake) give example to run benchmark with python scripts: The folders `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/ClickHouse/tree/master/contrib/qpl-cmake) give example to run benchmark with python scripts:
`client_scripts` contains python scripts for running typical benchmark, for example: `client_scripts` contains python scripts for running typical benchmark, for example:
@ -28,48 +30,60 @@ The folders `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/C
`database_files` means it will store database files according to lz4/deflate/zstd codec. `database_files` means it will store database files according to lz4/deflate/zstd codec.
## Run benchmark automatically for Star Schema: ## Run benchmark automatically for Star Schema:
``` bash ``` bash
$ cd ./benchmark_sample/client_scripts $ cd ./benchmark_sample/client_scripts
$ sh run_ssb.sh $ sh run_ssb.sh
``` ```
After complete, please check all the results in this folder:`./output/` After complete, please check all the results in this folder:`./output/`
In case you run into failure, please manually run benchmark as below sections. In case you run into failure, please manually run benchmark as below sections.
## Definition ## Definition
[CLICKHOUSE_EXE] means the path of clickhouse executable program. [CLICKHOUSE_EXE] means the path of clickhouse executable program.
## Environment ## Environment
- CPU: Sapphire Rapid - CPU: Sapphire Rapid
- OS Requirements refer to [System Requirements for QPL](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#system-requirements) - OS Requirements refer to [System Requirements for QPL](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#system-requirements)
- IAA Setup refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) - IAA Setup refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration)
- Install python modules: - Install python modules:
``` bash ``` bash
pip3 install clickhouse_driver numpy pip3 install clickhouse_driver numpy
``` ```
[Self-check for IAA] [Self-check for IAA]
``` bash ``` bash
$ accel-config list | grep -P 'iax|state' $ accel-config list | grep -P 'iax|state'
``` ```
Expected output like this: Expected output like this:
``` bash ``` bash
"dev":"iax1", "dev":"iax1",
"state":"enabled", "state":"enabled",
"state":"enabled", "state":"enabled",
``` ```
If you see nothing output, it means IAA is not ready to work. Please check IAA setup again. If you see nothing output, it means IAA is not ready to work. Please check IAA setup again.
## Generate raw data ## Generate raw data
``` bash ``` bash
$ cd ./benchmark_sample $ cd ./benchmark_sample
$ mkdir rawdata_dir && cd rawdata_dir $ mkdir rawdata_dir && cd rawdata_dir
``` ```
Use [`dbgen`](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) to generate 100 million rows data with the parameters: Use [`dbgen`](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) to generate 100 million rows data with the parameters:
-s 20 -s 20
The files like `*.tbl` are expected to output under `./benchmark_sample/rawdata_dir/ssb-dbgen`: The files like `*.tbl` are expected to output under `./benchmark_sample/rawdata_dir/ssb-dbgen`:
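For reference, a data-generation sketch following the linked star-schema guide (the repository URL and per-table flags come from that guide and may change):

``` bash
# Generate the SSB tables at scale factor 20 inside ./benchmark_sample/rawdata_dir.
git clone https://github.com/vadimtk/ssb-dbgen.git && cd ssb-dbgen && make
./dbgen -s 20 -T c   # customer.tbl
./dbgen -s 20 -T l   # lineorder.tbl
./dbgen -s 20 -T p   # part.tbl
./dbgen -s 20 -T s   # supplier.tbl
./dbgen -s 20 -T d   # date.tbl
```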
## Database setup ## Database setup
Set up database with LZ4 codec Set up database with LZ4 codec
``` bash ``` bash
@ -77,6 +91,7 @@ $ cd ./database_dir/lz4
$ [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& $ [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
$ [CLICKHOUSE_EXE] client $ [CLICKHOUSE_EXE] client
``` ```
Here you should see the message `Connected to ClickHouse server` from console which means client successfully setup connection with server. Here you should see the message `Connected to ClickHouse server` from console which means client successfully setup connection with server.
Complete below three steps mentioned in [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) Complete below three steps mentioned in [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema)
@ -114,6 +129,7 @@ You are expected to see below output:
└───────────┘ └───────────┘
``` ```
[Self-check for IAA Deflate codec] [Self-check for IAA Deflate codec]
At the first time you execute insertion or query from client, clickhouse server console is expected to print this log: At the first time you execute insertion or query from client, clickhouse server console is expected to print this log:
```text ```text
Hardware-assisted DeflateQpl codec is ready! Hardware-assisted DeflateQpl codec is ready!
@ -125,17 +141,21 @@ Initialization of hardware-assisted DeflateQpl codec failed
That means IAA devices is not ready, you need check IAA setup again. That means IAA devices is not ready, you need check IAA setup again.
## Benchmark with single instance ## Benchmark with single instance
- Before start benchmark, Please disable C6 and set CPU frequency governor to be `performance` - Before start benchmark, Please disable C6 and set CPU frequency governor to be `performance`
``` bash ``` bash
$ cpupower idle-set -d 3 $ cpupower idle-set -d 3
$ cpupower frequency-set -g performance $ cpupower frequency-set -g performance
``` ```
- To eliminate impact of memory bound on cross sockets, we use `numactl` to bind server on one socket and client on another socket. - To eliminate impact of memory bound on cross sockets, we use `numactl` to bind server on one socket and client on another socket.
- Single instance means single server connected with single client - Single instance means single server connected with single client
Now run benchmark for LZ4/Deflate/ZSTD respectively: Now run benchmark for LZ4/Deflate/ZSTD respectively:
LZ4: LZ4:
``` bash ``` bash
$ cd ./database_dir/lz4 $ cd ./database_dir/lz4
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& $ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
@ -144,13 +164,16 @@ $ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > lz4.log
``` ```
IAA deflate: IAA deflate:
``` bash ``` bash
$ cd ./database_dir/deflate $ cd ./database_dir/deflate
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& $ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
$ cd ./client_scripts $ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > deflate.log $ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > deflate.log
``` ```
ZSTD: ZSTD:
``` bash ``` bash
$ cd ./database_dir/zstd $ cd ./database_dir/zstd
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& $ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
@ -170,6 +193,7 @@ How to check performance metrics:
We focus on QPS, please search the keyword: `QPS_Final` and collect statistics We focus on QPS, please search the keyword: `QPS_Final` and collect statistics
## Benchmark with multi-instances ## Benchmark with multi-instances
- To reduce impact of memory bound on too much threads, We recommend run benchmark with multi-instances. - To reduce impact of memory bound on too much threads, We recommend run benchmark with multi-instances.
- Multi-instance means multiple2 or 4servers connected with respective client. - Multi-instance means multiple2 or 4servers connected with respective client.
- The cores of one socket need to be divided equally and assigned to the servers respectively. - The cores of one socket need to be divided equally and assigned to the servers respectively.
@ -182,35 +206,46 @@ There are 2 differences:
Here we assume there are 60 cores per socket and take 2 instances for example. Here we assume there are 60 cores per socket and take 2 instances for example.
Launch server for first instance Launch server for first instance
LZ4: LZ4:
``` bash ``` bash
$ cd ./database_dir/lz4 $ cd ./database_dir/lz4
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
``` ```
ZSTD: ZSTD:
``` bash ``` bash
$ cd ./database_dir/zstd $ cd ./database_dir/zstd
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
``` ```
IAA Deflate: IAA Deflate:
``` bash ``` bash
$ cd ./database_dir/deflate $ cd ./database_dir/deflate
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
``` ```
[Launch server for second instance] [Launch server for second instance]
LZ4: LZ4:
``` bash ``` bash
$ cd ./database_dir && mkdir lz4_s2 && cd lz4_s2 $ cd ./database_dir && mkdir lz4_s2 && cd lz4_s2
$ cp ../../server_config/config_lz4_s2.xml ./ $ cp ../../server_config/config_lz4_s2.xml ./
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null& $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null&
``` ```
ZSTD: ZSTD:
``` bash ``` bash
$ cd ./database_dir && mkdir zstd_s2 && cd zstd_s2 $ cd ./database_dir && mkdir zstd_s2 && cd zstd_s2
$ cp ../../server_config/config_zstd_s2.xml ./ $ cp ../../server_config/config_zstd_s2.xml ./
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null& $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null&
``` ```
IAA Deflate: IAA Deflate:
``` bash ``` bash
$ cd ./database_dir && mkdir deflate_s2 && cd deflate_s2 $ cd ./database_dir && mkdir deflate_s2 && cd deflate_s2
$ cp ../../server_config/config_deflate_s2.xml ./ $ cp ../../server_config/config_deflate_s2.xml ./
@ -220,19 +255,24 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/d
Creating tables && Inserting data for second instance Creating tables && Inserting data for second instance
Creating tables: Creating tables:
``` bash ``` bash
$ [CLICKHOUSE_EXE] client -m --port=9001 $ [CLICKHOUSE_EXE] client -m --port=9001
``` ```
Inserting data: Inserting data:
``` bash ``` bash
$ [CLICKHOUSE_EXE] client --query "INSERT INTO [TBL_FILE_NAME] FORMAT CSV" < [TBL_FILE_NAME].tbl --port=9001 $ [CLICKHOUSE_EXE] client --query "INSERT INTO [TBL_FILE_NAME] FORMAT CSV" < [TBL_FILE_NAME].tbl --port=9001
``` ```
- [TBL_FILE_NAME] represents the name of a file matching the pattern `*.tbl` under `./benchmark_sample/rawdata_dir/ssb-dbgen`. - [TBL_FILE_NAME] represents the name of a file matching the pattern `*.tbl` under `./benchmark_sample/rawdata_dir/ssb-dbgen`.
- `--port=9001` is the port assigned to the server instance, which is also defined in config_lz4_s2.xml/config_zstd_s2.xml/config_deflate_s2.xml. For even more instances, replace it with 9002/9003, which stand for the s3/s4 instances respectively. If you do not assign it, the port defaults to 9000, which is already used by the first instance. - `--port=9001` is the port assigned to the server instance, which is also defined in config_lz4_s2.xml/config_zstd_s2.xml/config_deflate_s2.xml. For even more instances, replace it with 9002/9003, which stand for the s3/s4 instances respectively. If you do not assign it, the port defaults to 9000, which is already used by the first instance.
Benchmarking with 2 instances Benchmarking with 2 instances
LZ4: LZ4:
``` bash ``` bash
$ cd ./database_dir/lz4 $ cd ./database_dir/lz4
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
@ -241,7 +281,9 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/n
$ cd ./client_scripts $ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > lz4_2insts.log $ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > lz4_2insts.log
``` ```
ZSTD: ZSTD:
``` bash ``` bash
$ cd ./database_dir/zstd $ cd ./database_dir/zstd
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
@ -250,7 +292,9 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/
$ cd ./client_scripts $ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > zstd_2insts.log $ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > zstd_2insts.log
``` ```
IAA deflate: IAA deflate:
``` bash ``` bash
$ cd ./database_dir/deflate $ cd ./database_dir/deflate
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
@ -259,9 +303,11 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/d
$ cd ./client_scripts $ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > deflate_2insts.log $ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > deflate_2insts.log
``` ```
Here the last argument `2` of client_stressing_test.py stands for the number of instances. For more instances, replace it with 3 or 4. This script supports up to 4 instances. Here the last argument `2` of client_stressing_test.py stands for the number of instances. For more instances, replace it with 3 or 4. This script supports up to 4 instances.
Now three logs should be output as expected: Now three logs should be output as expected:
``` text ``` text
lz4_2insts.log lz4_2insts.log
deflate_2insts.log deflate_2insts.log
@ -275,7 +321,9 @@ Benchmark setup for 4 instances is similar with 2 instances above.
We recommend using the 2-instance benchmark data as the final report for review. We recommend using the 2-instance benchmark data as the final report for review.
## Tips ## Tips
Each time before launching a new ClickHouse server, please make sure no background clickhouse process is running; check and kill the old one: Each time before launching a new ClickHouse server, please make sure no background clickhouse process is running; check and kill the old one:
``` bash ``` bash
$ ps -aux| grep clickhouse $ ps -aux| grep clickhouse
$ kill -9 [PID] $ kill -9 [PID]
@ -102,7 +102,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t
### Report Details ### Report Details
- **Compiler**: `clang-15`, optionally with the name of a target platform - **Compiler**: `clang-16`, optionally with the name of a target platform
- **Build type**: `Debug` or `RelWithDebInfo` (cmake). - **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan). - **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Status**: `success` or `fail` - **Status**: `success` or `fail`
@ -152,7 +152,7 @@ While inside the `build` directory, configure your build by running CMake. Befor
export CC=clang CXX=clang++ export CC=clang CXX=clang++
cmake .. cmake ..
If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-15 CXX=clang++-15`. The clang version will be in the script output. If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-16 CXX=clang++-16`. The clang version will be in the script output.
The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building. The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building.
@ -143,8 +143,9 @@ You can also download and install packages manually from [here](https://packages
#### Install standalone ClickHouse Keeper #### Install standalone ClickHouse Keeper
:::tip :::tip
If you are going to run ClickHouse Keeper on the same server as ClickHouse server you In production environment we [strongly recommend](/docs/en/operations/tips.md#L143-L144) running ClickHouse Keeper on dedicated nodes.
do not need to install ClickHouse Keeper as it is included with ClickHouse server. This command is only needed on standalone ClickHouse Keeper servers. In test environments, if you decide to run ClickHouse Server and ClickHouse Keeper on the same server, you do not need to install ClickHouse Keeper as it is included with ClickHouse server.
This command is only needed on standalone ClickHouse Keeper servers.
::: :::
```bash ```bash
@ -211,8 +212,9 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
#### Install standalone ClickHouse Keeper #### Install standalone ClickHouse Keeper
:::tip :::tip
If you are going to run ClickHouse Keeper on the same server as ClickHouse server you In production environment we [strongly recommend](/docs/en/operations/tips.md#L143-L144) running ClickHouse Keeper on dedicated nodes.
do not need to install ClickHouse Keeper as it is included with ClickHouse server. This command is only needed on standalone ClickHouse Keeper servers. In test environments, if you decide to run ClickHouse Server and ClickHouse Keeper on the same server, you do not need to install ClickHouse Keeper as it is included with ClickHouse server.
This command is only needed on standalone ClickHouse Keeper servers.
::: :::
```bash ```bash
@ -38,6 +38,10 @@ Structure of the `users` section:
</table_name> </table_name>
</database_name> </database_name>
</databases> </databases>
<grants>
<query>GRANT SELECT ON system.*</query>
</grants>
</user_name> </user_name>
<!-- Other users settings --> <!-- Other users settings -->
</users> </users>
@ -86,6 +90,28 @@ Possible values:
Default value: 0. Default value: 0.
### grants {#grants-user-setting}
This setting allows granting any rights to the selected user.
Each element of the list should be a `GRANT` query without any grantees specified.
Example:
```xml
<user1>
<grants>
<query>GRANT SHOW ON *.*</query>
<query>GRANT CREATE ON *.* WITH GRANT OPTION</query>
<query>GRANT SELECT ON system.*</query>
</grants>
</user1>
```
This setting can't be specified together with the
`dictionaries`, `access_management`, `named_collection_control`, `show_named_collections_secrets`
and `allow_databases` settings.
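To verify what a user configured this way ends up with, the grants can be listed from a client session; a minimal sketch, assuming the `user1` definition from the example above has been loaded:

```sql
-- Lists the access rights that were granted to user1 via the <grants> section.
SHOW GRANTS FOR user1;
```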
### user_name/networks {#user-namenetworks} ### user_name/networks {#user-namenetworks}
List of networks from which the user can connect to the ClickHouse server. List of networks from which the user can connect to the ClickHouse server.
@ -608,6 +608,17 @@ See also:
- [JOIN strictness](../../sql-reference/statements/select/join.md/#join-settings) - [JOIN strictness](../../sql-reference/statements/select/join.md/#join-settings)
## max_rows_in_set_to_optimize_join
Maximal size of the set that is built to filter joined tables by each other's row sets before joining.
Possible values:
- 0 — Disable.
- Any positive integer.
Default value: 100000.
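Like other query-level settings, it can be changed per session or per query; a hedged sketch, assuming hypothetical tables `t1` and `t2`:

```sql
-- Disable the row-set filtering optimization for the current session.
SET max_rows_in_set_to_optimize_join = 0;

-- Or override it for a single query (t1 and t2 are hypothetical tables).
SELECT count()
FROM t1
INNER JOIN t2 ON t1.key = t2.key
SETTINGS max_rows_in_set_to_optimize_join = 500000;
```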
## temporary_files_codec {#temporary_files_codec} ## temporary_files_codec {#temporary_files_codec}
Sets compression codec for temporary files used in sorting and joining operations on disk. Sets compression codec for temporary files used in sorting and joining operations on disk.
@ -1636,7 +1647,7 @@ For not replicated tables see [non_replicated_deduplication_window](merge-tree-s
### async_insert {#async-insert} ### async_insert {#async-insert}
Enables or disables asynchronous inserts. This makes sense only for insertion over HTTP protocol. Note that deduplication isn't working for such inserts. Enables or disables asynchronous inserts. Note that deduplication is disabled by default, see [async_insert_deduplicate](#async-insert-deduplicate).
If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables. If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables.
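For illustration, asynchronous inserts can also be enabled per query through the `SETTINGS` clause; a minimal sketch, assuming a hypothetical table `events` with two columns:

```sql
-- events is a hypothetical table; wait_for_async_insert makes the client wait until the batch is flushed.
INSERT INTO events SETTINGS async_insert = 1, wait_for_async_insert = 1
VALUES (1, 'page_view');
```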
@ -215,7 +215,7 @@ Cache **system tables**:
Cache **commands**: Cache **commands**:
- `SYSTEM DROP FILESYSTEM CACHE (<path>) (ON CLUSTER)` - `SYSTEM DROP FILESYSTEM CACHE (<cache_name>) (ON CLUSTER)` -- `ON CLUSTER` is only supported when no `<cache_name>` is provided
- `SHOW FILESYSTEM CACHES` -- show list of filesystem caches which were configured on the server. (For versions <= `22.8` the command is named `SHOW CACHES`) - `SHOW FILESYSTEM CACHES` -- show list of filesystem caches which were configured on the server. (For versions <= `22.8` the command is named `SHOW CACHES`)
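For example, a sketch of the two commands above (dropping all caches on the local server when no cache name is given):

```sql
-- List the filesystem caches configured on the server.
SHOW FILESYSTEM CACHES;

-- Drop all filesystem caches on the local server.
SYSTEM DROP FILESYSTEM CACHE;
```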
@ -231,10 +231,10 @@ Result:
└───────────┘ └───────────┘
``` ```
- `DESCRIBE CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`) - `DESCRIBE FILESYSTEM CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW FILESYSTEM CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`)
```sql ```sql
DESCRIBE CACHE 's3_cache' DESCRIBE FILESYSTEM CACHE 's3_cache'
``` ```
``` text ``` text
@ -0,0 +1,48 @@
---
slug: /en/sql-reference/aggregate-functions/reference/greatest
title: greatest
---
Returns the greatest value across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT
toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.)),
greatest(1, 2, toUInt8(3), 3.)
```
```response
┌─toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.))─┬─greatest(1, 2, toUInt8(3), 3.)─┐
│ Float64 │ 3 │
└─────────────────────────────────────────────────────┴────────────────────────────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT greatest(['hello'], ['there'], ['world'])
```
```response
┌─greatest(['hello'], ['there'], ['world'])─┐
│ ['world'] │
└───────────────────────────────────────────┘
```
```sql
SELECT greatest(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─greatest(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└───────────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
:::
Also see [least](/docs/en/sql-reference/aggregate-functions/reference/least.md).
@ -0,0 +1,48 @@
---
slug: /en/sql-reference/aggregate-functions/reference/least
title: least
---
Returns the least value across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT
toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.)),
least(1, 2, toUInt8(3), 3.)
```
```response
┌─toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.))─┬─least(1, 2, toUInt8(3), 3.)─┐
│ Float64 │ 1 │
└──────────────────────────────────────────────────┴─────────────────────────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT least(['hello'], ['there'], ['world'])
```
```response
┌─least(['hello'], ['there'], ['world'])─┐
│ ['hello'] │
└────────────────────────────────────────┘
```
```sql
SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─least(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└────────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
:::
Also see [greatest](/docs/en/sql-reference/aggregate-functions/reference/greatest.md).
@ -46,8 +46,6 @@ SELECT [1, 2] AS x, toTypeName(x)
## Working with Data Types ## Working with Data Types
The maximum size of an array is limited to one million elements.
When creating an array on the fly, ClickHouse automatically defines the argument type as the narrowest data type that can store all the listed arguments. If there are any [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) or literal [NULL](../../sql-reference/syntax.md#null-literal) values, the type of an array element also becomes [Nullable](../../sql-reference/data-types/nullable.md). When creating an array on the fly, ClickHouse automatically defines the argument type as the narrowest data type that can store all the listed arguments. If there are any [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) or literal [NULL](../../sql-reference/syntax.md#null-literal) values, the type of an array element also becomes [Nullable](../../sql-reference/data-types/nullable.md).
If ClickHouse couldn't determine the data type, it generates an exception. For instance, this happens when trying to create an array with strings and numbers simultaneously (`SELECT array(1, 'a')`). If ClickHouse couldn't determine the data type, it generates an exception. For instance, this happens when trying to create an array with strings and numbers simultaneously (`SELECT array(1, 'a')`).
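A quick sketch of the type-inference rule described above (the exact inferred type is whatever the server reports):

```sql
-- NULL makes the element type Nullable; the narrowest numeric type is kept (expected: Array(Nullable(UInt8))).
SELECT array(1, 2, NULL) AS x, toTypeName(x);

-- Mixing incompatible types raises an exception:
-- SELECT array(1, 'a');
```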
@ -2218,8 +2218,6 @@ LAYOUT(regexp_tree)
... ...
``` ```
We only allow `YAMLRegExpTree` to work with regexp_tree dicitionary layout. If you want to use other sources, please set variable `regexp_dict_allow_other_sources` true.
**Source** **Source**
We introduce a type of source called `YAMLRegExpTree` representing the structure of Regexp Tree dictionary. An Example of a valid yaml config is like: We introduce a type of source called `YAMLRegExpTree` representing the structure of Regexp Tree dictionary. An Example of a valid yaml config is like:
@ -1218,12 +1218,16 @@ Rounds the time to the half hour.
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### example **Example**
```sql
``` sql
SELECT SELECT
toYYYYMM(now(), 'US/Eastern') toYYYYMM(now(), 'US/Eastern')
``` ```
```response
Result:
``` text
┌─toYYYYMM(now(), 'US/Eastern')─┐ ┌─toYYYYMM(now(), 'US/Eastern')─┐
│ 202303 │ │ 202303 │
└───────────────────────────────┘ └───────────────────────────────┘
@ -1233,11 +1237,15 @@ SELECT
Converts a date or date with time to a UInt32 number containing the year, month, and day number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. Converts a date or date with time to a UInt32 number containing the year, month, and day number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### example **Example**
```sql ```sql
SELECT SELECT
toYYYYMMDD(now(), 'US/Eastern') toYYYYMMDD(now(), 'US/Eastern')
``` ```
Result:
```response ```response
┌─toYYYYMMDD(now(), 'US/Eastern')─┐ ┌─toYYYYMMDD(now(), 'US/Eastern')─┐
│ 20230302 │ │ 20230302 │
@ -1248,11 +1256,15 @@ SELECT
Converts a date or date with time to a UInt64 number containing the year, month, day, hour, minute, and second (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. Converts a date or date with time to a UInt64 number containing the year, month, day, hour, minute, and second (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### example **Example**
```sql ```sql
SELECT SELECT
toYYYYMMDDhhmmss(now(), 'US/Eastern') toYYYYMMDDhhmmss(now(), 'US/Eastern')
``` ```
Result:
```response ```response
┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐ ┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐
│ 20230302112209 │ │ 20230302112209 │
@ -279,6 +279,8 @@ cityHash64(par1,...)
This is a fast non-cryptographic hash function. It uses the CityHash algorithm for string parameters and implementation-specific fast non-cryptographic hash function for parameters with other data types. The function uses the CityHash combinator to get the final results. This is a fast non-cryptographic hash function. It uses the CityHash algorithm for string parameters and implementation-specific fast non-cryptographic hash function for parameters with other data types. The function uses the CityHash combinator to get the final results.
Note that Google changed the algorithm of CityHash after it was added to ClickHouse. In other words, ClickHouse's cityHash64 and Google's upstream CityHash now produce different results. ClickHouse cityHash64 corresponds to CityHash v1.0.2.
**Arguments** **Arguments**
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
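A small sketch of calling it with a variable number of mixed-type arguments (the concrete hash values are implementation results and are not shown here):

```sql
-- cityHash64 accepts any number of arguments of supported types and returns a UInt64.
SELECT
    cityHash64('ClickHouse') AS h1,
    cityHash64(42, 'text', now()) AS h2,
    toTypeName(h1) AS h1_type;
```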
@ -59,244 +59,6 @@ A lambda function that accepts multiple arguments can also be passed to a higher
For some functions the first argument (the lambda function) can be omitted. In this case, identical mapping is assumed. For some functions the first argument (the lambda function) can be omitted. In this case, identical mapping is assumed.
## SQL User Defined Functions ## User Defined Functions (UDFs)
Custom functions from lambda expressions can be created using the [CREATE FUNCTION](../statements/create/function.md) statement. To delete these functions use the [DROP FUNCTION](../statements/drop.md#drop-function) statement. ClickHouse supports user-defined functions. See [UDFs](/docs/en/sql-reference/functions/udf.md).
## Executable User Defined Functions
ClickHouse can call any external executable program or script to process data.
The configuration of executable user defined functions can be located in one or more xml-files. The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter.
A function configuration contains the following settings:
- `name` - a function name.
- `command` - script name to execute or command if `execute_direct` is false.
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `pool_size` - the size of a command pool. Optional. Default value is `16`.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is after processing a chunk of arguments it must wait for the next chunk.
**Example**
Creating `test_function` using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
<name>value</name>
</argument>
<format>TabSeparated</format>
<command>test_function.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function.py` (`/var/lib/clickhouse/user_scripts/test_function.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == '__main__':
for line in sys.stdin:
print("Value " + line, end='')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_python(toUInt64(2));
```
Result:
``` text
┌─test_function_python(2)─┐
│ Value 2 │
└─────────────────────────┘
```
Creating `test_function_sum` manually specifying `execute_direct` to `0` using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum</name>
<return_type>UInt64</return_type>
<argument>
<type>UInt64</type>
<name>lhs</name>
</argument>
<argument>
<type>UInt64</type>
<name>rhs</name>
</argument>
<format>TabSeparated</format>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table"</command>
<execute_direct>0</execute_direct>
</function>
</functions>
```
Query:
``` sql
SELECT test_function_sum(2, 2);
```
Result:
``` text
┌─test_function_sum(2, 2)─┐
│ 4 │
└─────────────────────────┘
```
Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum_json</name>
<return_type>UInt64</return_type>
<return_name>result_name</return_name>
<argument>
<type>UInt64</type>
<name>argument_1</name>
</argument>
<argument>
<type>UInt64</type>
<name>argument_2</name>
</argument>
<format>JSONEachRow</format>
<command>test_function_sum_json.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_sum_json.py` (`/var/lib/clickhouse/user_scripts/test_function_sum_json.py` with default path settings).
```python
#!/usr/bin/python3
import sys
import json
if __name__ == '__main__':
for line in sys.stdin:
value = json.loads(line)
first_arg = int(value['argument_1'])
second_arg = int(value['argument_2'])
result = {'result_name': first_arg + second_arg}
print(json.dumps(result), end='\n')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_sum_json(2, 2);
```
Result:
``` text
┌─test_function_sum_json(2, 2)─┐
│ 4 │
└──────────────────────────────┘
```
Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type).
File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_parameter_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
</argument>
<format>TabSeparated</format>
<command>test_function_parameter_python.py {test_parameter:UInt64}</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_parameter_python.py` (`/var/lib/clickhouse/user_scripts/test_function_parameter_python.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == "__main__":
for line in sys.stdin:
print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="")
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_parameter_python(1)(2);
```
Result:
``` text
┌─test_function_parameter_python(1)(2)─┐
│ Parameter 1 value 2 │
└──────────────────────────────────────┘
```
## Error Handling
Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query.
## Evaluation of Argument Expressions
In almost all programming languages, one of the arguments might not be evaluated for certain operators. This is usually the operators `&&`, `||`, and `?:`.
But in ClickHouse, arguments of functions (operators) are always evaluated. This is because entire parts of columns are evaluated at once, instead of calculating each row separately.
## Performing Functions for Distributed Query Processing
For distributed query processing, as many stages of query processing as possible are performed on remote servers, and the rest of the stages (merging intermediate results and everything after that) are performed on the requestor server.
This means that functions can be performed on different servers.
For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y),`
- if a `distributed_table` has at least two shards, the functions g and h are performed on remote servers, and the function f is performed on the requestor server.
- if a `distributed_table` has only one shard, all the f, g, and h functions are performed on this shards server.
The result of a function usually does not depend on which server it is performed on. However, sometimes this is important.
For example, functions that work with dictionaries use the dictionary that exists on the server they are running on.
Another example is the `hostName` function, which returns the name of the server it is running on in order to make `GROUP BY` by servers in a `SELECT` query.
If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in an any aggregate function or add it to a key in `GROUP BY`.
## Related Content
- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)
@ -0,0 +1,249 @@
---
slug: /en/sql-reference/functions/udf
sidebar_position: 15
sidebar_label: UDF
---
# User Defined Functions (UDFs)
## Executable User Defined Functions
ClickHouse can call any external executable program or script to process data.
The configuration of executable user defined functions can be located in one or more xml-files. The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter.
A function configuration contains the following settings:
- `name` - a function name.
- `command` - script name to execute or command if `execute_direct` is false.
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `pool_size` - the size of a command pool. Optional. Default value is `16`.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is, after processing a chunk of arguments, it must wait for the next chunk.
**Example**
Creating `test_function` using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
<name>value</name>
</argument>
<format>TabSeparated</format>
<command>test_function.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function.py` (`/var/lib/clickhouse/user_scripts/test_function.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == '__main__':
for line in sys.stdin:
print("Value " + line, end='')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_python(toUInt64(2));
```
Result:
``` text
┌─test_function_python(2)─┐
│ Value 2 │
└─────────────────────────┘
```
Creating `test_function_sum` manually specifying `execute_direct` to `0` using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum</name>
<return_type>UInt64</return_type>
<argument>
<type>UInt64</type>
<name>lhs</name>
</argument>
<argument>
<type>UInt64</type>
<name>rhs</name>
</argument>
<format>TabSeparated</format>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table"</command>
<execute_direct>0</execute_direct>
</function>
</functions>
```
Query:
``` sql
SELECT test_function_sum(2, 2);
```
Result:
``` text
┌─test_function_sum(2, 2)─┐
│ 4 │
└─────────────────────────┘
```
Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum_json</name>
<return_type>UInt64</return_type>
<return_name>result_name</return_name>
<argument>
<type>UInt64</type>
<name>argument_1</name>
</argument>
<argument>
<type>UInt64</type>
<name>argument_2</name>
</argument>
<format>JSONEachRow</format>
<command>test_function_sum_json.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_sum_json.py` (`/var/lib/clickhouse/user_scripts/test_function_sum_json.py` with default path settings).
```python
#!/usr/bin/python3
import sys
import json
if __name__ == '__main__':
for line in sys.stdin:
value = json.loads(line)
first_arg = int(value['argument_1'])
second_arg = int(value['argument_2'])
result = {'result_name': first_arg + second_arg}
print(json.dumps(result), end='\n')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_sum_json(2, 2);
```
Result:
``` text
┌─test_function_sum_json(2, 2)─┐
│ 4 │
└──────────────────────────────┘
```
Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type).
File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_parameter_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
</argument>
<format>TabSeparated</format>
<command>test_function_parameter_python.py {test_parameter:UInt64}</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_parameter_python.py` (`/var/lib/clickhouse/user_scripts/test_function_parameter_python.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == "__main__":
for line in sys.stdin:
print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="")
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_parameter_python(1)(2);
```
Result:
``` text
┌─test_function_parameter_python(1)(2)─┐
│ Parameter 1 value 2 │
└──────────────────────────────────────┘
```
## Error Handling
Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query.
## Evaluation of Argument Expressions
In almost all programming languages, one of the arguments might not be evaluated for certain operators. This is usually the operators `&&`, `||`, and `?:`.
But in ClickHouse, arguments of functions (operators) are always evaluated. This is because entire parts of columns are evaluated at once, instead of calculating each row separately.
## Performing Functions for Distributed Query Processing
For distributed query processing, as many stages of query processing as possible are performed on remote servers, and the rest of the stages (merging intermediate results and everything after that) are performed on the requestor server.
This means that functions can be performed on different servers.
For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y),`
- if a `distributed_table` has at least two shards, the functions g and h are performed on remote servers, and the function f is performed on the requestor server.
- if a `distributed_table` has only one shard, all the f, g, and h functions are performed on this shard's server.
The result of a function usually does not depend on which server it is performed on. However, sometimes this is important.
For example, functions that work with dictionaries use the dictionary that exists on the server they are running on.
Another example is the `hostName` function, which returns the name of the server it is running on in order to make `GROUP BY` by servers in a `SELECT` query.
If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in an `any` aggregate function or add it to a key in `GROUP BY`.
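To illustrate the `hostName` case above, a sketch with a hypothetical distributed table `distributed_table`:

```sql
-- Group by the server that processed each row; hostName() runs on the remote shards here
-- because it is part of the GROUP BY key.
SELECT hostName() AS server, count()
FROM distributed_table
GROUP BY server;
```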
## SQL User Defined Functions
Custom functions from lambda expressions can be created using the [CREATE FUNCTION](../statements/create/function.md) statement. To delete these functions use the [DROP FUNCTION](../statements/drop.md#drop-function) statement.
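A minimal sketch of the full lifecycle (the function name `linear_equation` is purely illustrative):

```sql
-- Create a simple SQL UDF from a lambda expression.
CREATE FUNCTION linear_equation AS (x, k, b) -> k * x + b;

-- Use it like any built-in function.
SELECT linear_equation(number, 2, 1) FROM numbers(3);

-- Remove it when it is no longer needed.
DROP FUNCTION linear_equation;
```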
## Related Content
### [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)
@ -132,7 +132,7 @@ Comments are stored in the `comment_expression` column returned by the [DESCRIBE
Example: Example:
``` sql ``` sql
ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for accessing the site.' ALTER TABLE visits COMMENT COLUMN browser 'This column shows the browser used for accessing the site.'
``` ```
## MODIFY COLUMN ## MODIFY COLUMN
@ -61,4 +61,6 @@ Result:
## Related Content ## Related Content
- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) ### [Executable UDFs](/docs/en/sql-reference/functions/udf.md).
### [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)
@ -381,9 +381,9 @@ High compression levels are useful for asymmetric scenarios, like compress once,
`DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply: `DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply:
- DEFLATE_QPL is experimental and can only be used after setting configuration parameter `allow_experimental_codecs=1`. - DEFLATE_QPL is experimental and can only be used after setting configuration parameter `allow_experimental_codecs=1`.
- DEFLATE_QPL only works if ClickHouse was compiled with support for AVX2 or AVX512 instructions. Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details. - DEFLATE_QPL requires a ClickHouse build compiled with SSE 4.2 instructions (by default, this is the case). Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details.
- DEFLATE_QPL works best if the system has an Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details. - DEFLATE_QPL works best if the system has an Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details.
- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with support for AVX2/AVX512 - DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with SSE 4.2 enabled.
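Given the limitations above, a hedged sketch of enabling the codec for a single column (the table and column names are hypothetical):

```sql
-- DEFLATE_QPL is experimental, so the setting below must be enabled first.
SET allow_experimental_codecs = 1;

CREATE TABLE qpl_demo
(
    id UInt64,
    payload String CODEC(DEFLATE_QPL)
)
ENGINE = MergeTree
ORDER BY id;
```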
### Specialized Codecs ### Specialized Codecs
@ -37,6 +37,10 @@ sidebar_label: "Настройки пользователей"
<table_name> <table_name>
</database_name> </database_name>
</databases> </databases>
<grants>
<query>GRANT SELECT ON system.*</query>
</grants>
</user_name> </user_name>
<!-- Other users settings --> <!-- Other users settings -->
</users> </users>
@ -89,6 +93,27 @@ sidebar_label: "Настройки пользователей"
Default value: 0. Default value: 0.
### grants {#grants-user-setting}
This setting allows specifying a set of rights for the given user.
Each element of the list should be a `GRANT` query without specifying any users in the query itself.
Example:
```xml
<user1>
<grants>
<query>GRANT SHOW ON *.*</query>
<query>GRANT CREATE ON *.* WITH GRANT OPTION</query>
<query>GRANT SELECT ON system.*</query>
</grants>
</user1>
```
This setting cannot be specified together with the
`dictionaries`, `access_management`, `named_collection_control`, `show_named_collections_secrets`
or `allow_databases` settings.
### user_name/networks {#user-namenetworks} ### user_name/networks {#user-namenetworks}
List of networks from which the user can connect to the ClickHouse server. List of networks from which the user can connect to the ClickHouse server.
@ -175,7 +175,7 @@ public:
Coordination::Stat stat{}; Coordination::Stat stat{};
String _some_data; String _some_data;
auto watch_callback = auto watch_callback =
[stale = stale] (const Coordination::WatchResponse & rsp) [my_stale = stale] (const Coordination::WatchResponse & rsp)
{ {
auto logger = &Poco::Logger::get("ClusterCopier"); auto logger = &Poco::Logger::get("ClusterCopier");
if (rsp.error == Coordination::Error::ZOK) if (rsp.error == Coordination::Error::ZOK)
@ -184,11 +184,11 @@ public:
{ {
case Coordination::CREATED: case Coordination::CREATED:
LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path); LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path);
stale->store(true); my_stale->store(true);
break; break;
case Coordination::CHANGED: case Coordination::CHANGED:
LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at {}", rsp.path); LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at {}", rsp.path);
stale->store(true); my_stale->store(true);
} }
} }
}; };
@ -498,18 +498,18 @@ try
/// Prometheus (if defined and not setup yet with http_port) /// Prometheus (if defined and not setup yet with http_port)
port_name = "prometheus.port"; port_name = "prometheus.port";
createServer(listen_host, port_name, listen_try, [&, http_context = std::move(http_context)](UInt16 port) mutable createServer(listen_host, port_name, listen_try, [&, my_http_context = std::move(http_context)](UInt16 port) mutable
{ {
Poco::Net::ServerSocket socket; Poco::Net::ServerSocket socket;
auto address = socketBindListen(socket, listen_host, port); auto address = socketBindListen(socket, listen_host, port);
socket.setReceiveTimeout(http_context->getReceiveTimeout()); socket.setReceiveTimeout(my_http_context->getReceiveTimeout());
socket.setSendTimeout(http_context->getSendTimeout()); socket.setSendTimeout(my_http_context->getSendTimeout());
servers->emplace_back( servers->emplace_back(
listen_host, listen_host,
port_name, port_name,
"Prometheus: http://" + address.toString(), "Prometheus: http://" + address.toString(),
std::make_unique<HTTPServer>( std::make_unique<HTTPServer>(
std::move(http_context), createPrometheusMainHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); std::move(my_http_context), createPrometheusMainHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
}); });
} }
@ -1294,10 +1294,10 @@
<!-- Configuration of external dictionaries. See: <!-- Configuration of external dictionaries. See:
https://clickhouse.com/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts https://clickhouse.com/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts
--> -->
<dictionaries_config>*_dictionary.xml</dictionaries_config> <dictionaries_config>*_dictionary.*ml</dictionaries_config>
<!-- Configuration of user defined executable functions --> <!-- Configuration of user defined executable functions -->
<user_defined_executable_functions_config>*_function.xml</user_defined_executable_functions_config> <user_defined_executable_functions_config>*_function.*ml</user_defined_executable_functions_config>
<!-- Path in ZooKeeper to store user-defined SQL functions created by the command CREATE FUNCTION. <!-- Path in ZooKeeper to store user-defined SQL functions created by the command CREATE FUNCTION.
If not specified they will be stored locally. --> If not specified they will be stored locally. -->
@ -47,10 +47,10 @@ scope_guard AccessChangesNotifier::subscribeForChanges(AccessEntityType type, co
list.push_back(handler); list.push_back(handler);
auto handler_it = std::prev(list.end()); auto handler_it = std::prev(list.end());
return [handlers=handlers, type, handler_it] return [my_handlers = handlers, type, handler_it]
{ {
std::lock_guard lock2{handlers->mutex}; std::lock_guard lock2{my_handlers->mutex};
auto & list2 = handlers->by_type[static_cast<size_t>(type)]; auto & list2 = my_handlers->by_type[static_cast<size_t>(type)];
list2.erase(handler_it); list2.erase(handler_it);
}; };
} }
@ -63,13 +63,13 @@ scope_guard AccessChangesNotifier::subscribeForChanges(const UUID & id, const On
list.push_back(handler); list.push_back(handler);
auto handler_it = std::prev(list.end()); auto handler_it = std::prev(list.end());
return [handlers=handlers, it, handler_it] return [my_handlers = handlers, it, handler_it]
{ {
std::lock_guard lock2{handlers->mutex}; std::lock_guard lock2{my_handlers->mutex};
auto & list2 = it->second; auto & list2 = it->second;
list2.erase(handler_it); list2.erase(handler_it);
if (list2.empty()) if (list2.empty())
handlers->by_id.erase(it); my_handlers->by_id.erase(it);
}; };
} }
@ -10,6 +10,7 @@
#include <Common/OpenSSLHelpers.h> #include <Common/OpenSSLHelpers.h>
#include <Poco/SHA1Engine.h> #include <Poco/SHA1Engine.h>
#include <base/types.h> #include <base/types.h>
#include <base/hex.h>
#include <boost/algorithm/hex.hpp> #include <boost/algorithm/hex.hpp>
#include <boost/algorithm/string/case_conv.hpp> #include <boost/algorithm/string/case_conv.hpp>
@ -742,9 +742,9 @@ void DiskAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{ {
for (const auto & [id, entity] : entities) for (const auto & [id, entity] : my_entities)
insertWithID(id, entity, replace_if_exists, throw_if_exists, /* write_on_disk= */ true); insertWithID(id, entity, replace_if_exists, throw_if_exists, /* write_on_disk= */ true);
}); });
} }
@ -26,10 +26,10 @@ scope_guard EnabledRoles::subscribeForChanges(const OnChangeHandler & handler) c
handlers->list.push_back(handler); handlers->list.push_back(handler);
auto it = std::prev(handlers->list.end()); auto it = std::prev(handlers->list.end());
return [handlers=handlers, it] return [my_handlers = handlers, it]
{ {
std::lock_guard lock2{handlers->mutex}; std::lock_guard lock2{my_handlers->mutex};
handlers->list.erase(it); my_handlers->list.erase(it);
}; };
} }
@ -53,10 +53,10 @@ void EnabledRoles::setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> &
} }
notifications->join(scope_guard( notifications->join(scope_guard(
[info = info, handlers_to_notify = std::move(handlers_to_notify)] [my_info = info, my_handlers_to_notify = std::move(handlers_to_notify)]
{ {
for (const auto & handler : handlers_to_notify) for (const auto & handler : my_handlers_to_notify)
handler(info); handler(my_info);
})); }));
} }
} }
@ -297,9 +297,9 @@ void MemoryAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{ {
for (const auto & [id, entity] : entities) for (const auto & [id, entity] : my_entities)
insertWithID(id, entity, replace_if_exists, throw_if_exists); insertWithID(id, entity, replace_if_exists, throw_if_exists);
}); });
} }
@ -525,9 +525,9 @@ void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zooke
} }
const String zookeeper_uuids_path = zookeeper_path + "/uuid"; const String zookeeper_uuids_path = zookeeper_path + "/uuid";
auto watch_entities_list = [watched_queue = watched_queue](const Coordination::WatchResponse &) auto watch_entities_list = [my_watched_queue = watched_queue](const Coordination::WatchResponse &)
{ {
[[maybe_unused]] bool push_result = watched_queue->push(UUIDHelpers::Nil); [[maybe_unused]] bool push_result = my_watched_queue->push(UUIDHelpers::Nil);
}; };
Coordination::Stat stat; Coordination::Stat stat;
const auto entity_uuid_strs = zookeeper->getChildrenWatch(zookeeper_uuids_path, &stat, watch_entities_list); const auto entity_uuid_strs = zookeeper->getChildrenWatch(zookeeper_uuids_path, &stat, watch_entities_list);
@ -592,10 +592,10 @@ void ReplicatedAccessStorage::refreshEntityNoLock(const zkutil::ZooKeeperPtr & z
AccessEntityPtr ReplicatedAccessStorage::tryReadEntityFromZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) const AccessEntityPtr ReplicatedAccessStorage::tryReadEntityFromZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) const
{ {
const auto watch_entity = [watched_queue = watched_queue, id](const Coordination::WatchResponse & response) const auto watch_entity = [my_watched_queue = watched_queue, id](const Coordination::WatchResponse & response)
{ {
if (response.type == Coordination::Event::CHANGED) if (response.type == Coordination::Event::CHANGED)
[[maybe_unused]] bool push_result = watched_queue->push(id); [[maybe_unused]] bool push_result = my_watched_queue->push(id);
}; };
Coordination::Stat entity_stat; Coordination::Stat entity_stat;
@ -680,12 +680,12 @@ void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_col
backup_entries_collector.addPostTask( backup_entries_collector.addPostTask(
[backup_entry = backup_entry_with_path.second, [backup_entry = backup_entry_with_path.second,
zookeeper_path = zookeeper_path, my_zookeeper_path = zookeeper_path,
type, type,
&backup_entries_collector, &backup_entries_collector,
backup_coordination] backup_coordination]
{ {
for (const String & path : backup_coordination->getReplicatedAccessFilePaths(zookeeper_path, type)) for (const String & path : backup_coordination->getReplicatedAccessFilePaths(my_zookeeper_path, type))
backup_entries_collector.addBackupEntry(path, backup_entry); backup_entries_collector.addBackupEntry(path, backup_entry);
}); });
} }
@ -708,9 +708,9 @@ void ReplicatedAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{ {
for (const auto & [id, entity] : entities) for (const auto & [id, entity] : my_entities)
insertWithID(id, entity, replace_if_exists, throw_if_exists); insertWithID(id, entity, replace_if_exists, throw_if_exists);
}); });
} }


@ -11,6 +11,10 @@
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h> #include <Common/quoteString.h>
#include <Core/Settings.h> #include <Core/Settings.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>
#include <Parsers/Access/ParserGrantQuery.h>
#include <Parsers/parseQuery.h>
#include <Poco/Util/AbstractConfiguration.h> #include <Poco/Util/AbstractConfiguration.h>
#include <Poco/MD5Engine.h> #include <Poco/MD5Engine.h>
#include <Poco/JSON/JSON.h> #include <Poco/JSON/JSON.h>
@ -49,7 +53,12 @@ namespace
UUID generateID(const IAccessEntity & entity) { return generateID(entity.getType(), entity.getName()); } UUID generateID(const IAccessEntity & entity) { return generateID(entity.getType(), entity.getName()); }
UserPtr parseUser(const Poco::Util::AbstractConfiguration & config, const String & user_name, const std::unordered_set<UUID> & allowed_profile_ids, bool allow_no_password, bool allow_plaintext_password) UserPtr parseUser(
const Poco::Util::AbstractConfiguration & config,
const String & user_name,
const std::unordered_set<UUID> & allowed_profile_ids,
bool allow_no_password,
bool allow_plaintext_password)
{ {
auto user = std::make_shared<User>(); auto user = std::make_shared<User>();
user->setName(user_name); user->setName(user_name);
@ -207,42 +216,99 @@ namespace
} }
} }
/// By default all databases are accessible const auto grants_config = user_config + ".grants";
/// and the user can grant everything he has. std::optional<Strings> grant_queries;
user->access.grantWithGrantOption(AccessType::ALL); if (config.has(grants_config))
if (databases)
{ {
user->access.revoke(AccessFlags::allFlags() - AccessFlags::allGlobalFlags()); Poco::Util::AbstractConfiguration::Keys keys;
user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG); config.keys(grants_config, keys);
for (const String & database : *databases) grant_queries.emplace();
user->access.grantWithGrantOption(AccessFlags::allFlags(), database); grant_queries->reserve(keys.size());
} for (const auto & key : keys)
{
if (dictionaries) const auto query = config.getString(grants_config + "." + key);
{ grant_queries->push_back(query);
user->access.revoke(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG); }
for (const String & dictionary : *dictionaries)
user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG, dictionary);
} }
bool access_management = config.getBool(user_config + ".access_management", false); bool access_management = config.getBool(user_config + ".access_management", false);
if (!access_management)
{
user->access.revoke(AccessType::ACCESS_MANAGEMENT);
user->access.revokeGrantOption(AccessType::ALL);
}
bool named_collection_control = config.getBool(user_config + ".named_collection_control", false); bool named_collection_control = config.getBool(user_config + ".named_collection_control", false);
if (!named_collection_control)
{
user->access.revoke(AccessType::NAMED_COLLECTION_CONTROL);
}
bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false); bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false);
if (!show_named_collections_secrets)
if (grant_queries)
if (databases || dictionaries || access_management || named_collection_control || show_named_collections_secrets)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Any other access control settings can't be specified with `grants`");
if (grant_queries)
{ {
user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS_SECRETS); ParserGrantQuery parser;
parser.parseWithoutGrantees();
for (const auto & string_query : *grant_queries)
{
String error_message;
const char * pos = string_query.data();
auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, 0);
if (!ast)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse grant query. Error: {}", error_message);
auto & query = ast->as<ASTGrantQuery &>();
if (query.roles)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Roles can't be granted in config file");
if (!query.cluster.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Can't grant on cluster using config file");
if (query.grantees)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "You can't specify grantees in query using config file");
for (auto & element : query.access_rights_elements)
{
if (query.is_revoke)
user->access.revoke(element);
else
user->access.grant(element);
}
}
}
else
{
/// By default all databases are accessible
/// and the user can grant everything he has.
user->access.grantWithGrantOption(AccessType::ALL);
if (databases)
{
user->access.revoke(AccessFlags::allFlags() - AccessFlags::allGlobalFlags());
user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG);
for (const String & database : *databases)
user->access.grantWithGrantOption(AccessFlags::allFlags(), database);
}
if (dictionaries)
{
user->access.revoke(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG);
for (const String & dictionary : *dictionaries)
user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG, dictionary);
}
if (!access_management)
{
user->access.revoke(AccessType::ACCESS_MANAGEMENT);
user->access.revokeGrantOption(AccessType::ALL);
}
if (!named_collection_control)
{
user->access.revoke(AccessType::NAMED_COLLECTION_CONTROL);
}
if (!show_named_collections_secrets)
{
user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS_SECRETS);
}
} }
String default_database = config.getString(user_config + ".default_database", ""); String default_database = config.getString(user_config + ".default_database", "");
@ -252,7 +318,11 @@ namespace
} }
std::vector<AccessEntityPtr> parseUsers(const Poco::Util::AbstractConfiguration & config, const std::unordered_set<UUID> & allowed_profile_ids, bool allow_no_password, bool allow_plaintext_password) std::vector<AccessEntityPtr> parseUsers(
const Poco::Util::AbstractConfiguration & config,
const std::unordered_set<UUID> & allowed_profile_ids,
bool allow_no_password,
bool allow_plaintext_password)
{ {
Poco::Util::AbstractConfiguration::Keys user_names; Poco::Util::AbstractConfiguration::Keys user_names;
config.keys("users", user_names); config.keys("users", user_names);


@ -2,6 +2,7 @@
#include <Columns/ColumnArray.h> #include <Columns/ColumnArray.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <Common/Arena.h>
#include <base/arithmeticOverflow.h> #include <base/arithmeticOverflow.h>
#include <DataTypes/DataTypeArray.h> #include <DataTypes/DataTypeArray.h>
#include <AggregateFunctions/IAggregateFunction.h> #include <AggregateFunctions/IAggregateFunction.h>


@ -10,6 +10,7 @@
#include <DataTypes/IDataType.h> #include <DataTypes/IDataType.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <base/StringRef.h> #include <base/StringRef.h>
#include <Common/Arena.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <AggregateFunctions/IAggregateFunction.h> #include <AggregateFunctions/IAggregateFunction.h>


@ -173,13 +173,13 @@ BackupCoordinationRemote::BackupCoordinationRemote(
log, log,
get_zookeeper_, get_zookeeper_,
keeper_settings, keeper_settings,
[zookeeper_path = zookeeper_path, current_host = current_host, is_internal = is_internal] [my_zookeeper_path = zookeeper_path, my_current_host = current_host, my_is_internal = is_internal]
(WithRetries::FaultyKeeper & zk) (WithRetries::FaultyKeeper & zk)
{ {
/// Recreate this ephemeral node to signal that we are alive. /// Recreate this ephemeral node to signal that we are alive.
if (is_internal) if (my_is_internal)
{ {
String alive_node_path = zookeeper_path + "/stage/alive|" + current_host; String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host;
auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral); auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code == Coordination::Error::ZNODEEXISTS) if (code == Coordination::Error::ZNODEEXISTS)


@ -470,17 +470,17 @@ std::vector<std::pair<ASTPtr, StoragePtr>> BackupEntriesCollector::findTablesInD
const auto & database_info = database_infos.at(database_name); const auto & database_info = database_infos.at(database_name);
const auto & database = database_info.database; const auto & database = database_info.database;
auto filter_by_table_name = [database_info = &database_info](const String & table_name) auto filter_by_table_name = [my_database_info = &database_info](const String & table_name)
{ {
/// We skip inner tables of materialized views. /// We skip inner tables of materialized views.
if (table_name.starts_with(".inner_id.")) if (table_name.starts_with(".inner_id."))
return false; return false;
if (database_info->tables.contains(table_name)) if (my_database_info->tables.contains(table_name))
return true; return true;
if (database_info->all_tables) if (my_database_info->all_tables)
return !database_info->except_table_names.contains(table_name); return !my_database_info->except_table_names.contains(table_name);
return false; return false;
}; };


@ -208,7 +208,7 @@ void BackupImpl::openArchive()
if (!reader->fileExists(archive_name)) if (!reader->fileExists(archive_name))
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name_for_logging); throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name_for_logging);
size_t archive_size = reader->getFileSize(archive_name); size_t archive_size = reader->getFileSize(archive_name);
archive_reader = createArchiveReader(archive_name, [reader=reader, archive_name]{ return reader->readFile(archive_name); }, archive_size); archive_reader = createArchiveReader(archive_name, [my_reader = reader, archive_name]{ return my_reader->readFile(archive_name); }, archive_size);
archive_reader->setPassword(archive_params.password); archive_reader->setPassword(archive_params.password);
} }
else else


@ -34,13 +34,13 @@ RestoreCoordinationRemote::RestoreCoordinationRemote(
log, log,
get_zookeeper_, get_zookeeper_,
keeper_settings, keeper_settings,
[zookeeper_path = zookeeper_path, current_host = current_host, is_internal = is_internal] [my_zookeeper_path = zookeeper_path, my_current_host = current_host, my_is_internal = is_internal]
(WithRetries::FaultyKeeper & zk) (WithRetries::FaultyKeeper & zk)
{ {
/// Recreate this ephemeral node to signal that we are alive. /// Recreate this ephemeral node to signal that we are alive.
if (is_internal) if (my_is_internal)
{ {
String alive_node_path = zookeeper_path + "/stage/alive|" + current_host; String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host;
auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral); auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code == Coordination::Error::ZNODEEXISTS) if (code == Coordination::Error::ZNODEEXISTS)


@ -211,6 +211,7 @@ endif()
if (TARGET ch_contrib::jemalloc) if (TARGET ch_contrib::jemalloc)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc) target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc)
endif() endif()
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash)
add_subdirectory(Access/Common) add_subdirectory(Access/Common)
add_subdirectory(Common/ZooKeeper) add_subdirectory(Common/ZooKeeper)
@ -463,7 +464,7 @@ endif ()
if (TARGET ch_contrib::ldap) if (TARGET ch_contrib::ldap)
dbms_target_link_libraries (PRIVATE ch_contrib::ldap ch_contrib::lber) dbms_target_link_libraries (PRIVATE ch_contrib::ldap ch_contrib::lber)
endif () endif ()
dbms_target_link_libraries (PRIVATE ch_contrib::sparsehash) dbms_target_link_libraries (PUBLIC ch_contrib::sparsehash)
if (TARGET ch_contrib::protobuf) if (TARGET ch_contrib::protobuf)
dbms_target_link_libraries (PRIVATE ch_contrib::protobuf) dbms_target_link_libraries (PRIVATE ch_contrib::protobuf)


@ -73,9 +73,6 @@ void LocalConnection::sendQuery(
bool, bool,
std::function<void(const Progress &)> process_progress_callback) std::function<void(const Progress &)> process_progress_callback)
{ {
if (!query_parameters.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "clickhouse local does not support query parameters");
/// Suggestion comes without client_info. /// Suggestion comes without client_info.
if (client_info) if (client_info)
query_context = session.makeQueryContext(*client_info); query_context = session.makeQueryContext(*client_info);
@ -90,6 +87,7 @@ void LocalConnection::sendQuery(
if (!current_database.empty()) if (!current_database.empty())
query_context->setCurrentDatabase(current_database); query_context->setCurrentDatabase(current_database);
query_context->addQueryParameters(query_parameters);
state.reset(); state.reset();
state.emplace(); state.emplace();
@ -484,7 +482,7 @@ void LocalConnection::setDefaultDatabase(const String & database)
UInt64 LocalConnection::getServerRevision(const ConnectionTimeouts &) UInt64 LocalConnection::getServerRevision(const ConnectionTimeouts &)
{ {
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); return DBMS_TCP_PROTOCOL_VERSION;
} }
const String & LocalConnection::getServerTimezone(const ConnectionTimeouts &) const String & LocalConnection::getServerTimezone(const ConnectionTimeouts &)


@ -108,14 +108,14 @@ static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggesti
template <typename ConnectionType> template <typename ConnectionType>
void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit) void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit)
{ {
loading_thread = std::thread([context=Context::createCopy(context), connection_parameters, suggestion_limit, this] loading_thread = std::thread([my_context = Context::createCopy(context), connection_parameters, suggestion_limit, this]
{ {
ThreadStatus thread_status; ThreadStatus thread_status;
for (size_t retry = 0; retry < 10; ++retry) for (size_t retry = 0; retry < 10; ++retry)
{ {
try try
{ {
auto connection = ConnectionType::createConnection(connection_parameters, context); auto connection = ConnectionType::createConnection(connection_parameters, my_context);
fetch(*connection, connection_parameters.timeouts, getLoadSuggestionQuery(suggestion_limit, std::is_same_v<ConnectionType, LocalConnection>)); fetch(*connection, connection_parameters.timeouts, getLoadSuggestionQuery(suggestion_limit, std::is_same_v<ConnectionType, LocalConnection>));
} }
catch (const Exception & e) catch (const Exception & e)


@ -942,9 +942,9 @@ ColumnPtr ColumnArray::compress() const
size_t byte_size = data_compressed->byteSize() + offsets_compressed->byteSize(); size_t byte_size = data_compressed->byteSize() + offsets_compressed->byteSize();
return ColumnCompressed::create(size(), byte_size, return ColumnCompressed::create(size(), byte_size,
[data_compressed = std::move(data_compressed), offsets_compressed = std::move(offsets_compressed)] [my_data_compressed = std::move(data_compressed), my_offsets_compressed = std::move(offsets_compressed)]
{ {
return ColumnArray::create(data_compressed->decompress(), offsets_compressed->decompress()); return ColumnArray::create(my_data_compressed->decompress(), my_offsets_compressed->decompress());
}); });
} }


@ -64,7 +64,7 @@ public:
return ColumnCompressed::create( return ColumnCompressed::create(
size, size,
bytes, bytes,
[column = std::move(column)]{ return column; }); [my_column = std::move(column)]{ return my_column; });
} }
/// Helper methods for compression. /// Helper methods for compression.


@ -386,11 +386,11 @@ ColumnPtr ColumnDecimal<T>::compress() const
const size_t compressed_size = compressed->size(); const size_t compressed_size = compressed->size();
return ColumnCompressed::create(data_size, compressed_size, return ColumnCompressed::create(data_size, compressed_size,
[compressed = std::move(compressed), column_size = data_size, scale = this->scale] [my_compressed = std::move(compressed), column_size = data_size, my_scale = this->scale]
{ {
auto res = ColumnDecimal<T>::create(column_size, scale); auto res = ColumnDecimal<T>::create(column_size, my_scale);
ColumnCompressed::decompressBuffer( ColumnCompressed::decompressBuffer(
compressed->data(), res->getData().data(), compressed->size(), column_size * sizeof(T)); my_compressed->data(), res->getData().data(), my_compressed->size(), column_size * sizeof(T));
return res; return res;
}); });
} }


@ -393,13 +393,13 @@ ColumnPtr ColumnFixedString::compress() const
const size_t column_size = size(); const size_t column_size = size();
const size_t compressed_size = compressed->size(); const size_t compressed_size = compressed->size();
return ColumnCompressed::create(column_size, compressed_size, return ColumnCompressed::create(column_size, compressed_size,
[compressed = std::move(compressed), column_size, n = n] [my_compressed = std::move(compressed), column_size, my_n = n]
{ {
size_t chars_size = n * column_size; size_t chars_size = my_n * column_size;
auto res = ColumnFixedString::create(n); auto res = ColumnFixedString::create(my_n);
res->getChars().resize(chars_size); res->getChars().resize(chars_size);
ColumnCompressed::decompressBuffer( ColumnCompressed::decompressBuffer(
compressed->data(), res->getChars().data(), compressed->size(), chars_size); my_compressed->data(), res->getChars().data(), my_compressed->size(), chars_size);
return res; return res;
}); });
} }


@ -312,9 +312,9 @@ ColumnPtr ColumnMap::compress() const
const auto byte_size = compressed->byteSize(); const auto byte_size = compressed->byteSize();
/// The order of evaluation of function arguments is unspecified /// The order of evaluation of function arguments is unspecified
/// and could cause interacting with object in moved-from state /// and could cause interacting with object in moved-from state
return ColumnCompressed::create(size(), byte_size, [compressed = std::move(compressed)] return ColumnCompressed::create(size(), byte_size, [my_compressed = std::move(compressed)]
{ {
return ColumnMap::create(compressed->decompress()); return ColumnMap::create(my_compressed->decompress());
}); });
} }


@ -644,9 +644,9 @@ ColumnPtr ColumnNullable::compress() const
size_t byte_size = nested_column->byteSize() + null_map->byteSize(); size_t byte_size = nested_column->byteSize() + null_map->byteSize();
return ColumnCompressed::create(size(), byte_size, return ColumnCompressed::create(size(), byte_size,
[nested_column = std::move(nested_compressed), null_map = std::move(null_map_compressed)] [my_nested_column = std::move(nested_compressed), my_null_map = std::move(null_map_compressed)]
{ {
return ColumnNullable::create(nested_column->decompress(), null_map->decompress()); return ColumnNullable::create(my_nested_column->decompress(), my_null_map->decompress());
}); });
} }


@ -738,9 +738,9 @@ ColumnPtr ColumnSparse::compress() const
size_t byte_size = values_compressed->byteSize() + offsets_compressed->byteSize(); size_t byte_size = values_compressed->byteSize() + offsets_compressed->byteSize();
return ColumnCompressed::create(size(), byte_size, return ColumnCompressed::create(size(), byte_size,
[values_compressed = std::move(values_compressed), offsets_compressed = std::move(offsets_compressed), size = size()] [my_values_compressed = std::move(values_compressed), my_offsets_compressed = std::move(offsets_compressed), size = size()]
{ {
return ColumnSparse::create(values_compressed->decompress(), offsets_compressed->decompress(), size); return ColumnSparse::create(my_values_compressed->decompress(), my_offsets_compressed->decompress(), size);
}); });
} }


@ -532,8 +532,8 @@ ColumnPtr ColumnString::compress() const
const size_t offsets_compressed_size = offsets_compressed->size(); const size_t offsets_compressed_size = offsets_compressed->size();
return ColumnCompressed::create(source_offsets_elements, chars_compressed_size + offsets_compressed_size, return ColumnCompressed::create(source_offsets_elements, chars_compressed_size + offsets_compressed_size,
[ [
chars_compressed = std::move(chars_compressed), my_chars_compressed = std::move(chars_compressed),
offsets_compressed = std::move(offsets_compressed), my_offsets_compressed = std::move(offsets_compressed),
source_chars_size, source_chars_size,
source_offsets_elements source_offsets_elements
] ]
@ -544,10 +544,10 @@ ColumnPtr ColumnString::compress() const
res->getOffsets().resize(source_offsets_elements); res->getOffsets().resize(source_offsets_elements);
ColumnCompressed::decompressBuffer( ColumnCompressed::decompressBuffer(
chars_compressed->data(), res->getChars().data(), chars_compressed->size(), source_chars_size); my_chars_compressed->data(), res->getChars().data(), my_chars_compressed->size(), source_chars_size);
ColumnCompressed::decompressBuffer( ColumnCompressed::decompressBuffer(
offsets_compressed->data(), res->getOffsets().data(), offsets_compressed->size(), source_offsets_elements * sizeof(Offset)); my_offsets_compressed->data(), res->getOffsets().data(), my_offsets_compressed->size(), source_offsets_elements * sizeof(Offset));
return res; return res;
}); });


@ -552,11 +552,11 @@ ColumnPtr ColumnTuple::compress() const
} }
return ColumnCompressed::create(size(), byte_size, return ColumnCompressed::create(size(), byte_size,
[compressed = std::move(compressed)]() mutable [my_compressed = std::move(compressed)]() mutable
{ {
for (auto & column : compressed) for (auto & column : my_compressed)
column = column->decompress(); column = column->decompress();
return ColumnTuple::create(compressed); return ColumnTuple::create(my_compressed);
}); });
} }


@ -927,11 +927,11 @@ ColumnPtr ColumnVector<T>::compress() const
const size_t compressed_size = compressed->size(); const size_t compressed_size = compressed->size();
return ColumnCompressed::create(data_size, compressed_size, return ColumnCompressed::create(data_size, compressed_size,
[compressed = std::move(compressed), column_size = data_size] [my_compressed = std::move(compressed), column_size = data_size]
{ {
auto res = ColumnVector<T>::create(column_size); auto res = ColumnVector<T>::create(column_size);
ColumnCompressed::decompressBuffer( ColumnCompressed::decompressBuffer(
compressed->data(), res->getData().data(), compressed->size(), column_size * sizeof(T)); my_compressed->data(), res->getData().data(), my_compressed->size(), column_size * sizeof(T));
return res; return res;
}); });
} }

640
src/Common/AsyncLoader.cpp Normal file

@ -0,0 +1,640 @@
#include <Common/AsyncLoader.h>
#include <base/defines.h>
#include <Common/ErrorCodes.h>
#include <Common/Exception.h>
#include <Common/noexcept_scope.h>
#include <Common/setThreadName.h>
#include <Common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ASYNC_LOAD_CYCLE;
extern const int ASYNC_LOAD_FAILED;
extern const int ASYNC_LOAD_CANCELED;
}
static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256;
static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5;
void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch)
{
if (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS))
{
LOG_INFO(log, "Processed: {}%", processed * 100.0 / total);
watch.restart();
}
}
LoadStatus LoadJob::status() const
{
std::unique_lock lock{mutex};
return load_status;
}
std::exception_ptr LoadJob::exception() const
{
std::unique_lock lock{mutex};
return load_exception;
}
ssize_t LoadJob::priority() const
{
return load_priority;
}
void LoadJob::wait() const
{
std::unique_lock lock{mutex};
waiters++;
finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; });
waiters--;
if (load_exception)
std::rethrow_exception(load_exception);
}
void LoadJob::waitNoThrow() const noexcept
{
std::unique_lock lock{mutex};
waiters++;
finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; });
waiters--;
}
size_t LoadJob::waitersCount() const
{
std::unique_lock lock{mutex};
return waiters;
}
void LoadJob::ok()
{
std::unique_lock lock{mutex};
load_status = LoadStatus::OK;
finish();
}
void LoadJob::failed(const std::exception_ptr & ptr)
{
std::unique_lock lock{mutex};
load_status = LoadStatus::FAILED;
load_exception = ptr;
finish();
}
void LoadJob::canceled(const std::exception_ptr & ptr)
{
std::unique_lock lock{mutex};
load_status = LoadStatus::CANCELED;
load_exception = ptr;
finish();
}
void LoadJob::finish()
{
func = {}; // To ensure job function is destructed before `AsyncLoader::wait()` and `LoadJob::wait()` return
finish_time = std::chrono::system_clock::now();
if (waiters > 0)
finished.notify_all();
}
void LoadJob::scheduled()
{
schedule_time = std::chrono::system_clock::now();
}
void LoadJob::enqueued()
{
if (enqueue_time.load() == TimePoint{}) // Do not rewrite in case of requeue
enqueue_time = std::chrono::system_clock::now();
}
void LoadJob::execute(const LoadJobPtr & self)
{
start_time = std::chrono::system_clock::now();
func(self);
}
LoadTask::LoadTask(AsyncLoader & loader_, LoadJobSet && jobs_, LoadJobSet && goal_jobs_)
: loader(loader_)
, jobs(std::move(jobs_))
, goal_jobs(std::move(goal_jobs_))
{}
LoadTask::~LoadTask()
{
remove();
}
void LoadTask::merge(const LoadTaskPtr & task)
{
chassert(&loader == &task->loader);
jobs.merge(task->jobs);
goal_jobs.merge(task->goal_jobs);
}
void LoadTask::schedule()
{
loader.schedule(*this);
}
void LoadTask::remove()
{
if (!jobs.empty())
{
loader.remove(jobs);
jobs.clear();
}
}
void LoadTask::detach()
{
jobs.clear();
}
AsyncLoader::AsyncLoader(Metric metric_threads, Metric metric_active_threads, size_t max_threads_, bool log_failures_, bool log_progress_)
: log_failures(log_failures_)
, log_progress(log_progress_)
, log(&Poco::Logger::get("AsyncLoader"))
, max_threads(max_threads_)
, pool(metric_threads, metric_active_threads, max_threads)
{
}
AsyncLoader::~AsyncLoader()
{
stop();
}
void AsyncLoader::start()
{
std::unique_lock lock{mutex};
is_running = true;
for (size_t i = 0; workers < max_threads && i < ready_queue.size(); i++)
spawn(lock);
}
void AsyncLoader::wait()
{
pool.wait();
}
void AsyncLoader::stop()
{
{
std::unique_lock lock{mutex};
is_running = false;
// NOTE: there is no need to notify because workers never wait
}
pool.wait();
}
void AsyncLoader::schedule(LoadTask & task)
{
chassert(this == &task.loader);
scheduleImpl(task.jobs);
}
void AsyncLoader::schedule(const LoadTaskPtr & task)
{
chassert(this == &task->loader);
scheduleImpl(task->jobs);
}
void AsyncLoader::schedule(const std::vector<LoadTaskPtr> & tasks)
{
LoadJobSet all_jobs;
for (const auto & task : tasks)
{
chassert(this == &task->loader);
all_jobs.insert(task->jobs.begin(), task->jobs.end());
}
scheduleImpl(all_jobs);
}
void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs)
{
std::unique_lock lock{mutex};
// Restart watches after idle period
if (scheduled_jobs.empty())
{
busy_period_start_time = std::chrono::system_clock::now();
stopwatch.restart();
old_jobs = finished_jobs.size();
}
// Make set of jobs to schedule:
// 1) exclude already scheduled or finished jobs
// 2) include pending dependencies, that are not yet scheduled
LoadJobSet jobs;
for (const auto & job : input_jobs)
gatherNotScheduled(job, jobs, lock);
// Ensure scheduled_jobs graph will have no cycles. The only way to get a cycle is to add a cycle, assuming old jobs cannot reference new ones.
checkCycle(jobs, lock);
// We do not want any exception to be thrown after this point, because the following code is not exception-safe
DENY_ALLOCATIONS_IN_SCOPE;
// Schedule all incoming jobs
for (const auto & job : jobs)
{
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
scheduled_jobs.emplace(job, Info{.initial_priority = job->load_priority, .priority = job->load_priority});
job->scheduled();
});
}
// Process dependencies on scheduled pending jobs
for (const auto & job : jobs)
{
Info & info = scheduled_jobs.find(job)->second;
for (const auto & dep : job->dependencies)
{
// Register every dependency on scheduled job with back-link to dependent job
if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end())
{
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
dep_info->second.dependent_jobs.insert(job);
});
info.dependencies_left++;
// Priority inheritance: prioritize deps to have at least given `priority` to avoid priority inversion
prioritize(dep, info.priority, lock);
}
}
// Enqueue non-blocked jobs (w/o dependencies) to ready queue
if (!info.is_blocked())
enqueue(info, job, lock);
}
// Process dependencies on other jobs. It is done in a separate pass to facilitate propagation of cancel signals (if any).
for (const auto & job : jobs)
{
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
{
for (const auto & dep : job->dependencies)
{
if (scheduled_jobs.contains(dep))
continue; // Skip dependencies on scheduled pending jobs (already processed)
LoadStatus dep_status = dep->status();
if (dep_status == LoadStatus::OK)
continue; // Dependency on already successfully finished job -- it's okay.
// Dependency on not scheduled pending job -- it's bad.
// Probably, there is an error in `jobs` set, `gatherNotScheduled()` should have fixed it.
chassert(dep_status != LoadStatus::PENDING);
if (dep_status == LoadStatus::FAILED || dep_status == LoadStatus::CANCELED)
{
// Dependency on already failed or canceled job -- it's okay. Cancel all dependent jobs.
std::exception_ptr e;
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
"Load job '{}' -> {}",
job->name,
getExceptionMessage(dep->exception(), /* with_stacktrace = */ false)));
});
finish(lock, job, LoadStatus::CANCELED, e);
break; // This job is now finished, stop its dependencies processing
}
}
}
else
{
// Job was already canceled on previous iteration of this cycle -- skip
}
}
}
void AsyncLoader::gatherNotScheduled(const LoadJobPtr & job, LoadJobSet & jobs, std::unique_lock<std::mutex> & lock)
{
if (job->status() == LoadStatus::PENDING && !scheduled_jobs.contains(job) && !jobs.contains(job))
{
jobs.insert(job);
for (const auto & dep : job->dependencies)
gatherNotScheduled(dep, jobs, lock);
}
}
void AsyncLoader::prioritize(const LoadJobPtr & job, ssize_t new_priority)
{
if (!job)
return;
DENY_ALLOCATIONS_IN_SCOPE;
std::unique_lock lock{mutex};
prioritize(job, new_priority, lock);
}
void AsyncLoader::remove(const LoadJobSet & jobs)
{
DENY_ALLOCATIONS_IN_SCOPE;
std::unique_lock lock{mutex};
// On the first pass:
// - cancel all not executing jobs to avoid races
// - do not wait executing jobs (otherwise, on unlock a worker could start executing a dependent job, that should be canceled)
for (const auto & job : jobs)
{
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
{
if (info->second.is_executing())
continue; // Skip executing jobs on the first pass
std::exception_ptr e;
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, "Load job '{}' canceled", job->name));
});
finish(lock, job, LoadStatus::CANCELED, e);
}
}
// On the second pass wait for executing jobs to finish
for (const auto & job : jobs)
{
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
{
// Job is currently executing
chassert(info->second.is_executing());
lock.unlock();
job->waitNoThrow(); // Wait for job to finish
lock.lock();
}
}
// On the third pass all jobs are finished - remove them all
// It is better to do it under one lock to avoid exposing intermediate states
for (const auto & job : jobs)
{
size_t erased = finished_jobs.erase(job);
if (old_jobs >= erased && job->finishTime() != LoadJob::TimePoint{} && job->finishTime() < busy_period_start_time)
old_jobs -= erased;
}
}
void AsyncLoader::setMaxThreads(size_t value)
{
std::unique_lock lock{mutex};
pool.setMaxThreads(value);
pool.setMaxFreeThreads(value);
pool.setQueueSize(value);
max_threads = value;
if (!is_running)
return;
for (size_t i = 0; workers < max_threads && i < ready_queue.size(); i++)
spawn(lock);
}
size_t AsyncLoader::getMaxThreads() const
{
std::unique_lock lock{mutex};
return max_threads;
}
size_t AsyncLoader::getScheduledJobCount() const
{
std::unique_lock lock{mutex};
return scheduled_jobs.size();
}
std::vector<AsyncLoader::JobState> AsyncLoader::getJobStates() const
{
std::unique_lock lock{mutex};
std::multimap<String, JobState> states;
for (const auto & [job, info] : scheduled_jobs)
states.emplace(job->name, JobState{
.job = job,
.dependencies_left = info.dependencies_left,
.is_executing = info.is_executing(),
.is_blocked = info.is_blocked(),
.is_ready = info.is_ready(),
.initial_priority = info.initial_priority,
.ready_seqno = last_ready_seqno
});
for (const auto & job : finished_jobs)
states.emplace(job->name, JobState{.job = job});
lock.unlock();
std::vector<JobState> result;
for (auto && [_, state] : states)
result.emplace_back(std::move(state));
return result;
}
void AsyncLoader::checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock)
{
LoadJobSet left = jobs;
LoadJobSet visited;
visited.reserve(left.size());
while (!left.empty())
{
LoadJobPtr job = *left.begin();
checkCycleImpl(job, left, visited, lock);
}
}
String AsyncLoader::checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock)
{
if (!left.contains(job))
return {}; // Do not consider external dependencies and already processed jobs
if (auto [_, inserted] = visited.insert(job); !inserted)
{
visited.erase(job); // Mark where cycle ends
return job->name;
}
for (const auto & dep : job->dependencies)
{
if (auto chain = checkCycleImpl(dep, left, visited, lock); !chain.empty())
{
if (!visited.contains(job)) // Check for cycle end
throw Exception(ErrorCodes::ASYNC_LOAD_CYCLE, "Load job dependency cycle detected: {} -> {}", job->name, chain);
else
return fmt::format("{} -> {}", job->name, chain); // chain is not a cycle yet -- continue building
}
}
left.erase(job);
return {};
}
void AsyncLoader::finish(std::unique_lock<std::mutex> & lock, const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job)
{
if (status == LoadStatus::OK)
{
// Notify waiters
job->ok();
// Update dependent jobs and enqueue if ready
chassert(scheduled_jobs.contains(job)); // Job was pending
for (const auto & dep : scheduled_jobs[job].dependent_jobs)
{
chassert(scheduled_jobs.contains(dep)); // All dependent jobs must be pending
Info & dep_info = scheduled_jobs[dep];
dep_info.dependencies_left--;
if (!dep_info.is_blocked())
enqueue(dep_info, dep, lock);
}
}
else
{
// Notify waiters
if (status == LoadStatus::FAILED)
job->failed(exception_from_job);
else if (status == LoadStatus::CANCELED)
job->canceled(exception_from_job);
chassert(scheduled_jobs.contains(job)); // Job was pending
Info & info = scheduled_jobs[job];
if (info.is_ready())
{
ready_queue.erase(info.key());
info.ready_seqno = 0;
}
// Recurse into all dependent jobs
LoadJobSet dependent;
dependent.swap(info.dependent_jobs); // To avoid container modification during recursion
for (const auto & dep : dependent)
{
if (!scheduled_jobs.contains(dep))
continue; // Job has already been canceled
std::exception_ptr e;
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
e = std::make_exception_ptr(
Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
"Load job '{}' -> {}",
dep->name,
getExceptionMessage(exception_from_job, /* with_stacktrace = */ false)));
});
finish(lock, dep, LoadStatus::CANCELED, e);
}
// Clean dependency graph edges pointing to canceled jobs
for (const auto & dep : job->dependencies)
if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end())
dep_info->second.dependent_jobs.erase(job);
}
// Job became finished
scheduled_jobs.erase(job);
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
finished_jobs.insert(job);
if (log_progress)
logAboutProgress(log, finished_jobs.size() - old_jobs, finished_jobs.size() + scheduled_jobs.size() - old_jobs, stopwatch);
});
}
void AsyncLoader::prioritize(const LoadJobPtr & job, ssize_t new_priority, std::unique_lock<std::mutex> & lock)
{
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
{
if (info->second.priority >= new_priority)
return; // Never lower priority
// Update priority and push job forward through ready queue if needed
if (info->second.ready_seqno)
ready_queue.erase(info->second.key());
info->second.priority = new_priority;
job->load_priority.store(new_priority); // Set user-facing priority (may affect executing jobs)
if (info->second.ready_seqno)
{
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
ready_queue.emplace(info->second.key(), job);
});
}
// Recurse into dependencies
for (const auto & dep : job->dependencies)
prioritize(dep, new_priority, lock);
}
}
void AsyncLoader::enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock)
{
chassert(!info.is_blocked());
chassert(info.ready_seqno == 0);
info.ready_seqno = ++last_ready_seqno;
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
ready_queue.emplace(info.key(), job);
});
job->enqueued();
if (is_running && workers < max_threads)
spawn(lock);
}
void AsyncLoader::spawn(std::unique_lock<std::mutex> &)
{
workers++;
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
pool.scheduleOrThrowOnError([this] { worker(); });
});
}
void AsyncLoader::worker()
{
DENY_ALLOCATIONS_IN_SCOPE;
LoadJobPtr job;
std::exception_ptr exception_from_job;
while (true)
{
// This is inside the loop to also reset previous thread names set inside the jobs
setThreadName("AsyncLoader");
{
std::unique_lock lock{mutex};
// Handle just executed job
if (exception_from_job)
finish(lock, job, LoadStatus::FAILED, exception_from_job);
else if (job)
finish(lock, job, LoadStatus::OK);
if (!is_running || ready_queue.empty() || workers > max_threads)
{
workers--;
return;
}
// Take next job to be executed from the ready queue
auto it = ready_queue.begin();
job = it->second;
ready_queue.erase(it);
scheduled_jobs.find(job)->second.ready_seqno = 0; // This job is no longer in the ready queue
}
ALLOW_ALLOCATIONS_IN_SCOPE;
try
{
job->execute(job);
exception_from_job = {};
}
catch (...)
{
NOEXCEPT_SCOPE({
if (log_failures)
tryLogCurrentException(__PRETTY_FUNCTION__);
exception_from_job = std::make_exception_ptr(
Exception(ErrorCodes::ASYNC_LOAD_FAILED,
"Load job '{}' failed: {}",
job->name,
getCurrentExceptionMessage(/* with_stacktrace = */ true)));
});
}
}
}
}
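How the failure propagation implemented in finish() above looks from the caller's side can be shown with a short hedged sketch; it assumes an AsyncLoader instance named `async_loader` that has already been start()ed and the `makeLoadJob`/`makeLoadTask` helpers from AsyncLoader.h, and it is not part of the commit:

// job2 depends on job1; if job1's function throws, the worker wraps the error into
// ASYNC_LOAD_FAILED, finish() marks job1 FAILED and cancels job2 with ASYNC_LOAD_CANCELED.
auto job1 = makeLoadJob({}, "job1", [] (const LoadJobPtr &) { throw std::runtime_error("boom"); });
auto job2 = makeLoadJob({ job1 }, "job2", [] (const LoadJobPtr &) { /* never executed */ });
auto task = makeLoadTask(async_loader, { job1, job2 });
task->schedule();
try
{
    job2->wait(); // rethrows the saved ASYNC_LOAD_CANCELED exception
}
catch (...)
{
    // job1->status() == LoadStatus::FAILED, job2->status() == LoadStatus::CANCELED
}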

439
src/Common/AsyncLoader.h Normal file

@ -0,0 +1,439 @@
#pragma once
#include <condition_variable>
#include <exception>
#include <memory>
#include <map>
#include <mutex>
#include <vector>
#include <unordered_set>
#include <unordered_map>
#include <boost/noncopyable.hpp>
#include <base/types.h>
#include <Common/CurrentMetrics.h>
#include <Common/Stopwatch.h>
#include <Common/ThreadPool.h>
namespace Poco { class Logger; }
namespace DB
{
class LoadJob;
using LoadJobPtr = std::shared_ptr<LoadJob>;
using LoadJobSet = std::unordered_set<LoadJobPtr>;
class LoadTask;
using LoadTaskPtr = std::shared_ptr<LoadTask>;
using LoadTaskPtrs = std::vector<LoadTaskPtr>;
class AsyncLoader;
void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch);
// Execution status of a load job.
enum class LoadStatus
{
PENDING, // Load job is not started yet.
OK, // Load job executed and was successful.
FAILED, // Load job executed and failed.
CANCELED // Load job is not going to be executed due to removal or dependency failure.
};
// Smallest indivisible part of a loading process. Load job can have multiple dependencies, thus jobs constitute a directed acyclic graph (DAG).
// Job encapsulates a function to be executed by `AsyncLoader` as soon as job functions of all dependencies are successfully executed.
// Job can be waited for by an arbitrary number of threads. See `AsyncLoader` class description for more details.
class LoadJob : private boost::noncopyable
{
public:
template <class Func, class LoadJobSetType>
LoadJob(LoadJobSetType && dependencies_, String name_, Func && func_, ssize_t priority_ = 0)
: dependencies(std::forward<LoadJobSetType>(dependencies_))
, name(std::move(name_))
, func(std::forward<Func>(func_))
, load_priority(priority_)
{}
// Current job status.
LoadStatus status() const;
std::exception_ptr exception() const;
// Returns current value of a priority of the job. May differ from initial priority.
ssize_t priority() const;
// Sync wait for a pending job to be finished: OK, FAILED or CANCELED status.
// Throws if job is FAILED or CANCELED. Returns or throws immediately on non-pending job.
void wait() const;
// Wait for a job to reach any non PENDING status.
void waitNoThrow() const noexcept;
// Returns number of threads blocked by `wait()` or `waitNoThrow()` calls.
size_t waitersCount() const;
// Introspection
using TimePoint = std::chrono::system_clock::time_point;
TimePoint scheduleTime() const { return schedule_time; }
TimePoint enqueueTime() const { return enqueue_time; }
TimePoint startTime() const { return start_time; }
TimePoint finishTime() const { return finish_time; }
const LoadJobSet dependencies; // Jobs to be done before this one (with ownership), it is `const` to make creation of cycles hard
const String name;
private:
friend class AsyncLoader;
void ok();
void failed(const std::exception_ptr & ptr);
void canceled(const std::exception_ptr & ptr);
void finish();
void scheduled();
void enqueued();
void execute(const LoadJobPtr & self);
std::function<void(const LoadJobPtr & self)> func;
std::atomic<ssize_t> load_priority;
mutable std::mutex mutex;
mutable std::condition_variable finished;
mutable size_t waiters = 0;
LoadStatus load_status{LoadStatus::PENDING};
std::exception_ptr load_exception;
std::atomic<TimePoint> schedule_time{TimePoint{}};
std::atomic<TimePoint> enqueue_time{TimePoint{}};
std::atomic<TimePoint> start_time{TimePoint{}};
std::atomic<TimePoint> finish_time{TimePoint{}};
};
struct EmptyJobFunc
{
void operator()(const LoadJobPtr &) {}
};
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, Func && func = EmptyJobFunc())
{
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), std::forward<Func>(func));
}
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, Func && func = EmptyJobFunc())
{
return std::make_shared<LoadJob>(dependencies, std::move(name), std::forward<Func>(func));
}
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(LoadJobSet && dependencies, ssize_t priority, String name, Func && func = EmptyJobFunc())
{
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), std::forward<Func>(func), priority);
}
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, ssize_t priority, String name, Func && func = EmptyJobFunc())
{
return std::make_shared<LoadJob>(dependencies, std::move(name), std::forward<Func>(func), priority);
}
// Represents a logically connected set of LoadJobs required to achieve some goals (final LoadJob in the set).
class LoadTask : private boost::noncopyable
{
public:
LoadTask(AsyncLoader & loader_, LoadJobSet && jobs_, LoadJobSet && goal_jobs_ = {});
~LoadTask();
// Merge all jobs from other task into this task.
void merge(const LoadTaskPtr & task);
// Schedule all jobs with AsyncLoader.
void schedule();
// Remove all jobs of this task from AsyncLoader.
void remove();
// Do not track jobs in this task.
// WARNING: Jobs will never be removed() and are going to be stored as finished jobs until ~AsyncLoader().
void detach();
// Return the final jobs in this task. This job subset should be used as `dependencies` for dependent jobs or tasks:
// auto load_task = loadSomethingAsync(async_loader, load_after_task.goals(), something);
const LoadJobSet & goals() const { return goal_jobs.empty() ? jobs : goal_jobs; }
private:
friend class AsyncLoader;
AsyncLoader & loader;
LoadJobSet jobs;
LoadJobSet goal_jobs;
};
inline LoadTaskPtr makeLoadTask(AsyncLoader & loader, LoadJobSet && jobs, LoadJobSet && goals = {})
{
return std::make_shared<LoadTask>(loader, std::move(jobs), std::move(goals));
}
inline void scheduleLoad(const LoadTaskPtr & task)
{
task->schedule();
}
inline void scheduleLoad(const LoadTaskPtrs & tasks)
{
for (const auto & task : tasks)
task->schedule();
}
template <class... Args>
inline void scheduleLoad(Args && ... args)
{
(scheduleLoad(std::forward<Args>(args)), ...);
}
inline void waitLoad(const LoadJobSet & jobs)
{
for (const auto & job : jobs)
job->wait();
}
inline void waitLoad(const LoadTaskPtr & task)
{
waitLoad(task->goals());
}
inline void waitLoad(const LoadTaskPtrs & tasks)
{
for (const auto & task : tasks)
waitLoad(task->goals());
}
template <class... Args>
inline void waitLoad(Args && ... args)
{
(waitLoad(std::forward<Args>(args)), ...);
}
template <class... Args>
inline void scheduleAndWaitLoad(Args && ... args)
{
scheduleLoad(std::forward<Args>(args)...);
waitLoad(std::forward<Args>(args)...);
}
inline LoadJobSet getGoals(const LoadTaskPtrs & tasks)
{
LoadJobSet result;
for (const auto & task : tasks)
result.insert(task->goals().begin(), task->goals().end());
return result;
}
inline LoadJobSet joinJobs(const LoadJobSet & jobs1, const LoadJobSet & jobs2)
{
LoadJobSet result;
if (!jobs1.empty())
result.insert(jobs1.begin(), jobs1.end());
if (!jobs2.empty())
result.insert(jobs2.begin(), jobs2.end());
return result;
}
inline LoadTaskPtrs joinTasks(const LoadTaskPtrs & tasks1, const LoadTaskPtrs & tasks2)
{
if (tasks1.empty())
return tasks2;
if (tasks2.empty())
return tasks1;
LoadTaskPtrs result;
result.reserve(tasks1.size() + tasks2.size());
result.insert(result.end(), tasks1.begin(), tasks1.end());
result.insert(result.end(), tasks2.begin(), tasks2.end());
return result;
}
// `AsyncLoader` is a scheduler for DAG of `LoadJob`s. It tracks dependencies and priorities of jobs.
// Basic usage example:
// auto job_func = [&] (const LoadJobPtr & self) {
// LOG_TRACE(log, "Executing load job '{}' with priority '{}'", self->name, self->priority());
// };
// auto job1 = makeLoadJob({}, "job1", job_func);
// auto job2 = makeLoadJob({ job1 }, "job2", job_func);
// auto job3 = makeLoadJob({ job1 }, "job3", job_func);
// auto task = makeLoadTask(async_loader, { job1, job2, job3 });
// task->schedule();
// Here we have created and scheduled a task consisting of three jobs. Job1 has no dependencies and is run first.
// Job2 and job3 depend on job1 and are run only after job1 completion. Another thread may prioritize a job and wait for it:
// async_loader->prioritize(job3, /* priority = */ 1); // higher priority jobs are run first, default priority is zero.
// job3->wait(); // blocks until job completion or cancellation and rethrows an exception (if any)
//
// AsyncLoader tracks state of all scheduled jobs. Job lifecycle is the following:
// 1) Job is constructed with PENDING status and initial priority. The job is placed into a task.
// 2) The task is scheduled with all its jobs and their dependencies. A scheduled job may be ready (i.e. have all its dependencies finished) or blocked.
// 3a) When all dependencies are successfully executed, the job becomes ready. A ready job is enqueued into the ready queue.
// 3b) If at least one of the job dependencies has failed or been canceled, then this job is canceled (with all its dependent jobs as well).
// On cancellation an ASYNC_LOAD_CANCELED exception is generated and saved inside LoadJob object. The job status is changed to CANCELED.
// Exception is rethrown by any existing or new `wait()` call. The job is moved to the set of the finished jobs.
// 4) The scheduled pending ready job starts execution by a worker. The job is dequeued. Callback `job_func` is called.
// Status of an executing job is PENDING. And it is still considered as a scheduled job by AsyncLoader.
// Note that `job_func` of a CANCELED job is never executed.
// 5a) On successful execution the job status is changed to OK and all existing and new `wait()` calls finish w/o exceptions.
// 5b) Any exception thrown out of `job_func` is wrapped into an ASYNC_LOAD_FAILED exception and saved inside LoadJob.
// The job status is changed to FAILED. All the dependent jobs are canceled. The exception is rethrown from all existing and new `wait()` calls.
// 6) The job is no longer considered as scheduled and is instead moved to the finished jobs set. This is just for introspection of the finished jobs.
// 7) The task containing this job is destructed or `remove()` is explicitly called. The job is removed from the finished job set.
// 8) The job is destructed.
//
// Every job has a priority associated with it. AsyncLoader runs higher priority (greater `priority` value) jobs first. Job priority can be elevated
// (a) if either it has a dependent job with higher priority (in this case priority of a dependent job is inherited);
// (b) or job was explicitly prioritized by `prioritize(job, higher_priority)` call (this also leads to a priority inheritance for all the dependencies).
// Note that to avoid priority inversion `job_func` should use `self->priority()` to schedule new jobs in AsyncLoader or any other pool.
// Value stored in load job priority field is atomic and can be increased even during job execution.
//
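// A sketch of the intended priority-inheritance behaviour (illustrative only, reusing `job_func` and
// `async_loader` from the basic usage example above):
//     auto slow = makeLoadJob({}, /* priority = */ 0, "slow", job_func);
//     auto urgent = makeLoadJob({ slow }, /* priority = */ 5, "urgent", job_func);
//     auto task = makeLoadTask(async_loader, { slow, urgent });
//     task->schedule(); // while scheduling, "slow" inherits priority 5 from the dependent job "urgent",
//                       // so the lower priority dependency cannot delay the higher priority job.
//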
// When a task is scheduled it can contain dependencies on previously scheduled jobs. These jobs can have any status. If job A being scheduled depends on
// another job B that is not yet scheduled, then job B will also be scheduled (even if the task does not contain it).
class AsyncLoader : private boost::noncopyable
{
private:
// Key of a pending job in the ready queue.
struct ReadyKey
{
ssize_t priority; // Ascending order
ssize_t initial_priority; // Ascending order
UInt64 ready_seqno; // Descending order
bool operator<(const ReadyKey & rhs) const
{
if (priority > rhs.priority)
return true;
if (priority < rhs.priority)
return false;
if (initial_priority > rhs.initial_priority)
return true;
if (initial_priority < rhs.initial_priority)
return false;
return ready_seqno < rhs.ready_seqno;
}
};
// Scheduling information for a pending job.
struct Info
{
ssize_t initial_priority = 0; // Initial priority passed into schedule().
ssize_t priority = 0; // Elevated priority, due to priority inheritance or prioritize().
size_t dependencies_left = 0; // Current number of dependencies on pending jobs.
UInt64 ready_seqno = 0; // Zero means that job is not in ready queue.
LoadJobSet dependent_jobs; // Set of jobs dependent on this job.
// Three independent states of a non-finished job.
bool is_blocked() const { return dependencies_left > 0; }
bool is_ready() const { return dependencies_left == 0 && ready_seqno > 0; }
bool is_executing() const { return dependencies_left == 0 && ready_seqno == 0; }
// Get key of a ready job
ReadyKey key() const
{
return {.priority = priority, .initial_priority = initial_priority, .ready_seqno = ready_seqno};
}
};
public:
using Metric = CurrentMetrics::Metric;
AsyncLoader(Metric metric_threads, Metric metric_active_threads, size_t max_threads_, bool log_failures_, bool log_progress_);
// WARNING: all tasks instances should be destructed before associated AsyncLoader.
~AsyncLoader();
// Start workers to execute scheduled load jobs.
void start();
// Wait for all load jobs to finish, including all new jobs. So first take care to stop adding new jobs.
void wait();
// Wait for currently executing jobs to finish, but do not run any other pending jobs.
// Not finished jobs are left in pending state:
// - they can be executed by calling start() again;
// - or canceled using ~Task() or remove() later.
void stop();
// Schedule all jobs of given `task` and their dependencies (if any, not scheduled yet).
// Higher priority jobs (with greater `job->priority()` value) are executed earlier.
// All dependencies of a scheduled job inherit its priority if it is higher. This way a higher priority job
// never waits for (is never blocked by) lower priority jobs. No priority inversion is possible.
// Note that `task` destructor ensures that all its jobs are finished (OK, FAILED or CANCELED)
// and are removed from AsyncLoader, so it is thread-safe to destroy them.
void schedule(LoadTask & task);
void schedule(const LoadTaskPtr & task);
// Schedule all tasks atomically, to ensure that only the highest priority jobs among all tasks are run first.
void schedule(const std::vector<LoadTaskPtr> & tasks);
// Increase priority of a job and all its dependencies recursively.
void prioritize(const LoadJobPtr & job, ssize_t new_priority);
// Remove finished jobs, cancel scheduled jobs, wait for executing jobs to finish and remove them.
void remove(const LoadJobSet & jobs);
// Increase or decrease maximum number of simultaneously executing jobs.
void setMaxThreads(size_t value);
size_t getMaxThreads() const;
size_t getScheduledJobCount() const;
// Helper class for introspection
struct JobState
{
LoadJobPtr job;
size_t dependencies_left = 0;
bool is_executing = false;
bool is_blocked = false;
bool is_ready = false;
std::optional<ssize_t> initial_priority;
std::optional<UInt64> ready_seqno;
};
// For introspection and debug only, see `system.async_loader` table
std::vector<JobState> getJobStates() const;
private:
void checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
String checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock);
void finish(std::unique_lock<std::mutex> & lock, const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job = {});
void scheduleImpl(const LoadJobSet & input_jobs);
void gatherNotScheduled(const LoadJobPtr & job, LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
void prioritize(const LoadJobPtr & job, ssize_t new_priority, std::unique_lock<std::mutex> & lock);
void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock);
void spawn(std::unique_lock<std::mutex> &);
void worker();
// Logging
const bool log_failures; // Worker should log all exceptions caught from job functions.
const bool log_progress; // Periodically log total progress
Poco::Logger * log;
std::chrono::system_clock::time_point busy_period_start_time;
AtomicStopwatch stopwatch;
size_t old_jobs = 0; // Number of jobs that were finished in previous busy period (for correct progress indication)
mutable std::mutex mutex; // Guards all the fields below.
bool is_running = false;
// Full set of scheduled pending jobs along with scheduling info.
std::unordered_map<LoadJobPtr, Info> scheduled_jobs;
// Subset of scheduled pending non-blocked jobs (waiting for a worker to be executed).
// Represent a queue of jobs in order of decreasing priority and FIFO for jobs with equal priorities.
std::map<ReadyKey, LoadJobPtr> ready_queue;
// Set of finished jobs (for introspection only, until jobs are removed).
LoadJobSet finished_jobs;
// Increasing counter for `ReadyKey` assignment (to preserve FIFO order of the jobs with equal priorities).
UInt64 last_ready_seqno = 0;
// For executing jobs. Note that we avoid using an internal queue of the pool to be able to prioritize jobs.
size_t max_threads;
size_t workers = 0;
ThreadPool pool;
};
}
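For reference, a minimal usage sketch of the scheduling API described above, assembled from the unit tests added later in this commit (gtest_async_loader.cpp); the metric names and constructor arguments are taken from those tests and are an assumption outside of a test context.
AsyncLoader loader(CurrentMetrics::TablesLoaderThreads, CurrentMetrics::TablesLoaderThreadsActive, /* max_threads = */ 2, /* log_failures = */ false, /* log_progress = */ false);
auto job_func = [] (const LoadJobPtr &) { /* do the actual loading work */ };
auto job1 = makeLoadJob({}, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func); // job2 depends on job1
auto task = makeLoadTask(loader, { job1, job2 });
task->schedule(); // jobs become pending
loader.start(); // workers begin executing ready jobs in priority order
job2->wait(); // blocks until job2 (and therefore job1) is finished
loader.stop(); // waits for executing jobs, leaves the rest pending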

View File

@ -1,30 +0,0 @@
#include <Common/Documentation.h>
namespace DB
{
std::string Documentation::examplesAsString() const
{
std::string res;
for (const auto & [example_name, example_query] : examples)
{
res += example_name + ":\n\n";
res += "```sql\n";
res += example_query + "\n";
res += "```\n";
}
return res;
}
std::string Documentation::categoriesAsString() const
{
if (categories.empty())
return "";
std::string res = categories[0];
for (size_t i = 1; i < categories.size(); ++i)
res += ", " + categories[i];
return res;
}
}

View File

@ -576,6 +576,9 @@
M(691, UNKNOWN_ELEMENT_OF_ENUM) \ M(691, UNKNOWN_ELEMENT_OF_ENUM) \
M(692, TOO_MANY_MUTATIONS) \ M(692, TOO_MANY_MUTATIONS) \
M(693, AWS_ERROR) \ M(693, AWS_ERROR) \
M(694, ASYNC_LOAD_CYCLE) \
M(695, ASYNC_LOAD_FAILED) \
M(696, ASYNC_LOAD_CANCELED) \
\ \
M(999, KEEPER_EXCEPTION) \ M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \ M(1000, POCO_EXCEPTION) \

View File

@ -0,0 +1,44 @@
#include <Common/FunctionDocumentation.h>
namespace DB
{
std::string FunctionDocumentation::argumentsAsString() const
{
std::string res;
for (const auto & [name, desc] : arguments)
{
res += "- " + name + ":" + desc + "\n";
}
return res;
}
std::string FunctionDocumentation::examplesAsString() const
{
std::string res;
for (const auto & [name, query, result] : examples)
{
res += name + ":\n\n";
res += "``` sql\n";
res += query + "\n";
res += "```\n\n";
res += "``` text\n";
res += result + "\n";
res += "```\n";
}
return res;
}
std::string FunctionDocumentation::categoriesAsString() const
{
if (categories.empty())
return "";
auto it = categories.begin();
std::string res = *it;
for (++it; it != categories.end(); ++it)
res += ", " + *it;
return res;
}
}

View File

@ -1,15 +1,14 @@
#pragma once #pragma once
#include <set>
#include <string> #include <string>
#include <vector> #include <vector>
#include <map>
namespace DB namespace DB
{ {
/** Embedded reference documentation for high-level server components, /** Embedded reference documentation for functions.
* such as SQL functions, table functions, data types, table engines, etc.
* *
* The advantages of embedded documentation are: * The advantages of embedded documentation are:
* - it is easy to write and update with code; * - it is easy to write and update with code;
@ -34,50 +33,49 @@ namespace DB
* - examples (queries that can be referenced from the text by names); * - examples (queries that can be referenced from the text by names);
* - categories - one or a few text strings like {"Mathematical", "Array Processing"}; * - categories - one or a few text strings like {"Mathematical", "Array Processing"};
* *
* Only the description is mandatory.
*
* The description should be represented in Markdown (or just plaintext). * The description should be represented in Markdown (or just plaintext).
* Some extensions for Markdown are added: * Some extensions for Markdown are added:
* - [example:name] will reference to an example with the corresponding name. * - [example:name] will reference to an example with the corresponding name.
* *
* Documentation does not support multiple languages. * Documentation does not support multiple languages.
* The only available language is English. * The only available language is English.
*
* TODO: Allow to specify Syntax, Argument(s) and a Returned Value.
* TODO: Organize Examples as a struct of ExampleName, ExampleQuery and ExampleResult.
*/ */
struct Documentation struct FunctionDocumentation
{ {
using Description = std::string; using Description = std::string;
using Syntax = std::string; using Syntax = std::string;
using Argument = std::string; struct Argument
{
std::string name;
std::string description;
};
using Arguments = std::vector<Argument>; using Arguments = std::vector<Argument>;
using ReturnedValue = std::string; using ReturnedValue = std::string;
using ExampleName = std::string; struct Example
using ExampleQuery = std::string; {
using Examples = std::map<ExampleName, ExampleQuery>; std::string name;
std::string query;
std::string result;
};
using Examples = std::vector<Example>;
using Category = std::string; using Category = std::string;
using Categories = std::vector<Category>; using Categories = std::set<Category>;
using Related = std::string; using Related = std::string;
Description description; Description description; /// E.g. "Returns the position (in bytes, starting at 1) of a substring needle in a string haystack."
Examples examples; Syntax syntax; /// E.g. "position(haystack, needle)"
Categories categories; Arguments arguments; /// E.g. ["haystack — String in which the search is performed. String.", "needle — Substring to be searched. String."]
ReturnedValue returned_value; /// E.g. "Starting position in bytes and counting from 1, if the substring was found."
Documentation(Description description_) : description(std::move(description_)) {} /// NOLINT Examples examples; ///
Documentation(Description description_, Examples examples_) : description(std::move(description_)), examples(std::move(examples_)) {} Categories categories; /// E.g. {"String Search"}
Documentation(Description description_, Examples examples_, Categories categories_)
: description(std::move(description_)), examples(std::move(examples_)), categories(std::move(categories_)) {}
/// TODO: Please remove this constructor. Documentation should always be non-empty.
Documentation() = default;
std::string argumentsAsString() const;
std::string examplesAsString() const; std::string examplesAsString() const;
std::string categoriesAsString() const; std::string categoriesAsString() const;
}; };
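For reference, a hypothetical sketch of filling in the new structure; the field values are illustrative only and the designated-initializer style is an assumption, not code from this commit.
FunctionDocumentation doc{
.description = "Returns the position (in bytes, starting at 1) of a substring needle in a string haystack.",
.syntax = "position(haystack, needle)",
.arguments = {{"haystack", "String in which the search is performed."}, {"needle", "Substring to be searched."}},
.returned_value = "Starting position in bytes, counting from 1, if the substring was found.",
.examples = {{"basic", "SELECT position('Hello, world!', '!')", "13"}},
.categories = {"String Search"}};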

View File

@ -251,7 +251,10 @@ requires (sizeof(T) <= sizeof(UInt64))
inline size_t DefaultHash64(T key) inline size_t DefaultHash64(T key)
{ {
DB::UInt64 out {0}; DB::UInt64 out {0};
std::memcpy(&out, &key, sizeof(T)); if constexpr (std::endian::native == std::endian::little)
std::memcpy(&out, &key, sizeof(T));
else
std::memcpy(reinterpret_cast<char*>(&out) + sizeof(DB::UInt64) - sizeof(T), &key, sizeof(T));
return intHash64(out); return intHash64(out);
} }
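The intent of the new branch is to keep DefaultHash64 endianness-independent: on both little- and big-endian targets the key ends up in the low-order bytes of `out`, so the value fed to intHash64 is the zero-extended integer in either case. A small hypothetical illustration of that invariant (names and the assert are illustrative only; includes from the surrounding header are assumed):
UInt32 key = 0xAABBCCDD;
UInt64 out = 0;
if constexpr (std::endian::native == std::endian::little)
    std::memcpy(&out, &key, sizeof(key));
else
    std::memcpy(reinterpret_cast<char*>(&out) + sizeof(out) - sizeof(key), &key, sizeof(key));
assert(out == 0xAABBCCDDULL); // holds on both byte orders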

View File

@ -8,19 +8,34 @@
#include <atomic> #include <atomic>
#include <memory> #include <memory>
/// From clock_getres(2):
///
/// Similar to CLOCK_MONOTONIC, but provides access to a raw hardware-based
/// time that is not subject to NTP adjustments or the incremental
/// adjustments performed by adjtime(3).
#ifdef CLOCK_MONOTONIC_RAW
static constexpr clockid_t STOPWATCH_DEFAULT_CLOCK = CLOCK_MONOTONIC_RAW;
#else
static constexpr clockid_t STOPWATCH_DEFAULT_CLOCK = CLOCK_MONOTONIC;
#endif
inline UInt64 clock_gettime_ns(clockid_t clock_type = CLOCK_MONOTONIC) inline UInt64 clock_gettime_ns(clockid_t clock_type = STOPWATCH_DEFAULT_CLOCK)
{ {
struct timespec ts; struct timespec ts;
clock_gettime(clock_type, &ts); clock_gettime(clock_type, &ts);
return UInt64(ts.tv_sec * 1000000000LL + ts.tv_nsec); return UInt64(ts.tv_sec * 1000000000LL + ts.tv_nsec);
} }
/// Sometimes monotonic clock may not be monotonic (due to bug in kernel?).
/// It may cause some operations to fail with "Timeout exceeded: elapsed 18446744073.709553 seconds".
/// Takes previously returned value and returns it again if time stepped back for some reason. /// Takes previously returned value and returns it again if time stepped back for some reason.
inline UInt64 clock_gettime_ns_adjusted(UInt64 prev_time, clockid_t clock_type = CLOCK_MONOTONIC) ///
/// You should use this if the OS does not support CLOCK_MONOTONIC_RAW
inline UInt64 clock_gettime_ns_adjusted(UInt64 prev_time, clockid_t clock_type = STOPWATCH_DEFAULT_CLOCK)
{ {
#ifdef CLOCK_MONOTONIC_RAW
if (likely(clock_type == CLOCK_MONOTONIC_RAW))
return clock_gettime_ns(clock_type);
#endif
UInt64 current_time = clock_gettime_ns(clock_type); UInt64 current_time = clock_gettime_ns(clock_type);
if (likely(prev_time <= current_time)) if (likely(prev_time <= current_time))
return current_time; return current_time;
@ -36,10 +51,10 @@ inline UInt64 clock_gettime_ns_adjusted(UInt64 prev_time, clockid_t clock_type =
class Stopwatch class Stopwatch
{ {
public: public:
/** CLOCK_MONOTONIC works relatively efficiently (~15 million calls/sec) and doesn't lead to a syscall. /** CLOCK_MONOTONIC/CLOCK_MONOTONIC_RAW works relatively efficiently (~40-50 million calls/sec) and doesn't lead to a syscall.
* Pass CLOCK_MONOTONIC_COARSE, if you need better performance with acceptable cost of several milliseconds of inaccuracy. * Pass CLOCK_MONOTONIC_COARSE, if you need better performance with acceptable cost of several milliseconds of inaccuracy.
*/ */
explicit Stopwatch(clockid_t clock_type_ = CLOCK_MONOTONIC) : clock_type(clock_type_) { start(); } explicit Stopwatch(clockid_t clock_type_ = STOPWATCH_DEFAULT_CLOCK) : clock_type(clock_type_) { start(); }
explicit Stopwatch(clockid_t clock_type_, UInt64 start_nanoseconds, bool is_running_) explicit Stopwatch(clockid_t clock_type_, UInt64 start_nanoseconds, bool is_running_)
: start_ns(start_nanoseconds), clock_type(clock_type_), is_running(is_running_) : start_ns(start_nanoseconds), clock_type(clock_type_), is_running(is_running_)
{ {
@ -75,7 +90,7 @@ using StopwatchUniquePtr = std::unique_ptr<Stopwatch>;
class AtomicStopwatch class AtomicStopwatch
{ {
public: public:
explicit AtomicStopwatch(clockid_t clock_type_ = CLOCK_MONOTONIC) : clock_type(clock_type_) { restart(); } explicit AtomicStopwatch(clockid_t clock_type_ = STOPWATCH_DEFAULT_CLOCK) : clock_type(clock_type_) { restart(); }
void restart() { start_ns = nanoseconds(0); } void restart() { start_ns = nanoseconds(0); }
UInt64 elapsed() const UInt64 elapsed() const
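A short sketch of how the adjusted reader above is meant to be used when CLOCK_MONOTONIC_RAW is unavailable; `prev` is a hypothetical caller-side variable holding the previously returned value, as the comment describes.
UInt64 prev = 0;
prev = clock_gettime_ns_adjusted(prev); // first reading
prev = clock_gettime_ns_adjusted(prev); // subsequent readings never go backwards, even if the clock steps back
A default-constructed Stopwatch or AtomicStopwatch now also uses STOPWATCH_DEFAULT_CLOCK, i.e. CLOCK_MONOTONIC_RAW where the platform defines it.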

View File

@ -206,21 +206,21 @@ public:
/// - If this will throw an exception, the destructor won't be called /// - If this will throw an exception, the destructor won't be called
/// - this pointer cannot be passed in the lambda, since after detach() it will not be valid /// - this pointer cannot be passed in the lambda, since after detach() it will not be valid
GlobalThreadPool::instance().scheduleOrThrow([ GlobalThreadPool::instance().scheduleOrThrow([
state = state, my_state = state,
func = std::forward<Function>(func), my_func = std::forward<Function>(func),
args = std::make_tuple(std::forward<Args>(args)...)]() mutable /// mutable is needed to destroy capture my_args = std::make_tuple(std::forward<Args>(args)...)]() mutable /// mutable is needed to destroy capture
{ {
SCOPE_EXIT( SCOPE_EXIT(
state->thread_id = std::thread::id(); my_state->thread_id = std::thread::id();
state->event.set(); my_state->event.set();
); );
state->thread_id = std::this_thread::get_id(); my_state->thread_id = std::this_thread::get_id();
/// These moves are needed to destroy the function and arguments before exit. /// These moves are needed to destroy the function and arguments before exit.
/// It will guarantee that after ThreadFromGlobalPool::join all captured params are destroyed. /// It will guarantee that after ThreadFromGlobalPool::join all captured params are destroyed.
auto function = std::move(func); auto function = std::move(my_func);
auto arguments = std::move(args); auto arguments = std::move(my_args);
/// Thread status holds a raw pointer to the query context, thus it must always be destroyed /// Thread status holds a raw pointer to the query context, thus it must always be destroyed
/// before sending signal that permits to join this thread. /// before sending signal that permits to join this thread.

View File

@ -386,8 +386,6 @@ protected:
FallbackSearcher fallback_searcher; FallbackSearcher fallback_searcher;
public: public:
using Searcher = FallbackSearcher;
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified). /** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
* If you specify it small enough, the fallback algorithm will be used, * If you specify it small enough, the fallback algorithm will be used,
* since it is considered that it's useless to waste time initializing the hash table. * since it is considered that it's useless to waste time initializing the hash table.
@ -729,7 +727,7 @@ public:
using Volnitsky = VolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>; using Volnitsky = VolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
using VolnitskyUTF8 = VolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>; /// exactly same as Volnitsky using VolnitskyUTF8 = VolnitskyBase<true, false, UTF8CaseSensitiveStringSearcher>;
using VolnitskyCaseInsensitive = VolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>; /// ignores non-ASCII bytes using VolnitskyCaseInsensitive = VolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>; /// ignores non-ASCII bytes
using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>; using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;
@ -737,7 +735,7 @@ using VolnitskyCaseSensitiveToken = VolnitskyBase<true, true, ASCIICaseSensitive
using VolnitskyCaseInsensitiveToken = VolnitskyBase<false, true, ASCIICaseInsensitiveTokenSearcher>; using VolnitskyCaseInsensitiveToken = VolnitskyBase<false, true, ASCIICaseInsensitiveTokenSearcher>;
using MultiVolnitsky = MultiVolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>; using MultiVolnitsky = MultiVolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>; using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, UTF8CaseSensitiveStringSearcher>;
using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>; using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>;
using MultiVolnitskyCaseInsensitiveUTF8 = MultiVolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>; using MultiVolnitskyCaseInsensitiveUTF8 = MultiVolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;

View File

@ -42,7 +42,7 @@ namespace ErrorCodes
#define __NR_renameat2 316 #define __NR_renameat2 316
#elif defined(__aarch64__) #elif defined(__aarch64__)
#define __NR_renameat2 276 #define __NR_renameat2 276
#elif defined(__ppc64__) #elif defined(__powerpc64__)
#define __NR_renameat2 357 #define __NR_renameat2 357
#elif defined(__riscv) #elif defined(__riscv)
#define __NR_renameat2 276 #define __NR_renameat2 276

View File

@ -17,18 +17,18 @@ namespace Format
{ {
using IndexPositions = PODArrayWithStackMemory<UInt64, 64>; using IndexPositions = PODArrayWithStackMemory<UInt64, 64>;
static inline void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res, UInt64 argument_number) static inline UInt64 parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 argument_number)
{ {
res = 0; UInt64 res = 0;
for (UInt64 pos = l; pos < r; ++pos) for (UInt64 pos = l; pos < r; ++pos)
{ {
if (!isNumericASCII(description[pos])) if (!isNumericASCII(description[pos]))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a number in curly braces at position {}", std::to_string(pos)); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a number in curly braces at position {}", pos);
res = res * 10 + description[pos] - '0'; res = res * 10 + description[pos] - '0';
if (res >= argument_number) if (res >= argument_number)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Too big number for arguments, must be at most {}", throw Exception(ErrorCodes::BAD_ARGUMENTS, "Too big number for arguments, must be at most {}", argument_number - 1);
argument_number - 1);
} }
return res;
} }
static inline void init( static inline void init(
@ -132,8 +132,7 @@ namespace Format
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot switch from automatic field numbering to manual field specification"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot switch from automatic field numbering to manual field specification");
is_plain_numbering = false; is_plain_numbering = false;
UInt64 arg; UInt64 arg = parseNumber(pattern, last_open, i, argument_number);
parseNumber(pattern, last_open, i, arg, argument_number);
if (arg >= argument_number) if (arg >= argument_number)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument is too big for formatting. Note that indexing starts from zero"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument is too big for formatting. Note that indexing starts from zero");

View File

@ -0,0 +1,749 @@
#include <gtest/gtest.h>
#include <barrier>
#include <chrono>
#include <mutex>
#include <stdexcept>
#include <string_view>
#include <vector>
#include <thread>
#include <pcg_random.hpp>
#include <base/types.h>
#include <base/sleep.h>
#include <Common/Exception.h>
#include <Common/AsyncLoader.h>
#include <Common/randomSeed.h>
using namespace DB;
namespace CurrentMetrics
{
extern const Metric TablesLoaderThreads;
extern const Metric TablesLoaderThreadsActive;
}
namespace DB::ErrorCodes
{
extern const int ASYNC_LOAD_CYCLE;
extern const int ASYNC_LOAD_FAILED;
extern const int ASYNC_LOAD_CANCELED;
}
struct AsyncLoaderTest
{
AsyncLoader loader;
std::mutex rng_mutex;
pcg64 rng{randomSeed()};
explicit AsyncLoaderTest(size_t max_threads = 1)
: loader(CurrentMetrics::TablesLoaderThreads, CurrentMetrics::TablesLoaderThreadsActive, max_threads, /* log_failures = */ false, /* log_progress = */ false)
{}
template <typename T>
T randomInt(T from, T to)
{
std::uniform_int_distribution<T> distribution(from, to);
std::scoped_lock lock(rng_mutex);
return distribution(rng);
}
void randomSleepUs(UInt64 min_us, UInt64 max_us, int probability_percent)
{
if (randomInt(0, 99) < probability_percent)
std::this_thread::sleep_for(std::chrono::microseconds(randomInt(min_us, max_us)));
}
template <typename JobFunc>
LoadJobSet randomJobSet(int job_count, int dep_probability_percent, JobFunc job_func, std::string_view name_prefix = "job")
{
std::vector<LoadJobPtr> jobs;
jobs.reserve(job_count);
for (int j = 0; j < job_count; j++)
{
LoadJobSet deps;
for (int d = 0; d < j; d++)
{
if (randomInt(0, 99) < dep_probability_percent)
deps.insert(jobs[d]);
}
jobs.push_back(makeLoadJob(std::move(deps), fmt::format("{}{}", name_prefix, j), job_func));
}
return {jobs.begin(), jobs.end()};
}
template <typename JobFunc>
LoadJobSet randomJobSet(int job_count, int dep_probability_percent, const std::vector<LoadJobPtr> & external_deps, JobFunc job_func, std::string_view name_prefix = "job")
{
std::vector<LoadJobPtr> jobs;
jobs.reserve(job_count);
for (int j = 0; j < job_count; j++)
{
LoadJobSet deps;
for (int d = 0; d < j; d++)
{
if (randomInt(0, 99) < dep_probability_percent)
deps.insert(jobs[d]);
}
if (!external_deps.empty() && randomInt(0, 99) < dep_probability_percent)
deps.insert(external_deps[randomInt<size_t>(0, external_deps.size() - 1)]);
jobs.push_back(makeLoadJob(std::move(deps), fmt::format("{}{}", name_prefix, j), job_func));
}
return {jobs.begin(), jobs.end()};
}
template <typename JobFunc>
LoadJobSet chainJobSet(int job_count, JobFunc job_func, std::string_view name_prefix = "job")
{
std::vector<LoadJobPtr> jobs;
jobs.reserve(job_count);
jobs.push_back(makeLoadJob({}, fmt::format("{}{}", name_prefix, 0), job_func));
for (int j = 1; j < job_count; j++)
jobs.push_back(makeLoadJob({ jobs[j - 1] }, fmt::format("{}{}", name_prefix, j), job_func));
return {jobs.begin(), jobs.end()};
}
LoadTaskPtr schedule(LoadJobSet && jobs)
{
LoadTaskPtr task = makeLoadTask(loader, std::move(jobs));
task->schedule();
return task;
}
};
TEST(AsyncLoader, Smoke)
{
AsyncLoaderTest t(2);
static constexpr ssize_t low_priority = -1;
std::atomic<size_t> jobs_done{0};
std::atomic<size_t> low_priority_jobs_done{0};
auto job_func = [&] (const LoadJobPtr & self) {
jobs_done++;
if (self->priority() == low_priority)
low_priority_jobs_done++;
};
{
auto job1 = makeLoadJob({}, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func);
auto task1 = t.schedule({ job1, job2 });
auto job3 = makeLoadJob({ job2 }, "job3", job_func);
auto job4 = makeLoadJob({ job2 }, "job4", job_func);
auto task2 = t.schedule({ job3, job4 });
auto job5 = makeLoadJob({ job3, job4 }, low_priority, "job5", job_func);
task2->merge(t.schedule({ job5 }));
std::thread waiter_thread([=] { job5->wait(); });
t.loader.start();
job3->wait();
t.loader.wait();
job4->wait();
waiter_thread.join();
ASSERT_EQ(job1->status(), LoadStatus::OK);
ASSERT_EQ(job2->status(), LoadStatus::OK);
}
ASSERT_EQ(jobs_done, 5);
ASSERT_EQ(low_priority_jobs_done, 1);
t.loader.stop();
}
TEST(AsyncLoader, CycleDetection)
{
AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {};
LoadJobPtr cycle_breaker; // To avoid a memory leak caused by the cycle we introduce below
try
{
std::vector<LoadJobPtr> jobs;
jobs.reserve(16);
jobs.push_back(makeLoadJob({}, "job0", job_func));
jobs.push_back(makeLoadJob({ jobs[0] }, "job1", job_func));
jobs.push_back(makeLoadJob({ jobs[0], jobs[1] }, "job2", job_func));
jobs.push_back(makeLoadJob({ jobs[0], jobs[2] }, "job3", job_func));
// Actually it is hard to construct a cycle, but suppose someone managed to do it by violating constness
const_cast<LoadJobSet &>(jobs[1]->dependencies).insert(jobs[3]);
cycle_breaker = jobs[1];
// Add a couple of unrelated jobs
jobs.push_back(makeLoadJob({ jobs[1] }, "job4", job_func));
jobs.push_back(makeLoadJob({ jobs[4] }, "job5", job_func));
jobs.push_back(makeLoadJob({ jobs[3] }, "job6", job_func));
jobs.push_back(makeLoadJob({ jobs[1], jobs[2], jobs[3], jobs[4], jobs[5], jobs[6] }, "job7", job_func));
// Also add other unconnected jobs
jobs.push_back(makeLoadJob({}, "job8", job_func));
jobs.push_back(makeLoadJob({}, "job9", job_func));
jobs.push_back(makeLoadJob({ jobs[9] }, "job10", job_func));
auto task1 = t.schedule({ jobs.begin(), jobs.end()});
FAIL();
}
catch (Exception & e)
{
int present[] = { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
for (int i = 0; i < std::size(present); i++)
ASSERT_EQ(e.message().find(fmt::format("job{}", i)) != String::npos, present[i]);
}
const_cast<LoadJobSet &>(cycle_breaker->dependencies).clear();
}
TEST(AsyncLoader, CancelPendingJob)
{
AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {};
auto job = makeLoadJob({}, "job", job_func);
auto task = t.schedule({ job });
task->remove(); // this cancels the pending job (the async loader was not started, so it never executed the job)
ASSERT_EQ(job->status(), LoadStatus::CANCELED);
try
{
job->wait();
FAIL();
}
catch (Exception & e)
{
ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED);
}
}
TEST(AsyncLoader, CancelPendingTask)
{
AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {};
auto job1 = makeLoadJob({}, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func);
auto task = t.schedule({ job1, job2 });
task->remove(); // this cancels both jobs (the async loader was not started to execute them)
ASSERT_EQ(job1->status(), LoadStatus::CANCELED);
ASSERT_EQ(job2->status(), LoadStatus::CANCELED);
try
{
job1->wait();
FAIL();
}
catch (Exception & e)
{
ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED);
}
try
{
job2->wait();
FAIL();
}
catch (Exception & e)
{
ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED);
}
}
TEST(AsyncLoader, CancelPendingDependency)
{
AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {};
auto job1 = makeLoadJob({}, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func);
auto task1 = t.schedule({ job1 });
auto task2 = t.schedule({ job2 });
task1->remove(); // this cancels both jobs due to the dependency (the async loader was not started to execute them)
ASSERT_EQ(job1->status(), LoadStatus::CANCELED);
ASSERT_EQ(job2->status(), LoadStatus::CANCELED);
try
{
job1->wait();
FAIL();
}
catch (Exception & e)
{
ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED);
}
try
{
job2->wait();
FAIL();
}
catch (Exception & e)
{
ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED);
}
}
TEST(AsyncLoader, CancelExecutingJob)
{
AsyncLoaderTest t;
t.loader.start();
std::barrier sync(2);
auto job_func = [&] (const LoadJobPtr &)
{
sync.arrive_and_wait(); // (A) sync with main thread
sync.arrive_and_wait(); // (B) wait for waiter
// signals (C)
};
auto job = makeLoadJob({}, "job", job_func);
auto task = t.schedule({ job });
sync.arrive_and_wait(); // (A) wait for job to start executing
std::thread canceler([&]
{
task->remove(); // waits for (C)
});
while (job->waitersCount() == 0)
std::this_thread::yield();
ASSERT_EQ(job->status(), LoadStatus::PENDING);
sync.arrive_and_wait(); // (B) sync with job
canceler.join();
ASSERT_EQ(job->status(), LoadStatus::OK);
job->wait();
}
TEST(AsyncLoader, CancelExecutingTask)
{
AsyncLoaderTest t(16);
t.loader.start();
std::barrier sync(2);
auto blocker_job_func = [&] (const LoadJobPtr &)
{
sync.arrive_and_wait(); // (A) sync with main thread
sync.arrive_and_wait(); // (B) wait for waiter
// signals (C)
};
auto job_to_cancel_func = [&] (const LoadJobPtr &)
{
FAIL(); // this job should be canceled
};
auto job_to_succeed_func = [&] (const LoadJobPtr &)
{
};
// Make several iterations to catch the race (if any)
for (int iteration = 0; iteration < 10; iteration++) {
std::vector<LoadJobPtr> task1_jobs;
task1_jobs.reserve(256);
auto blocker_job = makeLoadJob({}, "blocker_job", blocker_job_func);
task1_jobs.push_back(blocker_job);
for (int i = 0; i < 100; i++)
task1_jobs.push_back(makeLoadJob({ blocker_job }, "job_to_cancel", job_to_cancel_func));
auto task1 = t.schedule({ task1_jobs.begin(), task1_jobs.end() });
auto job_to_succeed = makeLoadJob({ blocker_job }, "job_to_succeed", job_to_succeed_func);
auto task2 = t.schedule({ job_to_succeed });
sync.arrive_and_wait(); // (A) wait for job to start executing
std::thread canceler([&]
{
task1->remove(); // waits for (C)
});
while (blocker_job->waitersCount() == 0)
std::this_thread::yield();
ASSERT_EQ(blocker_job->status(), LoadStatus::PENDING);
sync.arrive_and_wait(); // (B) sync with job
canceler.join();
t.loader.wait();
ASSERT_EQ(blocker_job->status(), LoadStatus::OK);
ASSERT_EQ(job_to_succeed->status(), LoadStatus::OK);
for (const auto & job : task1_jobs)
{
if (job != blocker_job)
ASSERT_EQ(job->status(), LoadStatus::CANCELED);
}
}
}
TEST(AsyncLoader, DISABLED_JobFailure)
{
AsyncLoaderTest t;
t.loader.start();
std::string error_message = "test job failure";
auto job_func = [&] (const LoadJobPtr &) {
throw std::runtime_error(error_message);
};
auto job = makeLoadJob({}, "job", job_func);
auto task = t.schedule({ job });
t.loader.wait();
ASSERT_EQ(job->status(), LoadStatus::FAILED);
try
{
job->wait();
FAIL();
}
catch (Exception & e)
{
ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_FAILED);
ASSERT_TRUE(e.message().find(error_message) != String::npos);
}
}
TEST(AsyncLoader, ScheduleJobWithFailedDependencies)
{
AsyncLoaderTest t;
t.loader.start();
std::string_view error_message = "test job failure";
auto failed_job_func = [&] (const LoadJobPtr &) {
throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message);
};
auto failed_job = makeLoadJob({}, "failed_job", failed_job_func);
auto failed_task = t.schedule({ failed_job });
t.loader.wait();
auto job_func = [&] (const LoadJobPtr &) {};
auto job1 = makeLoadJob({ failed_job }, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func);
auto task = t.schedule({ job1, job2 });
t.loader.wait();
ASSERT_EQ(job1->status(), LoadStatus::CANCELED);
ASSERT_EQ(job2->status(), LoadStatus::CANCELED);
try
{
job1->wait();
FAIL();
}
catch (Exception & e)
{
ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED);
ASSERT_TRUE(e.message().find(error_message) != String::npos);
}
try
{
job2->wait();
FAIL();
}
catch (Exception & e)
{
ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED);
ASSERT_TRUE(e.message().find(error_message) != String::npos);
}
}
TEST(AsyncLoader, ScheduleJobWithCanceledDependencies)
{
AsyncLoaderTest t;
auto canceled_job_func = [&] (const LoadJobPtr &) {};
auto canceled_job = makeLoadJob({}, "canceled_job", canceled_job_func);
auto canceled_task = t.schedule({ canceled_job });
canceled_task->remove();
t.loader.start();
auto job_func = [&] (const LoadJobPtr &) {};
auto job1 = makeLoadJob({ canceled_job }, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func);
auto task = t.schedule({ job1, job2 });
t.loader.wait();
ASSERT_EQ(job1->status(), LoadStatus::CANCELED);
ASSERT_EQ(job2->status(), LoadStatus::CANCELED);
try
{
job1->wait();
FAIL();
}
catch (Exception & e)
{
ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED);
}
try
{
job2->wait();
FAIL();
}
catch (Exception & e)
{
ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED);
}
}
TEST(AsyncLoader, TestConcurrency)
{
AsyncLoaderTest t(10);
t.loader.start();
for (int concurrency = 1; concurrency <= 10; concurrency++)
{
std::barrier sync(concurrency);
std::atomic<int> executing{0};
auto job_func = [&] (const LoadJobPtr &)
{
executing++;
ASSERT_LE(executing, concurrency);
sync.arrive_and_wait();
executing--;
};
std::vector<LoadTaskPtr> tasks;
tasks.reserve(concurrency);
for (int i = 0; i < concurrency; i++)
tasks.push_back(t.schedule(t.chainJobSet(5, job_func)));
t.loader.wait();
ASSERT_EQ(executing, 0);
}
}
TEST(AsyncLoader, TestOverload)
{
AsyncLoaderTest t(3);
t.loader.start();
size_t max_threads = t.loader.getMaxThreads();
std::atomic<int> executing{0};
for (int concurrency = 4; concurrency <= 8; concurrency++)
{
auto job_func = [&] (const LoadJobPtr &)
{
executing++;
t.randomSleepUs(100, 200, 100);
ASSERT_LE(executing, max_threads);
executing--;
};
t.loader.stop();
std::vector<LoadTaskPtr> tasks;
tasks.reserve(concurrency);
for (int i = 0; i < concurrency; i++)
tasks.push_back(t.schedule(t.chainJobSet(5, job_func)));
t.loader.start();
t.loader.wait();
ASSERT_EQ(executing, 0);
}
}
TEST(AsyncLoader, StaticPriorities)
{
AsyncLoaderTest t(1);
std::string schedule;
auto job_func = [&] (const LoadJobPtr & self)
{
schedule += fmt::format("{}{}", self->name, self->priority());
};
std::vector<LoadJobPtr> jobs;
jobs.push_back(makeLoadJob({}, 0, "A", job_func)); // 0
jobs.push_back(makeLoadJob({ jobs[0] }, 3, "B", job_func)); // 1
jobs.push_back(makeLoadJob({ jobs[0] }, 4, "C", job_func)); // 2
jobs.push_back(makeLoadJob({ jobs[0] }, 1, "D", job_func)); // 3
jobs.push_back(makeLoadJob({ jobs[0] }, 2, "E", job_func)); // 4
jobs.push_back(makeLoadJob({ jobs[3], jobs[4] }, 0, "F", job_func)); // 5
jobs.push_back(makeLoadJob({ jobs[5] }, 0, "G", job_func)); // 6
jobs.push_back(makeLoadJob({ jobs[6] }, 9, "H", job_func)); // 7
auto task = t.schedule({ jobs.begin(), jobs.end() });
t.loader.start();
t.loader.wait();
ASSERT_EQ(schedule, "A9E9D9F9G9H9C4B3");
}
TEST(AsyncLoader, DynamicPriorities)
{
AsyncLoaderTest t(1);
for (bool prioritize : {false, true})
{
std::string schedule;
LoadJobPtr job_to_prioritize;
auto job_func = [&] (const LoadJobPtr & self)
{
if (prioritize && self->name == "C")
t.loader.prioritize(job_to_prioritize, 9); // dynamic prioritization
schedule += fmt::format("{}{}", self->name, self->priority());
};
// Job DAG with initial priorities. During execution of C4, the priority of job G0 is increased to G9, postponing execution of job B3.
// A0 -+-> B3
// |
// `-> C4
// |
// `-> D1 -.
// | +-> F0 --> G0 --> H0
// `-> E2 -'
std::vector<LoadJobPtr> jobs;
jobs.push_back(makeLoadJob({}, 0, "A", job_func)); // 0
jobs.push_back(makeLoadJob({ jobs[0] }, 3, "B", job_func)); // 1
jobs.push_back(makeLoadJob({ jobs[0] }, 4, "C", job_func)); // 2
jobs.push_back(makeLoadJob({ jobs[0] }, 1, "D", job_func)); // 3
jobs.push_back(makeLoadJob({ jobs[0] }, 2, "E", job_func)); // 4
jobs.push_back(makeLoadJob({ jobs[3], jobs[4] }, 0, "F", job_func)); // 5
jobs.push_back(makeLoadJob({ jobs[5] }, 0, "G", job_func)); // 6
jobs.push_back(makeLoadJob({ jobs[6] }, 0, "H", job_func)); // 7
auto task = t.schedule({ jobs.begin(), jobs.end() });
job_to_prioritize = jobs[6];
t.loader.start();
t.loader.wait();
t.loader.stop();
if (prioritize)
ASSERT_EQ(schedule, "A4C4E9D9F9G9B3H0");
else
ASSERT_EQ(schedule, "A4C4B3E2D1F0G0H0");
}
}
TEST(AsyncLoader, RandomIndependentTasks)
{
AsyncLoaderTest t(16);
t.loader.start();
auto job_func = [&] (const LoadJobPtr & self)
{
for (const auto & dep : self->dependencies)
ASSERT_EQ(dep->status(), LoadStatus::OK);
t.randomSleepUs(100, 500, 5);
};
std::vector<LoadTaskPtr> tasks;
tasks.reserve(512);
for (int i = 0; i < 512; i++)
{
int job_count = t.randomInt(1, 32);
tasks.push_back(t.schedule(t.randomJobSet(job_count, 5, job_func)));
t.randomSleepUs(100, 900, 20); // avg=100us
}
}
TEST(AsyncLoader, RandomDependentTasks)
{
AsyncLoaderTest t(16);
t.loader.start();
std::mutex mutex;
std::condition_variable cv;
std::vector<LoadTaskPtr> tasks;
std::vector<LoadJobPtr> all_jobs;
auto job_func = [&] (const LoadJobPtr & self)
{
for (const auto & dep : self->dependencies)
ASSERT_EQ(dep->status(), LoadStatus::OK);
cv.notify_one();
};
std::unique_lock lock{mutex};
int tasks_left = 1000;
tasks.reserve(tasks_left);
while (tasks_left-- > 0)
{
cv.wait(lock, [&] { return t.loader.getScheduledJobCount() < 100; });
// Add one new task
int job_count = t.randomInt(1, 32);
LoadJobSet jobs = t.randomJobSet(job_count, 5, all_jobs, job_func);
all_jobs.insert(all_jobs.end(), jobs.begin(), jobs.end());
tasks.push_back(t.schedule(std::move(jobs)));
// Cancel random old task
if (tasks.size() > 100)
tasks.erase(tasks.begin() + t.randomInt<size_t>(0, tasks.size() - 1));
}
t.loader.wait();
}
TEST(AsyncLoader, SetMaxThreads)
{
AsyncLoaderTest t(1);
std::atomic<int> sync_index{0};
std::atomic<int> executing{0};
int max_threads_values[] = {1, 2, 3, 4, 5, 4, 3, 2, 1, 5, 10, 5, 1, 20, 1};
std::vector<std::unique_ptr<std::barrier<>>> syncs;
syncs.reserve(std::size(max_threads_values));
for (int max_threads : max_threads_values)
syncs.push_back(std::make_unique<std::barrier<>>(max_threads + 1));
auto job_func = [&] (const LoadJobPtr &)
{
int idx = sync_index;
if (idx < syncs.size())
{
executing++;
syncs[idx]->arrive_and_wait(); // (A)
executing--;
syncs[idx]->arrive_and_wait(); // (B)
}
};
// Generate enough independent jobs
for (int i = 0; i < 1000; i++)
t.schedule({makeLoadJob({}, "job", job_func)})->detach();
t.loader.start();
while (sync_index < syncs.size())
{
// Wait for `max_threads` jobs to start executing
int idx = sync_index;
while (executing.load() != max_threads_values[idx])
{
ASSERT_LE(executing, max_threads_values[idx]);
std::this_thread::yield();
}
// Allow all jobs to finish
syncs[idx]->arrive_and_wait(); // (A)
sync_index++;
if (sync_index < syncs.size())
t.loader.setMaxThreads(max_threads_values[sync_index]);
syncs[idx]->arrive_and_wait(); // (B) this sync point is required to allow `executing` value to go back down to zero after we change number of workers
}
t.loader.wait();
}

View File

@ -234,12 +234,12 @@ TEST(ConcurrencyControl, MultipleThreads)
while (auto slot = slots->tryAcquire()) while (auto slot = slots->tryAcquire())
{ {
std::unique_lock lock{threads_mutex}; std::unique_lock lock{threads_mutex};
threads.emplace_back([&, slot = std::move(slot)] threads.emplace_back([&, my_slot = std::move(slot)]
{ {
pcg64 rng(randomSeed()); pcg64 rng(randomSeed());
std::uniform_int_distribution<size_t> distribution(1, cfg_work_us); std::uniform_int_distribution<size_t> distribution(1, cfg_work_us);
size_t steps = distribution(rng); size_t steps = distribution(rng);
for (size_t step = 0; step < steps; step++) for (size_t step = 0; step < steps; ++step)
{ {
sleepForMicroseconds(distribution(rng)); // emulate work sleepForMicroseconds(distribution(rng)); // emulate work
spawn_threads(); // upscale spawn_threads(); // upscale

View File

@ -56,8 +56,17 @@ static bool check2()
{ {
ThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, 2); ThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, 2);
pool.scheduleOrThrowOnError([&]{ throw std::runtime_error("Hello, world!"); }); try
pool.scheduleOrThrowOnError([]{}); {
pool.scheduleOrThrowOnError([&]{ throw std::runtime_error("Hello, world!"); });
pool.scheduleOrThrowOnError([]{});
}
catch (const std::runtime_error &)
{
/// Sometimes an exception may be thrown from schedule.
/// Just retry the test in that case.
return true;
}
try try
{ {

View File

@ -41,7 +41,7 @@ enum PollPidResult
#define SYS_pidfd_open 434 #define SYS_pidfd_open 434
#elif defined(__aarch64__) #elif defined(__aarch64__)
#define SYS_pidfd_open 434 #define SYS_pidfd_open 434
#elif defined(__ppc64__) #elif defined(__powerpc64__)
#define SYS_pidfd_open 434 #define SYS_pidfd_open 434
#elif defined(__riscv) #elif defined(__riscv)
#define SYS_pidfd_open 434 #define SYS_pidfd_open 434

View File

@ -471,9 +471,9 @@ void KeeperDispatcher::shutdown()
const auto raft_result = server->putRequestBatch(close_requests); const auto raft_result = server->putRequestBatch(close_requests);
auto sessions_closing_done_promise = std::make_shared<std::promise<void>>(); auto sessions_closing_done_promise = std::make_shared<std::promise<void>>();
auto sessions_closing_done = sessions_closing_done_promise->get_future(); auto sessions_closing_done = sessions_closing_done_promise->get_future();
raft_result->when_ready([sessions_closing_done_promise = std::move(sessions_closing_done_promise)]( raft_result->when_ready([my_sessions_closing_done_promise = std::move(sessions_closing_done_promise)](
nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & /*result*/, nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & /*result*/,
nuraft::ptr<std::exception> & /*exception*/) { sessions_closing_done_promise->set_value(); }); nuraft::ptr<std::exception> & /*exception*/) { my_sessions_closing_done_promise->set_value(); });
auto session_shutdown_timeout = configuration_and_settings->coordination_settings->session_shutdown_timeout.totalMilliseconds(); auto session_shutdown_timeout = configuration_and_settings->coordination_settings->session_shutdown_timeout.totalMilliseconds();
if (sessions_closing_done.wait_for(std::chrono::milliseconds(session_shutdown_timeout)) != std::future_status::ready) if (sessions_closing_done.wait_for(std::chrono::milliseconds(session_shutdown_timeout)) != std::future_status::ready)

View File

@ -342,34 +342,34 @@ void KeeperStorage::UncommittedState::applyDelta(const Delta & delta)
auto & [node, acls, last_applied_zxid] = nodes.at(delta.path); auto & [node, acls, last_applied_zxid] = nodes.at(delta.path);
std::visit( std::visit(
[&, &node = node, &acls = acls, &last_applied_zxid = last_applied_zxid]<typename DeltaType>(const DeltaType & operation) [&, &my_node = node, &my_acls = acls, &my_last_applied_zxid = last_applied_zxid]<typename DeltaType>(const DeltaType & operation)
{ {
if constexpr (std::same_as<DeltaType, CreateNodeDelta>) if constexpr (std::same_as<DeltaType, CreateNodeDelta>)
{ {
assert(!node); assert(!my_node);
node = std::make_shared<Node>(); my_node = std::make_shared<Node>();
node->stat = operation.stat; my_node->stat = operation.stat;
node->setData(operation.data); my_node->setData(operation.data);
acls = operation.acls; my_acls = operation.acls;
last_applied_zxid = delta.zxid; my_last_applied_zxid = delta.zxid;
} }
else if constexpr (std::same_as<DeltaType, RemoveNodeDelta>) else if constexpr (std::same_as<DeltaType, RemoveNodeDelta>)
{ {
assert(node); assert(my_node);
node = nullptr; my_node = nullptr;
last_applied_zxid = delta.zxid; my_last_applied_zxid = delta.zxid;
} }
else if constexpr (std::same_as<DeltaType, UpdateNodeDelta>) else if constexpr (std::same_as<DeltaType, UpdateNodeDelta>)
{ {
assert(node); assert(my_node);
node->invalidateDigestCache(); my_node->invalidateDigestCache();
operation.update_fn(*node); operation.update_fn(*node);
last_applied_zxid = delta.zxid; my_last_applied_zxid = delta.zxid;
} }
else if constexpr (std::same_as<DeltaType, SetACLDelta>) else if constexpr (std::same_as<DeltaType, SetACLDelta>)
{ {
acls = operation.acls; my_acls = operation.acls;
last_applied_zxid = delta.zxid; my_last_applied_zxid = delta.zxid;
} }
}, },
delta.operation); delta.operation);

View File

@ -140,7 +140,7 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, Poco::L
if (itr.key != "/") if (itr.key != "/")
{ {
auto parent_path = parentPath(itr.key); auto parent_path = parentPath(itr.key);
storage.container.updateValue(parent_path, [path = itr.key] (KeeperStorage::Node & value) { value.addChild(getBaseName(path)); value.stat.numChildren++; }); storage.container.updateValue(parent_path, [my_path = itr.key] (KeeperStorage::Node & value) { value.addChild(getBaseName(my_path)); ++value.stat.numChildren; });
} }
} }

View File

@ -501,9 +501,11 @@ void BaseSettings<TTraits>::read(ReadBuffer & in, SettingsWriteFormat format)
const auto & accessor = Traits::Accessor::instance(); const auto & accessor = Traits::Accessor::instance();
while (true) while (true)
{ {
String name = BaseSettingsHelpers::readString(in); String read_name = BaseSettingsHelpers::readString(in);
if (name.empty() /* empty string is a marker of the end of settings */) if (read_name.empty() /* empty string is a marker of the end of settings */)
break; break;
std::string_view name = TTraits::resolveName(read_name);
size_t index = accessor.find(name); size_t index = accessor.find(name);
using Flags = BaseSettingsHelpers::Flags; using Flags = BaseSettingsHelpers::Flags;

View File

@ -663,12 +663,10 @@ Names Block::getDataTypeNames() const
Block::NameMap Block::getNamesToIndexesMap() const Block::NameMap Block::getNamesToIndexesMap() const
{ {
NameMap res; NameMap res(index_by_name.size());
res.reserve(index_by_name.size()); res.set_empty_key(StringRef{});
for (const auto & [name, index] : index_by_name) for (const auto & [name, index] : index_by_name)
res[name] = index; res[name] = index;
return res; return res;
} }

View File

@ -5,13 +5,11 @@
#include <Core/ColumnsWithTypeAndName.h> #include <Core/ColumnsWithTypeAndName.h>
#include <Core/NamesAndTypes.h> #include <Core/NamesAndTypes.h>
#include <Common/HashTable/HashMap.h>
#include <initializer_list> #include <initializer_list>
#include <list> #include <list>
#include <map>
#include <set> #include <set>
#include <vector> #include <vector>
#include <sparsehash/dense_hash_map>
namespace DB namespace DB
@ -97,7 +95,7 @@ public:
Names getDataTypeNames() const; Names getDataTypeNames() const;
/// Hash table match `column name -> position in the block`. /// Hash table match `column name -> position in the block`.
using NameMap = HashMap<StringRef, size_t, StringRefHash>; using NameMap = ::google::dense_hash_map<StringRef, size_t, StringRefHash>;
NameMap getNamesToIndexesMap() const; NameMap getNamesToIndexesMap() const;
Serializations getSerializations() const; Serializations getSerializations() const;

View File

@ -642,7 +642,7 @@ class IColumn;
M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
\ \
M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \
M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, io_uring, pread_threadpool. The 'io_uring' method is experimental and does not work for Log, TinyLog, StripeLog, File, Set and Join, and other tables with append-able files in presence of concurrent reads and writes.", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, io_uring, pread_threadpool. The 'io_uring' method is experimental and does not work for Log, TinyLog, StripeLog, File, Set and Join, and other tables with append-able files in presence of concurrent reads and writes.", 0) \
M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \
M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \
@ -745,9 +745,10 @@ class IColumn;
M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \
M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
M(Bool, optimize_distinct_in_order, false, "This optimization has a bug and it is disabled. Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \ M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
// End of COMMON_SETTINGS // End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
@ -905,7 +906,6 @@ class IColumn;
M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \ M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \
M(Bool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \ M(Bool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \
M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \
M(Milliseconds, output_format_pretty_squash_ms, 100, "Squash blocks in Pretty formats if the time passed after the previous block is not greater than the specified threshold in milliseconds. This avoids printing multiple small blocks.", 0) \
M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \ M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \
M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \ M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \
M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \ M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \
@ -973,7 +973,6 @@ class IColumn;
M(Bool, output_format_bson_string_as_string, false, "Use BSON String type instead of Binary for String columns.", 0) \ M(Bool, output_format_bson_string_as_string, false, "Use BSON String type instead of Binary for String columns.", 0) \
M(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format BSON.", 0) \ M(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format BSON.", 0) \
\ \
M(Bool, regexp_dict_allow_other_sources, false, "Allow regexp_tree dictionary to use sources other than yaml source.", 0) \
M(Bool, format_display_secrets_in_show_and_select, false, "Do not hide secrets in SHOW and SELECT queries.", IMPORTANT) \ M(Bool, format_display_secrets_in_show_and_select, false, "Do not hide secrets in SHOW and SELECT queries.", IMPORTANT) \
M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \ M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
\ \

View File

@ -7,6 +7,7 @@
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <Common/AlignedBuffer.h> #include <Common/AlignedBuffer.h>
#include <Common/Arena.h>
#include <Formats/FormatSettings.h> #include <Formats/FormatSettings.h>
#include <Formats/ProtobufReader.h> #include <Formats/ProtobufReader.h>

View File

@ -32,9 +32,9 @@ void SerializationArray::serializeBinary(const Field & field, WriteBuffer & ostr
{ {
const Array & a = field.get<const Array &>(); const Array & a = field.get<const Array &>();
writeVarUInt(a.size(), ostr); writeVarUInt(a.size(), ostr);
for (size_t i = 0; i < a.size(); ++i) for (const auto & i : a)
{ {
nested->serializeBinary(a[i], ostr, settings); nested->serializeBinary(i, ostr, settings);
} }
} }

View File

@ -67,7 +67,7 @@ DatabaseMySQL::DatabaseMySQL(
try try
{ {
/// Test that the database is working fine; it will also fetch tables. /// Test that the database is working fine; it will also fetch tables.
empty(); empty(); // NOLINT(bugprone-standalone-empty)
} }
catch (...) catch (...)
{ {

Some files were not shown because too many files have changed in this diff