Merge branch 'master' into startup-sanity-checks

alesapin 2022-03-29 14:09:20 +02:00
commit ed2c461fe1
326 changed files with 5762 additions and 1553 deletions

View File

@@ -210,3 +210,6 @@ CheckOptions:
    value: false
  - key: performance-move-const-arg.CheckTriviallyCopyableMove
    value: false
+  # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
+  - key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
+    value: expr-type

View File

@@ -360,6 +360,52 @@ jobs:
          docker kill "$(docker ps -q)" ||:
          docker rm -f "$(docker ps -a -q)" ||:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  BuilderBinGCC:
+    needs: [DockerHubPush]
+    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=binary_gcc
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+        with:
+          submodules: 'true'
+          fetch-depth: 0 # otherwise we will have no info about contributors
+      - name: Build
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ env.BUILD_NAME }}
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderDebAsan:
    needs: [DockerHubPush]
    runs-on: [self-hosted, builder]
@@ -918,6 +964,7 @@ jobs:
      - BuilderDebRelease
      - BuilderDebAarch64
      - BuilderBinRelease
+     - BuilderBinGCC
      - BuilderDebAsan
      - BuilderDebTsan
      - BuilderDebUBsan
@@ -2608,6 +2655,40 @@ jobs:
          docker kill "$(docker ps -q)" ||:
          docker rm -f "$(docker ps -a -q)" ||:
          sudo rm -fr "$TEMP_PATH"
+  UnitTestsReleaseGCC:
+    needs: [BuilderBinGCC]
+    runs-on: [self-hosted, fuzzer-unit-tester]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/unit_tests_asan
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Unit tests (release-gcc, actions)
+          REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
+          EOF
+      - name: Download json reports
+        uses: actions/download-artifact@v2
+        with:
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Unit test
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 unit_tests_check.py "$CHECK_NAME"
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
  UnitTestsTsan:
    needs: [BuilderDebTsan]
    runs-on: [self-hosted, fuzzer-unit-tester]

View File

@@ -370,6 +370,48 @@ jobs:
          docker kill "$(docker ps -q)" ||:
          docker rm -f "$(docker ps -a -q)" ||:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  BuilderBinGCC:
+    needs: [DockerHubPush, FastTest]
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=binary_gcc
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ runner.temp }}/images_path
+      - name: Check out repository code
+        uses: actions/checkout@v2
+        with:
+          submodules: 'true'
+          fetch-depth: 0 # otherwise we will have no info about contributors
+      - name: Build
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ env.BUILD_NAME }}
+          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderDebAarch64:
    needs: [DockerHubPush, FastTest]
    runs-on: [self-hosted, builder]
@@ -963,6 +1005,7 @@ jobs:
      - BuilderDebRelease
      - BuilderDebAarch64
      - BuilderBinRelease
+     - BuilderBinGCC
      - BuilderDebAsan
      - BuilderDebTsan
      - BuilderDebUBsan
@@ -2808,6 +2851,40 @@ jobs:
          docker kill "$(docker ps -q)" ||:
          docker rm -f "$(docker ps -a -q)" ||:
          sudo rm -fr "$TEMP_PATH"
+  UnitTestsReleaseGCC:
+    needs: [BuilderBinGCC]
+    runs-on: [self-hosted, fuzzer-unit-tester]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/unit_tests_asan
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Unit tests (release-gcc, actions)
+          REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
+          EOF
+      - name: Download json reports
+        uses: actions/download-artifact@v2
+        with:
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Unit test
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 unit_tests_check.py "$CHECK_NAME"
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
  UnitTestsTsan:
    needs: [BuilderDebTsan]
    runs-on: [self-hosted, fuzzer-unit-tester]

View File

@@ -11,6 +11,7 @@
* Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)).
* Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)).
+* Make functions `cast(value, 'IPv4')` and `cast(value, 'IPv6')` behave the same as the `toIPv4` and `toIPv6` functions. The behavior for incorrect IP addresses passed into `toIPv4` and `toIPv6` has changed: if an invalid IP address is passed into these functions, an exception is now raised, where previously they returned a default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringToNumOrNull`, `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault`, `toIPv4OrDefault`, `toIPv6OrDefault` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning a default value for an invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`; if this setting is enabled, IP address conversion functions behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)).
#### New Feature
@@ -366,7 +367,7 @@
#### Improvement
* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch.
* Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)).
* If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)).
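A hypothetical SQL sketch of the behavior change described in the IP-conversion entry above; the function and setting names come from the entry itself, while the queries and outputs are illustrative:

```sql
-- Before this change, toIPv4 returned a default value for garbage input;
-- now it throws, and the *OrDefault / *OrNull variants cover the old behavior.
SELECT toIPv4('not-an-ip');          -- now raises an exception
SELECT toIPv4OrDefault('not-an-ip'); -- returns the default value (0.0.0.0)
SELECT toIPv4OrNull('not-an-ip');    -- returns NULL

-- Restores the pre-change behavior of the conversion functions:
SET cast_ipv4_ipv6_default_on_conversion_error = 1;
```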

View File

@@ -261,12 +261,12 @@ endif ()
# Add a section with the hash of the compiled machine code for integrity checks.
# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
-if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
+if (OBJCOPY_PATH AND CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
    set (USE_BINARY_HASH 1)
endif ()

# Allows to build stripped binary in a separate directory
-if (OBJCOPY_PATH AND READELF_PATH)
+if (OBJCOPY_PATH AND STRIP_PATH)
    option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
    if (INSTALL_STRIPPED_BINARIES)
        set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")

View File

@@ -51,6 +51,6 @@ if (GLIBC_COMPATIBILITY)
    message (STATUS "Some symbols from glibc will be replaced for compatibility")

-elseif (YANDEX_OFFICIAL_BUILD)
+elseif (CLICKHOUSE_OFFICIAL_BUILD)
    message (WARNING "Option GLIBC_COMPATIBILITY must be turned on for production builds.")
endif ()

View File

@@ -1,28 +0,0 @@
#!/usr/bin/env bash
BINARY_PATH=$1
BINARY_NAME=$(basename "$BINARY_PATH")
DESTINATION_STRIPPED_DIR=$2
OBJCOPY_PATH=${3:objcopy}
READELF_PATH=${4:readelf}
BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }')
BUILD_ID_PREFIX=${BUILD_ID:0:2}
BUILD_ID_SUFFIX=${BUILD_ID:2}
DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
mkdir -p "$DESTINATION_STRIP_BINARY_DIR"
cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

View File

@@ -11,16 +11,43 @@ macro(clickhouse_strip_binary)
        message(FATAL_ERROR "A binary path name must be provided for stripping binary")
    endif()

    if (NOT DEFINED STRIP_DESTINATION_DIR)
        message(FATAL_ERROR "Destination directory for stripped binary must be provided")
    endif()

    add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD
-        COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH}
-        COMMENT "Stripping clickhouse binary" VERBATIM
+        COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin"
+        COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
+        COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
+        COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
+        COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
+        COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
+        COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
+        COMMENT "Stripping clickhouse binary" VERBATIM
    )

    install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
-    install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
+    install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse)
endmacro()

+macro(clickhouse_make_empty_debug_info_for_nfpm)
+    set(oneValueArgs TARGET DESTINATION_DIR)
+    cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN})
+
+    if (NOT DEFINED EMPTY_DEBUG_TARGET)
+        message(FATAL_ERROR "A target name must be provided for stripping binary")
+    endif()
+
+    if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR)
+        message(FATAL_ERROR "Destination directory for empty debug must be provided")
+    endif()
+
+    add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD
+        COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug"
+        COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug"
+        COMMENT "Adding empty debug info for NFPM" VERBATIM
+    )
+
+    install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse)
+endmacro()

View File

@@ -170,32 +170,32 @@ else ()
    message (FATAL_ERROR "Cannot find objcopy.")
endif ()

-# Readelf (FIXME copypaste)
+# Strip (FIXME copypaste)
if (COMPILER_GCC)
-    find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf")
+    find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip")
else ()
-    find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf")
+    find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip")
endif ()

-if (NOT READELF_PATH AND OS_DARWIN)
+if (NOT STRIP_PATH AND OS_DARWIN)
    find_program (BREW_PATH NAMES "brew")
    if (BREW_PATH)
        execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
        if (LLVM_PREFIX)
-            find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
+            find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
        endif ()
-        if (NOT READELF_PATH)
+        if (NOT STRIP_PATH)
            execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
            if (BINUTILS_PREFIX)
-                find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
+                find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
            endif ()
        endif ()
    endif ()
endif ()

-if (READELF_PATH)
-    message (STATUS "Using readelf: ${READELF_PATH}")
+if (STRIP_PATH)
+    message (STATUS "Using strip: ${STRIP_PATH}")
else ()
-    message (FATAL_ERROR "Cannot find readelf.")
+    message (FATAL_ERROR "Cannot find strip.")
endif ()

View File

@@ -18,6 +18,6 @@ set (VERSION_STRING_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}")
math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000")

-if(YANDEX_OFFICIAL_BUILD)
+if(CLICKHOUSE_OFFICIAL_BUILD)
    set(VERSION_OFFICIAL " (official build)")
endif()

View File

@@ -69,9 +69,10 @@ endif ()
target_compile_options(_avrocpp PRIVATE ${SUPPRESS_WARNINGS})

# create a symlink to include headers with <avro/...>
+set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
ADD_CUSTOM_TARGET(avro_symlink_headers ALL
-    COMMAND ${CMAKE_COMMAND} -E make_directory "${AVROCPP_ROOT_DIR}/include"
-    COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVROCPP_ROOT_DIR}/include/avro"
+    COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}"
+    COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro"
)
add_dependencies(_avrocpp avro_symlink_headers)
-target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVROCPP_ROOT_DIR}/include")
+target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}")

View File

@@ -27,7 +27,11 @@ target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRAR
# asio

-target_compile_definitions (_boost_headers_only INTERFACE BOOST_ASIO_STANDALONE=1)
+target_compile_definitions (_boost_headers_only INTERFACE
+    BOOST_ASIO_STANDALONE=1
+    # Avoid using of deprecated in c++ > 17 std::result_of
+    BOOST_ASIO_HAS_STD_INVOKE_RESULT=1
+)

# iostreams

contrib/hyperscan (vendored)

@@ -1 +1 @@
-Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa
+Subproject commit 5edc68c5ac68d2d4f876159e9ee84def6d3dc87c

contrib/libcxx (vendored)

@@ -1 +1 @@
-Subproject commit 61e60294b1de01483caa9f5d00f437c99b674de6
+Subproject commit 172b2ae074f6755145b91c53a95c8540c1468239

View File

@@ -18,12 +18,14 @@ set(SRCS
    "${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp"
    "${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp"
    "${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp"
+    "${LIBCXX_SOURCE_DIR}/src/format.cpp"
    "${LIBCXX_SOURCE_DIR}/src/functional.cpp"
    "${LIBCXX_SOURCE_DIR}/src/future.cpp"
    "${LIBCXX_SOURCE_DIR}/src/hash.cpp"
    "${LIBCXX_SOURCE_DIR}/src/ios.cpp"
    "${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp"
    "${LIBCXX_SOURCE_DIR}/src/iostream.cpp"
+    "${LIBCXX_SOURCE_DIR}/src/legacy_pointer_safety.cpp"
    "${LIBCXX_SOURCE_DIR}/src/locale.cpp"
    "${LIBCXX_SOURCE_DIR}/src/memory.cpp"
    "${LIBCXX_SOURCE_DIR}/src/mutex.cpp"
@@ -33,6 +35,9 @@ set(SRCS
    "${LIBCXX_SOURCE_DIR}/src/random.cpp"
    "${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp"
    "${LIBCXX_SOURCE_DIR}/src/regex.cpp"
+    "${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp"
+    "${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp"
+    "${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp"
    "${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp"
    "${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp"
    "${LIBCXX_SOURCE_DIR}/src/string.cpp"
@@ -49,7 +54,9 @@ set(SRCS
add_library(cxx ${SRCS})
set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake")

-target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
+target_include_directories(cxx SYSTEM BEFORE PUBLIC
+    $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>
+    $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}>/src)
target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)

# Enable capturing stack traces for all exceptions.

contrib/libcxxabi (vendored)

@@ -1 +1 @@
-Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076
+Subproject commit 6eb7cc7a7bdd779e6734d1b9fb451df2274462d7

View File

@@ -1,24 +1,24 @@
set(LIBCXXABI_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi")
set(SRCS
-    "${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp"
-    "${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp"
-    "${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
-    "${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp"
-    "${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
-    "${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp"
-    "${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
-    "${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp"
    "${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp"
    "${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp"
    "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp"
-    "${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
    "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp"
-    "${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp"
-    "${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp"
-    "${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
    "${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp"
    "${LIBCXXABI_SOURCE_DIR}/src/stdlib_new_delete.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp"
+    "${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp"
)

add_library(cxxabi ${SRCS})

@@ -30,6 +30,7 @@ target_compile_options(cxxabi PRIVATE -w)
target_include_directories(cxxabi SYSTEM BEFORE
    PUBLIC $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/include>
    PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/include>
+    PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/src>
)
target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY)
target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.

contrib/libxml2 (vendored)

@@ -1 +1 @@
-Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf
+Subproject commit a075d256fd9ff15590b86d981b75a50ead124fca

View File

@@ -1,12 +1,9 @@
-# During cross-compilation in our CI we have to use llvm-tblgen and other building tools
-# tools to be build for host architecture and everything else for target architecture (e.g. AArch64)
-# Possible workaround is to use llvm-tblgen from some package...
-# But lets just enable LLVM for native builds
-if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined")
+if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
    set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
else()
    set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
endif()

option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})

if (NOT ENABLE_EMBEDDED_COMPILER)

contrib/replxx (vendored)

@@ -1 +1 @@
-Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d
+Subproject commit 6f0b6f151ae2a044625ae93acd19ca365fcea64d

View File

@@ -1,4 +1,3 @@
-# rebuild in #33610
# docker build -t clickhouse/docs-check .
ARG FROM_TAG=latest
FROM clickhouse/docs-builder:$FROM_TAG

View File

@@ -163,6 +163,7 @@ def parse_env_variables(
        cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
        cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
        cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
+        cmake_flags.append("-DBUILD_STANDALONE_KEEPER=ON")

    if is_release_build(build_type, package_type, sanitizer, split_binary):
        cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON")
@@ -244,7 +245,7 @@ def parse_env_variables(
        result.append(f"AUTHOR='{author}'")

    if official:
-        cmake_flags.append("-DYANDEX_OFFICIAL_BUILD=1")
+        cmake_flags.append("-DCLICKHOUSE_OFFICIAL_BUILD=1")

    result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"')

View File

@@ -267,6 +267,7 @@ function run_tests
    local test_opts=(
        --hung-check
        --fast-tests-only
+        --no-random-settings
        --no-long
        --testname
        --shard

View File

@@ -13,7 +13,7 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch

BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-13_debug_none_bundled_unsplitted_disable_False_binary"}
-BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
+BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}

function clone
{

View File

@@ -2,7 +2,7 @@
set -euo pipefail

-CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
+CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}

CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}
@@ -10,7 +10,7 @@ if [ -z "$CLICKHOUSE_REPO_PATH" ]; then
    CLICKHOUSE_REPO_PATH=ch
    rm -rf ch ||:
    mkdir ch ||:
-    wget -nv -nd -c "https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz"
+    wget -nv -nd -c "https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz"
    tar -C ch --strip-components=1 -xf clickhouse_no_subs.tar.gz
    ls -lath ||:
fi

View File

@@ -1294,15 +1294,15 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')
    select '' test_name,
        '$(sed -n 's/.*<!--message: \(.*\)-->/\1/p' report.html)' test_status,
        0 test_duration_ms,
-        'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url
+        'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url
    union all
    select test || ' #' || toString(query_index), 'slower' test_status, 0 test_duration_ms,
-        'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.'
+        'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.'
        || test || '.' || toString(query_index) report_url
    from queries where changed_fail != 0 and diff > 0
    union all
    select test || ' #' || toString(query_index), 'unstable' test_status, 0 test_duration_ms,
-        'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.'
+        'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.'
        || test || '.' || toString(query_index) report_url
    from queries where unstable_fail != 0
)

View File

@@ -16,26 +16,17 @@ right_sha=$4
datasets=${CHPC_DATASETS-"hits1 hits10 hits100 values"}

declare -A dataset_paths
-if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
-    dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar"
-    dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar"
-    dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar"
-    dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar"
-else
-    dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar"
-    dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar"
-    dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar"
-    dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar"
-fi
+dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar"
+dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar"
+dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar"
+dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar"

function download
{
    # Historically there were various paths for the performance test package.
    # Test all of them.
-    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz"
-                            "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz"
-                            )
+    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz")

    for path in "${urls_to_try[@]}"
    do

View File

@@ -4,7 +4,7 @@ set -ex
CHPC_CHECK_START_TIMESTAMP="$(date +%s)"
export CHPC_CHECK_START_TIMESTAMP

-S3_URL=${S3_URL:="https://clickhouse-builds.s3.yandex.net"}
+S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
COMMON_BUILD_PREFIX="/clickhouse_build_check"

if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
@@ -64,9 +64,7 @@ function find_reference_sha
    # Historically there were various path for the performance test package,
    # test all of them.
    unset found
-    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz"
-                            "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/clickhouse_build_check/performance/performance.tgz"
-                            )
+    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz")
    for path in "${urls_to_try[@]}"
    do
        if curl_with_retry "$path"

View File

@@ -11,7 +11,7 @@ RUN apt-get update -y \
COPY s3downloader /s3downloader

-ENV S3_URL="https://clickhouse-datasets.s3.yandex.net"
+ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
ENV DATASETS="hits visits"
ENV EXPORT_S3_STORAGE_POLICIES=1

View File

@@ -10,7 +10,7 @@ import requests
import tempfile

-DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net'
+DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'

AVAILABLE_DATASETS = {
    'hits': 'hits_v1.tar',

View File

@@ -41,6 +41,7 @@ sleep 5
./mc admin user add clickminio test testtest
./mc admin policy set clickminio readwrite user=test
./mc mb clickminio/test
+./mc policy set public clickminio/test

# Upload data to Minio. By default after unpacking all tests will in

View File

@@ -29,7 +29,7 @@ COPY ./download_previous_release /download_previous_release
COPY run.sh /

ENV DATASETS="hits visits"
-ENV S3_URL="https://clickhouse-datasets.s3.yandex.net"
+ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
ENV EXPORT_S3_STORAGE_POLICIES=1

CMD ["/bin/bash", "/run.sh"]

View File

@@ -137,7 +137,7 @@ CREATE TABLE test.test_orc
    `f_array_array_float` Array(Array(Float32)),
    `day` String
)
-ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
PARTITION BY day
```

View File

@@ -1616,3 +1616,14 @@ Possible values:
Default value: `10000`.

+## global_memory_usage_overcommit_max_wait_microseconds {#global_memory_usage_overcommit_max_wait_microseconds}
+
+Sets maximum waiting time for global overcommit tracker.
+
+Possible values:
+
+- Positive integer.
+
+Default value: `0`.

View File

@@ -0,0 +1,31 @@
# Memory overcommit

Memory overcommit is an experimental technique intended to allow more flexible memory limits for queries.

The idea of this technique is to introduce settings which represent a guaranteed amount of memory a query can use.
When memory overcommit is enabled and the memory limit is reached, ClickHouse will select the most overcommitted query and try to free memory by killing it.

When the memory limit is reached, any query will wait for some time while attempting to allocate new memory.
If the timeout passes and memory is freed, the query continues execution. Otherwise an exception is thrown and the query is killed.

Selection of the query to stop or kill is performed by either the global or the user overcommit tracker, depending on which memory limit is reached.

## User overcommit tracker

The user overcommit tracker finds the query with the biggest overcommit ratio in the user's query list.
The overcommit ratio is computed as the number of allocated bytes divided by the value of the `max_guaranteed_memory_usage` setting.
The waiting timeout is set by the `memory_usage_overcommit_max_wait_microseconds` setting.

**Example**

```sql
SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage=4000, memory_usage_overcommit_max_wait_microseconds=500
```

## Global overcommit tracker

The global overcommit tracker finds the query with the biggest overcommit ratio in the list of all queries.
In this case the overcommit ratio is computed as the number of allocated bytes divided by the value of the `max_guaranteed_memory_usage_for_user` setting.
The waiting timeout is set by the `global_memory_usage_overcommit_max_wait_microseconds` parameter in the configuration file.
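As a hedged illustration mirroring the user-level example above (the values are arbitrary, and setting the per-user guarantee at query level like this is an assumption, not something this page states):

```sql
-- Illustrative only: a query with a low max_guaranteed_memory_usage_for_user
-- has a high overcommit ratio, so the global tracker would pick it first.
SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage_for_user=4000
```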

View File

@@ -4220,10 +4220,36 @@ Possible values:
- 0 — Disabled.
- 1 — Enabled. The wait time equal shutdown_wait_unfinished config.

-Default value: 0.
+Default value: `0`.

## shutdown_wait_unfinished

The waiting time in seconds for currently handled connections when shutdown server.

-Default Value: 5.
+Default Value: `5`.
+
+## max_guaranteed_memory_usage
+
+Maximum guaranteed memory usage for processing of a single query.
+It represents a soft limit in case the hard limit is reached on the user level.
+Zero means unlimited.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
+
+## memory_usage_overcommit_max_wait_microseconds
+
+Maximum time a thread will wait for memory to be freed in the case of memory overcommit on the user level.
+If the timeout is reached and memory is not freed, an exception is thrown.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
+
+## max_guaranteed_memory_usage_for_user
+
+Maximum guaranteed memory usage for processing all concurrently running queries for the user.
+It represents a soft limit in case the hard limit is reached on the global level.
+Zero means unlimited.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
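A minimal sketch of how the two user-level settings added above combine in a single query (the values are illustrative only, not recommendations from this commit):

```sql
-- Guarantee ~100 MB to this query; if it overcommits and the user-level
-- hard limit is hit, let it wait up to 200 ms for memory before being killed.
SELECT number
FROM numbers(1000000)
GROUP BY number
SETTINGS max_guaranteed_memory_usage = 100000000,
         memory_usage_overcommit_max_wait_microseconds = 200000
```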

View File

@@ -10,7 +10,7 @@ cssmin==0.2.0
future==0.18.2
htmlmin==0.1.12
idna==2.10
-Jinja2>=3.0.3
+Jinja2==3.0.3
jinja2-highlight==0.6.1
jsmin==3.0.0
livereload==2.6.3

View File

@@ -140,7 +140,7 @@ CREATE TABLE test.test_orc
    `f_array_array_float` Array(Array(Float32)),
    `day` String
)
-ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
PARTITION BY day
```

View File

@@ -15,7 +15,7 @@
```
┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐
│ sumburConsistentHash     │ 0            │ 0                │          │
-│ yandexConsistentHash     │ 0            │ 0                │          │
+│ kostikConsistentHash     │ 0            │ 0                │          │
│ demangle                 │ 0            │ 0                │          │
│ addressToLine            │ 0            │ 0                │          │
│ JSONExtractRaw           │ 0            │ 0                │          │

View File

@@ -21,8 +21,12 @@ description: |
  This package contains the debugging symbols for clickhouse-common.

contents:
-- src: root/usr/lib/debug
-  dst: /usr/lib/debug
+- src: root/usr/lib/debug/usr/bin/clickhouse.debug
+  dst: /usr/lib/debug/usr/bin/clickhouse.debug
+- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
+  dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
+- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
+  dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
# docs
- src: ../AUTHORS
  dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS

View File

@@ -0,0 +1,28 @@
# package sources should be placed in ${PWD}/root
# nfpm should run from the same directory with a config
name: "clickhouse-keeper-dbg"
arch: "${DEB_ARCH}" # amd64, arm64
platform: "linux"
version: "${CLICKHOUSE_VERSION_STRING}"
vendor: "ClickHouse Inc."
homepage: "https://clickhouse.com"
license: "Apache"
section: "database"
priority: "optional"
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
description: |
  debugging symbols for clickhouse-keeper
  This package contains the debugging symbols for clickhouse-keeper.

contents:
- src: root/usr/lib/debug/usr/bin/clickhouse-keeper.debug
  dst: /usr/lib/debug/usr/bin/clickhouse-keeper.debug
# docs
- src: ../AUTHORS
  dst: /usr/share/doc/clickhouse-keeper-dbg/AUTHORS
- src: ../CHANGELOG.md
  dst: /usr/share/doc/clickhouse-keeper-dbg/CHANGELOG.md
- src: ../LICENSE
  dst: /usr/share/doc/clickhouse-keeper-dbg/LICENSE
- src: ../README.md
  dst: /usr/share/doc/clickhouse-keeper-dbg/README.md

View File

@@ -0,0 +1,40 @@
# package sources should be placed in ${PWD}/root
# nfpm should run from the same directory with a config
name: "clickhouse-keeper"
arch: "${DEB_ARCH}" # amd64, arm64
platform: "linux"
version: "${CLICKHOUSE_VERSION_STRING}"
vendor: "ClickHouse Inc."
homepage: "https://clickhouse.com"
license: "Apache"
section: "database"
priority: "optional"

conflicts:
- clickhouse-server
depends:
- adduser
suggests:
- clickhouse-keeper-dbg

maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
description: |
  Static clickhouse-keeper binary
  A stand-alone clickhouse-keeper package

contents:
- src: root/etc/clickhouse-keeper
  dst: /etc/clickhouse-keeper
  type: config
- src: root/usr/bin/clickhouse-keeper
  dst: /usr/bin/clickhouse-keeper
# docs
- src: ../AUTHORS
  dst: /usr/share/doc/clickhouse-keeper/AUTHORS
- src: ../CHANGELOG.md
  dst: /usr/share/doc/clickhouse-keeper/CHANGELOG.md
- src: ../LICENSE
  dst: /usr/share/doc/clickhouse-keeper/LICENSE
- src: ../README.md
  dst: /usr/share/doc/clickhouse-keeper/README.md

View File

@@ -473,18 +473,11 @@ else ()
    if (INSTALL_STRIPPED_BINARIES)
        clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse)
    else()
+        clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT})
        install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
    endif()
endif()

-if (NOT INSTALL_STRIPPED_BINARIES)
-    # Install dunny debug directory
-    # TODO: move logic to every place where clickhouse_strip_binary is used
-    add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty )
-    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty)
-endif()
-
if (ENABLE_TESTS)
    set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms)
    add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS})

View File

@@ -71,17 +71,11 @@ if (BUILD_STANDALONE_KEEPER)
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDelta.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDoubleDelta.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecGorilla.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecT64.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/getCompressionCodecForFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp
@@ -137,5 +131,10 @@ if (BUILD_STANDALONE_KEEPER)
    add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
    set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)

-    install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
+    if (INSTALL_STRIPPED_BINARIES)
+        clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper)
+    else()
+        clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
+        install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
+    endif()
endif()

View File

@@ -27,5 +27,6 @@ set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECT
if (INSTALL_STRIPPED_BINARIES)
    clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
else()
+    clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
    install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@@ -42,6 +42,7 @@ endif()
if (INSTALL_STRIPPED_BINARIES)
    clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
else()
+    clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
    install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@@ -966,6 +966,36 @@ int Server::main(const std::vector<std::string> & /*args*/)
        fs::create_directories(path / "metadata_dropped/");
    }

+#if USE_ROCKSDB
+    /// Initialize merge tree metadata cache
+    if (config().has("merge_tree_metadata_cache"))
+    {
+        fs::create_directories(path / "rocksdb/");
+        size_t size = config().getUInt64("merge_tree_metadata_cache.lru_cache_size", 256 << 20);
+        bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false);
+        try
+        {
+            LOG_DEBUG(
+                log, "Initializing merge tree metadata cache lru_cache_size:{} continue_if_corrupted:{}", size, continue_if_corrupted);
+            global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size);
+        }
+        catch (...)
+        {
+            if (continue_if_corrupted)
+            {
+                /// Rename rocksdb directory and reinitialize merge tree metadata cache
+                time_t now = time(nullptr);
+                fs::rename(path / "rocksdb", path / ("rocksdb.old." + std::to_string(now)));
+                global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size);
+            }
+            else
+            {
+                throw;
+            }
+        }
+    }
+#endif
+
    if (config().has("interserver_http_port") && config().has("interserver_https_port"))
        throw Exception("Both http and https interserver ports are specified", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);

View File

@@ -1294,4 +1294,10 @@
        </tables>
    </rocksdb>
    -->
+
+    <!-- Uncomment to enable merge tree metadata cache -->
+    <merge_tree_metadata_cache>
+        <lru_cache_size>268435456</lru_cache_size>
+        <continue_if_corrupted>true</continue_if_corrupted>
+    </merge_tree_metadata_cache>
</clickhouse>

View File

@@ -13,7 +13,7 @@ enum class QuotaType
{
    QUERIES,        /// Number of queries.
    QUERY_SELECTS,  /// Number of select queries.
-    QUERY_INSERTS,  /// Number of inserts queries.
+    QUERY_INSERTS,  /// Number of insert queries.
    ERRORS,         /// Number of queries with exceptions.
    RESULT_ROWS,    /// Number of rows returned as result.
    RESULT_BYTES,   /// Number of bytes returned as result.

View File

@@ -67,7 +67,7 @@ auto parseArguments(const std::string & name, const DataTypes & arguments)
        values_types.push_back(array_type->getNestedType());
    }

-    return std::tuple{std::move(keys_type), std::move(values_types), tuple_argument};
+    return std::tuple<DataTypePtr, DataTypes, bool>{std::move(keys_type), std::move(values_types), tuple_argument};
}

// This function instantiates a particular overload of the sumMap family of

View File

@@ -494,6 +494,11 @@ endif()

 target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::fast_float)

+if (USE_ORC)
+    dbms_target_link_libraries(PUBLIC ${ORC_LIBRARIES})
+    dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR} "${CMAKE_BINARY_DIR}/contrib/orc/c++/include")
+endif ()
+
 if (TARGET ch_contrib::rocksdb)
     dbms_target_link_libraries(PUBLIC ch_contrib::rocksdb)
 endif()
@@ -573,10 +578,6 @@ if (ENABLE_TESTS)
     target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::simdjson)
 endif()

-if(TARGET ch_contrib::rapidjson)
-    target_include_directories(unit_tests_dbms PRIVATE ch_contrib::rapidjson)
-endif()
-
 if (TARGET ch_contrib::yaml_cpp)
     target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::yaml_cpp)
 endif()

View File

@@ -29,15 +29,15 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover(
         time_t decrease_error_period_,
         size_t max_error_cap_)
     : Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover"))
-    , default_load_balancing(load_balancing)
+    , get_priority_load_balancing(load_balancing)
 {
     const std::string & local_hostname = getFQDNOrHostName();

-    hostname_differences.resize(nested_pools.size());
+    get_priority_load_balancing.hostname_differences.resize(nested_pools.size());
     for (size_t i = 0; i < nested_pools.size(); ++i)
     {
         ConnectionPool & connection_pool = dynamic_cast<ConnectionPool &>(*nested_pools[i]);
-        hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
+        get_priority_load_balancing.hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
     }
 }
@@ -51,36 +51,15 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
     };

     size_t offset = 0;
+    LoadBalancing load_balancing = get_priority_load_balancing.load_balancing;
     if (settings)
-        offset = settings->load_balancing_first_offset % nested_pools.size();
-
-    GetPriorityFunc get_priority;
-    switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
     {
-        case LoadBalancing::NEAREST_HOSTNAME:
-            get_priority = [&](size_t i) { return hostname_differences[i]; };
-            break;
-        case LoadBalancing::IN_ORDER:
-            get_priority = [](size_t i) { return i; };
-            break;
-        case LoadBalancing::RANDOM:
-            break;
-        case LoadBalancing::FIRST_OR_RANDOM:
-            get_priority = [offset](size_t i) -> size_t { return i != offset; };
-            break;
-        case LoadBalancing::ROUND_ROBIN:
-            if (last_used >= nested_pools.size())
-                last_used = 0;
-            ++last_used;
-            /* Consider nested_pools.size() equals to 5
-             * last_used = 1 -> get_priority: 0 1 2 3 4
-             * last_used = 2 -> get_priority: 4 0 1 2 3
-             * last_used = 3 -> get_priority: 4 3 0 1 2
-             * ...
-             * */
-            get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; };
-            break;
+        offset = settings->load_balancing_first_offset % nested_pools.size();
+        load_balancing = LoadBalancing(settings->load_balancing);
     }
+    GetPriorityFunc get_priority = get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size());

     UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0;
     bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true;
@@ -173,38 +152,14 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
 ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings)
 {
     size_t offset = 0;
+    LoadBalancing load_balancing = get_priority_load_balancing.load_balancing;
     if (settings)
-        offset = settings->load_balancing_first_offset % nested_pools.size();
-
-    GetPriorityFunc get_priority;
-    switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
     {
-        case LoadBalancing::NEAREST_HOSTNAME:
-            get_priority = [&](size_t i) { return hostname_differences[i]; };
-            break;
-        case LoadBalancing::IN_ORDER:
-            get_priority = [](size_t i) { return i; };
-            break;
-        case LoadBalancing::RANDOM:
-            break;
-        case LoadBalancing::FIRST_OR_RANDOM:
-            get_priority = [offset](size_t i) -> size_t { return i != offset; };
-            break;
-        case LoadBalancing::ROUND_ROBIN:
-            if (last_used >= nested_pools.size())
-                last_used = 0;
-            ++last_used;
-            /* Consider nested_pools.size() equals to 5
-             * last_used = 1 -> get_priority: 0 1 2 3 4
-             * last_used = 2 -> get_priority: 5 0 1 2 3
-             * last_used = 3 -> get_priority: 5 4 0 1 2
-             * ...
-             * */
-            get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; };
-            break;
+        offset = settings->load_balancing_first_offset % nested_pools.size();
+        load_balancing = LoadBalancing(settings->load_balancing);
     }
-    return get_priority;
+    return get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size());
 }

 std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl(

View File

@@ -1,6 +1,7 @@
 #pragma once

 #include <Common/PoolWithFailoverBase.h>
+#include <Common/GetPriorityForLoadBalancing.h>
 #include <Client/ConnectionPool.h>

 #include <chrono>
@@ -109,9 +110,7 @@ private:
     GetPriorityFunc makeGetPriorityFunc(const Settings * settings);

-    std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
-    size_t last_used = 0; /// Last used for round_robin policy.
-    LoadBalancing default_load_balancing;
+    GetPriorityForLoadBalancing get_priority_load_balancing;
 };

 using ConnectionPoolWithFailoverPtr = std::shared_ptr<ConnectionPoolWithFailover>;

View File

@@ -521,7 +521,7 @@ ColumnObject::ColumnObject(bool is_nullable_)
 {
 }

-ColumnObject::ColumnObject(SubcolumnsTree && subcolumns_, bool is_nullable_)
+ColumnObject::ColumnObject(Subcolumns && subcolumns_, bool is_nullable_)
     : is_nullable(is_nullable_)
     , subcolumns(std::move(subcolumns_))
     , num_rows(subcolumns.empty() ? 0 : (*subcolumns.begin())->data.size())
@@ -696,7 +696,7 @@ const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & ke
 ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key)
 {
     if (const auto * node = subcolumns.findLeaf(key))
-        return const_cast<SubcolumnsTree::Node *>(node)->data;
+        return const_cast<Subcolumns::Node *>(node)->data;

     throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObject", key.getPath());
 }
@@ -794,7 +794,7 @@ bool ColumnObject::isFinalized() const
 void ColumnObject::finalize()
 {
     size_t old_size = size();
-    SubcolumnsTree new_subcolumns;
+    Subcolumns new_subcolumns;
     for (auto && entry : subcolumns)
     {
         const auto & least_common_type = entry->data.getLeastCommonType();

View File

@@ -138,20 +138,20 @@ public:
         size_t num_of_defaults_in_prefix = 0;
     };

-    using SubcolumnsTree = SubcolumnsTree<Subcolumn>;
+    using Subcolumns = SubcolumnsTree<Subcolumn>;

 private:
     /// If true then all subcolumns are nullable.
     const bool is_nullable;

-    SubcolumnsTree subcolumns;
+    Subcolumns subcolumns;
     size_t num_rows;

 public:
     static constexpr auto COLUMN_NAME_DUMMY = "_dummy";

     explicit ColumnObject(bool is_nullable_);
-    ColumnObject(SubcolumnsTree && subcolumns_, bool is_nullable_);
+    ColumnObject(Subcolumns && subcolumns_, bool is_nullable_);

     /// Checks that all subcolumns have consistent sizes.
     void checkConsistency() const;
@@ -173,8 +173,8 @@ public:
     /// It cares about consistency of sizes of Nested arrays.
     void addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size);

-    const SubcolumnsTree & getSubcolumns() const { return subcolumns; }
-    SubcolumnsTree & getSubcolumns() { return subcolumns; }
+    const Subcolumns & getSubcolumns() const { return subcolumns; }
+    Subcolumns & getSubcolumns() { return subcolumns; }
     PathsInData getKeys() const;

     /// Finalizes all subcolumns.

View File

@@ -83,11 +83,20 @@ size_t extractMaskNumericImpl(
     const PaddedPODArray<UInt8> * null_bytemap,
     PaddedPODArray<UInt8> * nulls)
 {
+    if constexpr (!column_is_short)
+    {
+        if (data.size() != mask.size())
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
+    }
+
     size_t ones_count = 0;
     size_t data_index = 0;
-    size_t mask_size = mask.size();

-    for (size_t i = 0; i != mask_size; ++i)
+    size_t mask_size = mask.size();
+    size_t data_size = data.size();
+
+    size_t i = 0;
+    for (; i != mask_size && data_index != data_size; ++i)
     {
         // Change mask only where value is 1.
         if (!mask[i])
@@ -120,6 +129,13 @@ size_t extractMaskNumericImpl(
         mask[i] = value;
     }

+    if constexpr (column_is_short)
+    {
+        if (data_index != data_size)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask");
+    }
+
     return ones_count;
 }
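
The two new checks encode the size contract: a full column must match the mask length, while a short column must hold exactly one value per set bit. A standalone sketch of the short-column walk, with simplified types for illustration:

#include <cstdint>
#include <stdexcept>
#include <vector>

// A "short" column stores one value per 1-bit in the mask; after the walk
// every element of data must have been consumed, otherwise the sizes disagree.
size_t countOnesConsuming(const std::vector<uint8_t> & mask, const std::vector<int> & data)
{
    size_t ones = 0;
    size_t data_index = 0;
    for (size_t i = 0; i < mask.size() && data_index < data.size(); ++i)
    {
        if (!mask[i])
            continue;
        ++ones;
        ++data_index;  // consume one short-column value per set mask bit
    }
    if (data_index != data.size())
        throw std::logic_error("short column size != number of ones in mask");
    return ones;
}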

View File

@@ -31,8 +31,8 @@ public:
     /// probably it worth to try to increase stack size for coroutines.
     ///
     /// Current value is just enough for all tests in our CI. It's not selected in some special
-    /// way. We will have 40 pages with 4KB page size.
-    static constexpr size_t default_stack_size = 192 * 1024; /// 64KB was not enough for tests
+    /// way. We will have 80 pages with 4KB page size.
+    static constexpr size_t default_stack_size = 320 * 1024; /// 64KB was not enough for tests

     explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_)
     {

View File

@@ -437,6 +437,7 @@ String FileSegment::stateToString(FileSegment::State state)
         case FileSegment::State::SKIP_CACHE:
             return "SKIP_CACHE";
     }
+    __builtin_unreachable();
 }

 String FileSegmentsHolder::toString()
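
The added __builtin_unreachable() marks the fall-through after an exhaustive switch as impossible, which silences the "control reaches end of non-void function" warning on GCC/Clang without adding a dead return value. A minimal illustration of the same pattern:

// GCC/Clang builtin: tells the optimizer this point is never reached.
enum class State { A, B };

const char * toString(State s)
{
    switch (s)
    {
        case State::A: return "A";
        case State::B: return "B";
    }
    __builtin_unreachable();  // every enum value returned above
}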

View File

@@ -0,0 +1,49 @@
#include <Common/GetPriorityForLoadBalancing.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
std::function<size_t(size_t index)> GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
{
std::function<size_t(size_t index)> get_priority;
switch (load_balance)
{
case LoadBalancing::NEAREST_HOSTNAME:
if (hostname_differences.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized");
get_priority = [&](size_t i) { return hostname_differences[i]; };
break;
case LoadBalancing::IN_ORDER:
get_priority = [](size_t i) { return i; };
break;
case LoadBalancing::RANDOM:
break;
case LoadBalancing::FIRST_OR_RANDOM:
get_priority = [offset](size_t i) -> size_t { return i != offset; };
break;
case LoadBalancing::ROUND_ROBIN:
if (last_used >= pool_size)
last_used = 0;
++last_used;
/* Consider pool_size equals to 5
* last_used = 1 -> get_priority: 0 1 2 3 4
* last_used = 2 -> get_priority: 4 0 1 2 3
* last_used = 3 -> get_priority: 4 3 0 1 2
* ...
* */
get_priority = [&](size_t i)
{
++i;
return i < last_used ? pool_size - i : i - last_used;
};
break;
}
return get_priority;
}
}
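
The ROUND_ROBIN priority table in the comment above can be reproduced with a standalone re-derivation of the same lambda (this snippet is illustrative, not part of the change):

#include <cstddef>
#include <iostream>
#include <vector>

// Re-derives the ROUND_ROBIN mapping for pool_size = 5 from the comment.
std::vector<size_t> roundRobinPriorities(size_t pool_size, size_t last_used)
{
    std::vector<size_t> priorities(pool_size);
    for (size_t i = 0; i < pool_size; ++i)
    {
        size_t j = i + 1;
        priorities[i] = j < last_used ? pool_size - j : j - last_used;
    }
    return priorities;
}

int main()
{
    for (size_t last_used = 1; last_used <= 3; ++last_used)
    {
        std::cout << "last_used = " << last_used << " ->";
        for (size_t p : roundRobinPriorities(5, last_used))
            std::cout << ' ' << p;
        std::cout << '\n';  // prints: 0 1 2 3 4 / 4 0 1 2 3 / 4 3 0 1 2
    }
}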

View File

@@ -0,0 +1,34 @@
#pragma once
#include <Core/SettingsEnums.h>
namespace DB
{
class GetPriorityForLoadBalancing
{
public:
GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {}
GetPriorityForLoadBalancing(){}
bool operator == (const GetPriorityForLoadBalancing & other) const
{
return load_balancing == other.load_balancing && hostname_differences == other.hostname_differences;
}
bool operator != (const GetPriorityForLoadBalancing & other) const
{
return !(*this == other);
}
std::function<size_t(size_t index)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
LoadBalancing load_balancing = LoadBalancing::RANDOM;
private:
mutable size_t last_used = 0; /// Last used for round_robin policy.
};
}

View File

@@ -23,6 +23,12 @@ void OvercommitTracker::setMaxWaitTime(UInt64 wait_time)

 bool OvercommitTracker::needToStopQuery(MemoryTracker * tracker)
 {
+    // NOTE: Do not change the order of locks
+    //
+    // global_mutex must be acquired before overcommit_m, because
+    // method OvercommitTracker::unsubscribe(MemoryTracker *) is
+    // always called with already acquired global_mutex in
+    // ProcessListEntry::~ProcessListEntry().
     std::unique_lock<std::mutex> global_lock(global_mutex);
     std::unique_lock<std::mutex> lk(overcommit_m);

@@ -76,7 +82,7 @@ void UserOvercommitTracker::pickQueryToExcludeImpl()
     MemoryTracker * query_tracker = nullptr;
     OvercommitRatio current_ratio{0, 0};
     // At this moment query list must be read only.
-    // BlockQueryIfMemoryLimit is used in ProcessList to guarantee this.
+    // This is guaranteed by locking global_mutex in OvercommitTracker::needToStopQuery.
     auto & queries = user_process_list->queries;
     LOG_DEBUG(logger, "Trying to choose query to stop from {} queries", queries.size());
     for (auto const & query : queries)
@@ -111,9 +117,9 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl()
     MemoryTracker * query_tracker = nullptr;
     OvercommitRatio current_ratio{0, 0};
     // At this moment query list must be read only.
-    // BlockQueryIfMemoryLimit is used in ProcessList to guarantee this.
-    LOG_DEBUG(logger, "Trying to choose query to stop");
-    process_list->processEachQueryStatus([&](DB::QueryStatus const & query)
+    // This is guaranteed by locking global_mutex in OvercommitTracker::needToStopQuery.
+    LOG_DEBUG(logger, "Trying to choose query to stop from {} queries", process_list->size());
+    for (auto const & query : process_list->processes)
     {
         if (query.isKilled())
             return;
@@ -134,7 +140,7 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl()
             query_tracker = memory_tracker;
             current_ratio = ratio;
         }
-    });
+    }
     LOG_DEBUG(logger, "Selected to stop query with overcommit ratio {}/{}",
         current_ratio.committed, current_ratio.soft_limit);
     picked_tracker = query_tracker;
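
A minimal sketch of the documented lock order (the mutex names match the members above; the bodies are placeholders):

#include <mutex>

std::mutex global_mutex;   // protects the process/query list
std::mutex overcommit_m;   // protects picked_tracker and cancelation state

void needToStopQueryLike()
{
    // Always global_mutex first, then overcommit_m. unsubscribe() runs with
    // global_mutex already held, so the reverse order could deadlock.
    std::unique_lock<std::mutex> global_lock(global_mutex);
    std::unique_lock<std::mutex> lk(overcommit_m);
    // ... inspect the (now read-only) query list and pick one to stop ...
}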

View File

@@ -43,8 +43,6 @@ class MemoryTracker;
 // is killed to free memory.
 struct OvercommitTracker : boost::noncopyable
 {
-    explicit OvercommitTracker(std::mutex & global_mutex_);
-
     void setMaxWaitTime(UInt64 wait_time);

     bool needToStopQuery(MemoryTracker * tracker);
@@ -54,8 +52,12 @@ struct OvercommitTracker : boost::noncopyable
     virtual ~OvercommitTracker() = default;

 protected:
+    explicit OvercommitTracker(std::mutex & global_mutex_);
+
     virtual void pickQueryToExcludeImpl() = 0;

+    // This mutex is used to disallow concurrent access
+    // to picked_tracker and cancelation_state variables.
     mutable std::mutex overcommit_m;
     mutable std::condition_variable cv;

@@ -87,6 +89,11 @@ private:
         }
     }

+    // Global mutex which is used in ProcessList to synchronize
+    // insertion and deletion of queries.
+    // OvercommitTracker::pickQueryToExcludeImpl() implementations
+    // require this mutex to be locked, because they read list (or sublist)
+    // of queries.
     std::mutex & global_mutex;
 };

View File

@@ -9,6 +9,7 @@
     M(SelectQuery, "Same as Query, but only for SELECT queries.") \
     M(InsertQuery, "Same as Query, but only for INSERT queries.") \
     M(AsyncInsertQuery, "Same as InsertQuery, but only for asynchronous INSERT queries.") \
+    M(AsyncInsertBytes, "Data size in bytes of asynchronous INSERT queries.") \
     M(FailedQuery, "Number of failed queries.") \
     M(FailedSelectQuery, "Same as FailedQuery, but only for SELECT queries.") \
     M(FailedInsertQuery, "Same as FailedQuery, but only for INSERT queries.") \
@@ -284,6 +285,13 @@
     \
     M(MainConfigLoads, "Number of times the main configuration was reloaded.") \
     \
+    M(MergeTreeMetadataCacheGet, "Number of rocksdb reads (used for merge tree metadata cache)") \
+    M(MergeTreeMetadataCachePut, "Number of rocksdb puts (used for merge tree metadata cache)") \
+    M(MergeTreeMetadataCacheDelete, "Number of rocksdb deletes (used for merge tree metadata cache)") \
+    M(MergeTreeMetadataCacheSeek, "Number of rocksdb seeks (used for merge tree metadata cache)") \
+    M(MergeTreeMetadataCacheHit, "Number of times the read of meta file was done from MergeTree metadata cache") \
+    M(MergeTreeMetadataCacheMiss, "Number of times the read of meta file was not done from MergeTree metadata cache") \
+    \
     M(ScalarSubqueriesGlobalCacheHit, "Number of times a read from a scalar subquery was done using the global cache") \
     M(ScalarSubqueriesLocalCacheHit, "Number of times a read from a scalar subquery was done using the local cache") \
     M(ScalarSubqueriesCacheMiss, "Number of times a read from a scalar subquery was not cached and had to be calculated completely")

View File

@@ -22,7 +22,6 @@ target_link_libraries (clickhouse_common_zookeeper_no_log
     PRIVATE
         string_utils
 )
-
 if (ENABLE_EXAMPLES)
     add_subdirectory(examples)
 endif()

View File

@@ -5,15 +5,15 @@
 #include <functional>
 #include <filesystem>

-#include <pcg-random/pcg_random.hpp>
+#include <base/logger_useful.h>
 #include <base/find_symbols.h>
-#include <Common/randomSeed.h>
+#include <base/getFQDNOrHostName.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/Exception.h>
+#include <Common/isLocalAddress.h>

 #include <Poco/Net/NetException.h>
+#include <Poco/Net/DNS.h>

 #define ZOOKEEPER_CONNECTION_TIMEOUT_MS 1000
@@ -48,7 +48,7 @@ static void check(Coordination::Error code, const std::string & path)

 void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
-                     int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_)
+                     int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
 {
     log = &Poco::Logger::get("ZooKeeper");
     hosts = hosts_;
@@ -57,6 +57,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
     operation_timeout_ms = operation_timeout_ms_;
     chroot = chroot_;
     implementation = implementation_;
+    get_priority_load_balancing = get_priority_load_balancing_;

     if (implementation == "zookeeper")
     {
@@ -66,14 +67,13 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
         Coordination::ZooKeeper::Nodes nodes;
         nodes.reserve(hosts.size());

-        Strings shuffled_hosts = hosts;
         /// Shuffle the hosts to distribute the load among ZooKeeper nodes.
-        pcg64 generator(randomSeed());
-        std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator);
+        std::vector<ShuffleHost> shuffled_hosts = shuffleHosts();

         bool dns_error = false;
-        for (auto & host_string : shuffled_hosts)
+        for (auto & host : shuffled_hosts)
         {
+            auto & host_string = host.host;
             try
             {
                 bool secure = bool(startsWith(host_string, "secure://"));
@@ -81,6 +81,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
                 if (secure)
                     host_string.erase(0, strlen("secure://"));

+                LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString());
                 nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure});
             }
             catch (const Poco::Net::HostNotFoundException & e)
@@ -154,23 +155,47 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
     }
 }

+std::vector<ShuffleHost> ZooKeeper::shuffleHosts() const
+{
+    std::function<size_t(size_t index)> get_priority = get_priority_load_balancing.getPriorityFunc(get_priority_load_balancing.load_balancing, 0, hosts.size());
+    std::vector<ShuffleHost> shuffle_hosts;
+    for (size_t i = 0; i < hosts.size(); ++i)
+    {
+        ShuffleHost shuffle_host;
+        shuffle_host.host = hosts[i];
+        if (get_priority)
+            shuffle_host.priority = get_priority(i);
+        shuffle_host.randomize();
+        shuffle_hosts.emplace_back(shuffle_host);
+    }
+
+    std::sort(
+        shuffle_hosts.begin(), shuffle_hosts.end(),
+        [](const ShuffleHost & lhs, const ShuffleHost & rhs)
+        {
+            return ShuffleHost::compare(lhs, rhs);
+        });
+
+    return shuffle_hosts;
+}
+
 ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_,
                      int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
-                     std::shared_ptr<DB::ZooKeeperLog> zk_log_)
+                     std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
 {
     zk_log = std::move(zk_log_);
     Strings hosts_strings;
     splitInto<','>(hosts_strings, hosts_string);

-    init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
+    init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
 }

 ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_,
                      int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
-                     std::shared_ptr<DB::ZooKeeperLog> zk_log_)
+                     std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
 {
     zk_log = std::move(zk_log_);
-    init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
+    init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
 }

 struct ZooKeeperArgs
@@ -213,6 +238,15 @@ struct ZooKeeperArgs
             {
                 implementation = config.getString(config_name + "." + key);
             }
+            else if (key == "zookeeper_load_balancing")
+            {
+                String load_balancing_str = config.getString(config_name + "." + key);
+                /// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`)
+                auto load_balancing = magic_enum::enum_cast<DB::LoadBalancing>(Poco::toUpper(load_balancing_str));
+                if (!load_balancing)
+                    throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str);
+                get_priority_load_balancing.load_balancing = *load_balancing;
+            }
             else
                 throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS);
         }
@@ -224,6 +258,15 @@ struct ZooKeeperArgs
             if (chroot.back() == '/')
                 chroot.pop_back();
         }
+
+        /// init get_priority_load_balancing
+        get_priority_load_balancing.hostname_differences.resize(hosts.size());
+        const String & local_hostname = getFQDNOrHostName();
+        for (size_t i = 0; i < hosts.size(); ++i)
+        {
+            const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':'));
+            get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host);
+        }
     }

     Strings hosts;
@@ -232,13 +275,14 @@ struct ZooKeeperArgs
     int operation_timeout_ms;
     std::string chroot;
     std::string implementation;
+    GetPriorityForLoadBalancing get_priority_load_balancing;
 };

 ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_)
     : zk_log(std::move(zk_log_))
 {
     ZooKeeperArgs args(config, config_name);
-    init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot);
+    init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing);
 }

 bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const
@@ -249,8 +293,11 @@ bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config,
     if (args.implementation == implementation && implementation == "testkeeper")
         return false;

-    return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot)
-        != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot);
+    if (args.get_priority_load_balancing != get_priority_load_balancing)
+        return true;
+
+    return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing)
+        != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, args.get_priority_load_balancing);
 }

@@ -757,7 +804,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &

 ZooKeeperPtr ZooKeeper::startNewSession() const
 {
-    return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log);
+    return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing);
 }

View File

@@ -13,7 +13,10 @@
 #include <Common/Stopwatch.h>
 #include <Common/ZooKeeper/IKeeper.h>
 #include <Common/ZooKeeper/ZooKeeperConstants.h>
+#include <Common/GetPriorityForLoadBalancing.h>
+#include <Common/thread_local_rng.h>
 #include <unistd.h>
+#include <random>

 namespace ProfileEvents
@@ -37,6 +40,25 @@ namespace zkutil

 /// Preferred size of multi() command (in number of ops)
 constexpr size_t MULTI_BATCH_SIZE = 100;

+struct ShuffleHost
+{
+    String host;
+    Int64 priority = 0;
+    UInt32 random = 0;
+
+    void randomize()
+    {
+        random = thread_local_rng();
+    }
+
+    static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs)
+    {
+        return std::forward_as_tuple(lhs.priority, lhs.random)
+            < std::forward_as_tuple(rhs.priority, rhs.random);
+    }
+};
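
ShuffleHost orders by (priority, random): the random tie-breaker spreads the load across hosts of equal priority while still respecting the priority computed by the load balancing policy. A standalone sketch of that ordering:

#include <algorithm>
#include <random>
#include <string>
#include <tuple>
#include <vector>

struct Host { std::string name; long priority = 0; unsigned random = 0; };

int main()
{
    std::mt19937 rng(std::random_device{}());
    std::vector<Host> hosts{{"zk1", 0}, {"zk2", 0}, {"zk3", 1}};
    for (auto & h : hosts)
        h.random = rng();  // fresh tie-breaker per shuffle
    std::sort(hosts.begin(), hosts.end(), [](const Host & a, const Host & b)
    {
        return std::tie(a.priority, a.random) < std::tie(b.priority, b.random);
    });
    // zk1 and zk2 come first in random order; zk3 (higher distance) is last.
}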
+using GetPriorityForLoadBalancing = DB::GetPriorityForLoadBalancing;
+
 /// ZooKeeper session. The interface is substantially different from the usual libzookeeper API.
 ///
@@ -58,14 +80,16 @@ public:
               int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
               const std::string & chroot_ = "",
               const std::string & implementation_ = "zookeeper",
-              std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
+              std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
+              const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});

     explicit ZooKeeper(const Strings & hosts_, const std::string & identity_ = "",
               int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS,
               int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
               const std::string & chroot_ = "",
               const std::string & implementation_ = "zookeeper",
-              std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
+              std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
+              const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});

     /** Config of the form:
         <zookeeper>
@@ -91,6 +115,8 @@ public:
      */
     ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_);

+    std::vector<ShuffleHost> shuffleHosts() const;
+
     /// Creates a new session with the same parameters. This method can be used for reconnecting
     /// after the session has expired.
     /// This object remains unchanged, and the new session is returned.
@@ -284,7 +310,7 @@ private:
     friend class EphemeralNodeHolder;

     void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
-              int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_);
+              int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_);

     /// The following methods don't throw any exceptions but return error codes.
     Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);
@@ -311,6 +337,8 @@ private:
     Poco::Logger * log = nullptr;
     std::shared_ptr<DB::ZooKeeperLog> zk_log;

+    GetPriorityForLoadBalancing get_priority_load_balancing;
+
     AtomicStopwatch session_uptime;
 };

View File

@@ -451,7 +451,7 @@ void ZooKeeper::connect(
     }
     else
     {
-        LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}", socket.peerAddress().toString(), session_id);
+        LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}{}", socket.peerAddress().toString(), session_id, fail_reasons.str());
     }
 }

View File

@@ -11,7 +11,7 @@
 constexpr size_t IPV4_BINARY_LENGTH = 4;
 constexpr size_t IPV6_BINARY_LENGTH = 16;
 constexpr size_t IPV4_MAX_TEXT_LENGTH = 15;     /// Does not count tail zero byte.
-constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
+constexpr size_t IPV6_MAX_TEXT_LENGTH = 45;     /// Does not count tail zero byte.

 namespace DB
 {

View File

@@ -124,6 +124,7 @@ bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_

 size_t getHostNameDifference(const std::string & local_hostname, const std::string & host)
 {
+    /// FIXME should we replace it with Levenstein distance? (we already have it in NamePrompter)
     size_t hostname_difference = 0;
     for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i)
         if (local_hostname[i] != host[i])
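
getHostNameDifference counts mismatching characters over the common prefix length only, which is why the FIXME suggests Levenshtein distance: any extra suffix on the longer name contributes nothing. A standalone restatement of the metric:

#include <algorithm>
#include <cstddef>
#include <string>

// Counts positions that differ within min(len(a), len(b)); length differences
// beyond the common prefix are ignored entirely.
size_t hostNameDifference(const std::string & a, const std::string & b)
{
    size_t diff = 0;
    for (size_t i = 0; i < std::min(a.length(), b.length()); ++i)
        if (a[i] != b[i])
            ++diff;
    return diff;
}
// hostNameDifference("host-1.dc1", "host-2.dc1") == 1
// hostNameDifference("host-1", "host-11") == 0  (suffix is not counted)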

View File

@@ -1,7 +1,18 @@
 #include "gtest_global_context.h"

 const ContextHolder & getContext()
+{
+    return getMutableContext();
+}
+
+ContextHolder & getMutableContext()
 {
     static ContextHolder holder;
     return holder;
 }
+
+void destroyContext()
+{
+    auto & holder = getMutableContext();
+    return holder.destroy();
+}

View File

@@ -16,6 +16,17 @@ struct ContextHolder
     }

     ContextHolder(ContextHolder &&) = default;

+    void destroy()
+    {
+        context->shutdown();
+        context.reset();
+        shared_context.reset();
+    }
 };

 const ContextHolder & getContext();
+ContextHolder & getMutableContext();
+void destroyContext();

View File

@@ -165,25 +165,36 @@ void registerCodecNone(CompressionCodecFactory & factory);
 void registerCodecLZ4(CompressionCodecFactory & factory);
 void registerCodecLZ4HC(CompressionCodecFactory & factory);
 void registerCodecZSTD(CompressionCodecFactory & factory);
+void registerCodecMultiple(CompressionCodecFactory & factory);
+
+/// Keeper uses only general-purpose codecs, so we don't need these special codecs
+/// in the standalone build
+#ifndef KEEPER_STANDALONE_BUILD
 void registerCodecDelta(CompressionCodecFactory & factory);
 void registerCodecT64(CompressionCodecFactory & factory);
 void registerCodecDoubleDelta(CompressionCodecFactory & factory);
 void registerCodecGorilla(CompressionCodecFactory & factory);
 void registerCodecEncrypted(CompressionCodecFactory & factory);
-void registerCodecMultiple(CompressionCodecFactory & factory);
+#endif

 CompressionCodecFactory::CompressionCodecFactory()
 {
-    registerCodecLZ4(*this);
     registerCodecNone(*this);
+    registerCodecLZ4(*this);
     registerCodecZSTD(*this);
     registerCodecLZ4HC(*this);
+    registerCodecMultiple(*this);
+
+#ifndef KEEPER_STANDALONE_BUILD
     registerCodecDelta(*this);
     registerCodecT64(*this);
     registerCodecDoubleDelta(*this);
     registerCodecGorilla(*this);
     registerCodecEncrypted(*this);
-    registerCodecMultiple(*this);
+#endif

     default_codec = get("LZ4", {});
 }

View File

@@ -13,6 +13,7 @@
 #include <iterator>

 #include <base/sort.h>
+#include <boost/algorithm/string.hpp>

 namespace DB
@@ -269,8 +270,18 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const
 }

-const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
+const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const
 {
+    if (case_insensitive)
+    {
+        auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); });
+        if (found == data.end())
+        {
+            return nullptr;
+        }
+        return &*found;
+    }
+
     auto it = index_by_name.find(name);
     if (index_by_name.end() == it)
     {
@@ -280,19 +291,23 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
 }

-const ColumnWithTypeAndName & Block::getByName(const std::string & name) const
+const ColumnWithTypeAndName & Block::getByName(const std::string & name, bool case_insensitive) const
 {
-    const auto * result = findByName(name);
+    const auto * result = findByName(name, case_insensitive);
     if (!result)
-        throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
-            , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
+        throw Exception(
+            "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);

     return *result;
 }

-bool Block::has(const std::string & name) const
+bool Block::has(const std::string & name, bool case_insensitive) const
 {
+    if (case_insensitive)
+        return std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); })
+            != data.end();
+
     return index_by_name.end() != index_by_name.find(name);
 }

@@ -301,8 +316,8 @@ size_t Block::getPositionByName(const std::string & name) const
 {
     auto it = index_by_name.find(name);
     if (index_by_name.end() == it)
-        throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
-            , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
+        throw Exception(
+            "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);

     return it->second;
 }
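
Note that the case-insensitive branch bypasses the index_by_name hash map and degrades to a linear scan with per-character comparison, which is the trade-off boost::iequals makes here. A simplified standalone version of that lookup:

#include <algorithm>
#include <cctype>
#include <string>
#include <vector>

struct Col { std::string name; };

// Linear scan with an ASCII case-insensitive comparison (boost::iequals
// plays this role in the real code, with locale support).
const Col * findByNameCaseInsensitive(const std::vector<Col> & cols, const std::string & name)
{
    auto char_eq = [](char x, char y)
    {
        return std::tolower(static_cast<unsigned char>(x)) == std::tolower(static_cast<unsigned char>(y));
    };
    auto it = std::find_if(cols.begin(), cols.end(), [&](const Col & c)
    {
        return c.name.size() == name.size()
            && std::equal(c.name.begin(), c.name.end(), name.begin(), char_eq);
    });
    return it == cols.end() ? nullptr : &*it;
}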

View File

@@ -60,21 +60,21 @@ public:
     ColumnWithTypeAndName & safeGetByPosition(size_t position);
     const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;

-    ColumnWithTypeAndName * findByName(const std::string & name)
+    ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false)
     {
         return const_cast<ColumnWithTypeAndName *>(
-            const_cast<const Block *>(this)->findByName(name));
+            const_cast<const Block *>(this)->findByName(name, case_insensitive));
     }

-    const ColumnWithTypeAndName * findByName(const std::string & name) const;
+    const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const;

-    ColumnWithTypeAndName & getByName(const std::string & name)
+    ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false)
     {
         return const_cast<ColumnWithTypeAndName &>(
-            const_cast<const Block *>(this)->getByName(name));
+            const_cast<const Block *>(this)->getByName(name, case_insensitive));
     }

-    const ColumnWithTypeAndName & getByName(const std::string & name) const;
+    const ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) const;

     Container::iterator begin() { return data.begin(); }
     Container::iterator end() { return data.end(); }
@@ -83,7 +83,7 @@ public:
     Container::const_iterator cbegin() const { return data.cbegin(); }
     Container::const_iterator cend() const { return data.cend(); }

-    bool has(const std::string & name) const;
+    bool has(const std::string & name, bool case_insensitive = false) const;

     size_t getPositionByName(const std::string & name) const;

View File

@@ -616,11 +616,13 @@ class IColumn;
     M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
    M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \
     M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \
+    M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \
     M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \
+    M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \
     M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \
     M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \
+    M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \
     M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
+    M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
     M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
     M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
     M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \

View File

@@ -149,4 +149,5 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
                        {"str", FormatSettings::MsgPackUUIDRepresentation::STR},
                        {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})

 }

View File

@@ -187,5 +187,4 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparin
 DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule)

 DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation)
-
 }

View File

@@ -33,7 +33,7 @@ DataTypePtr DataTypeFactory::get(const String & full_name) const
     /// Value 315 is known to cause stack overflow in some test configurations (debug build, sanitizers)
     /// let's make the threshold significantly lower.
     /// It is impractical for user to have complex data types with this depth.
-    static constexpr size_t data_type_max_parse_depth = 150;
+    static constexpr size_t data_type_max_parse_depth = 200;

     ParserDataType parser;
     ASTPtr ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", 0, data_type_max_parse_depth);

View File

@@ -15,6 +15,8 @@

 #include <Parsers/IAST.h>

+#include <boost/algorithm/string/case_conv.hpp>
+
 namespace DB
 {
@@ -227,14 +229,17 @@ void validateArraySizes(const Block & block)
 }

-std::unordered_set<String> getAllTableNames(const Block & block)
+std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case)
 {
     std::unordered_set<String> nested_table_names;
-    for (auto & name : block.getNames())
+    for (const auto & name : block.getNames())
     {
         auto nested_table_name = Nested::extractTableName(name);
+        if (to_lower_case)
+            boost::to_lower(nested_table_name);
+
         if (!nested_table_name.empty())
-            nested_table_names.insert(nested_table_name);
+            nested_table_names.insert(std::move(nested_table_name));
     }
     return nested_table_names;
 }

View File

@@ -32,7 +32,7 @@ namespace Nested
     void validateArraySizes(const Block & block);

     /// Get all nested tables names from a block.
-    std::unordered_set<String> getAllTableNames(const Block & block);
+    std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case = false);
 }

 }

View File

@@ -63,12 +63,12 @@ private:
         size_t num_dimensions_to_keep;
     };

-    using Node = typename ColumnObject::SubcolumnsTree::Node;
+    using Node = typename ColumnObject::Subcolumns::Node;

     /// Finds a subcolumn from the same Nested type as @entry and inserts
     /// an array with default values with consistent sizes as in Nested type.
     bool tryInsertDefaultFromNested(
-        const std::shared_ptr<Node> & entry, const ColumnObject::SubcolumnsTree & subcolumns)
+        const std::shared_ptr<Node> & entry, const ColumnObject::Subcolumns & subcolumns)
     {
         if (!entry->path.hasNested())
             return false;
@@ -198,7 +198,7 @@ void SerializationObject<Parser>::deserializeWholeText(IColumn & column, ReadBuf
 template <typename Parser>
 void SerializationObject<Parser>::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
 {
-    deserializeTextImpl(column, [&](String & s) { readEscapedStringInto(s, istr); });
+    deserializeTextImpl(column, [&](String & s) { readEscapedString(s, istr); });
 }

 template <typename Parser>

View File

@@ -88,6 +88,9 @@ DatabaseReplicated::DatabaseReplicated(
     /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
     if (zookeeper_path.front() != '/')
         zookeeper_path = "/" + zookeeper_path;
+
+    if (!db_settings.collection_name.value.empty())
+        fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef());
 }

 String DatabaseReplicated::getFullReplicaName() const
@@ -191,22 +194,36 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
         shards.back().emplace_back(unescapeForFileName(host_port));
     }

-    String username = db_settings.cluster_username;
-    String password = db_settings.cluster_password;
     UInt16 default_port = getContext()->getTCPPort();
-    bool secure = db_settings.cluster_secure_connection;

     bool treat_local_as_remote = false;
     bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL;
     return std::make_shared<Cluster>(
         getContext()->getSettingsRef(),
         shards,
-        username,
-        password,
+        cluster_auth_info.cluster_username,
+        cluster_auth_info.cluster_password,
         default_port,
         treat_local_as_remote,
         treat_local_port_as_remote,
-        secure);
+        cluster_auth_info.cluster_secure_connection,
+        /*priority=*/1,
+        database_name,
+        cluster_auth_info.cluster_secret);
+}
+
+void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref)
+{
+    const auto & config_prefix = fmt::format("named_collections.{}", collection_name);
+
+    if (!config_ref.has(config_prefix))
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name);
+
+    cluster_auth_info.cluster_username = config_ref.getString(config_prefix + ".cluster_username", "");
+    cluster_auth_info.cluster_password = config_ref.getString(config_prefix + ".cluster_password", "");
+    cluster_auth_info.cluster_secret = config_ref.getString(config_prefix + ".cluster_secret", "");
+    cluster_auth_info.cluster_secure_connection = config_ref.getBool(config_prefix + ".cluster_secure_connection", false);
 }

 void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(bool force_attach)

View File

@@ -75,6 +75,16 @@ private:
     bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
     void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);

+    struct
+    {
+        String cluster_username{"default"};
+        String cluster_password;
+        String cluster_secret;
+        bool cluster_secure_connection{false};
+    } cluster_auth_info;
+
+    void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config);
+
     void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const;

     void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr);

View File

@@ -8,12 +8,11 @@ namespace DB

 class ASTStorage;

 #define LIST_OF_DATABASE_REPLICATED_SETTINGS(M) \
     M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \
     M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \
     M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \
-    M(String, cluster_username, "default", "Username to use when connecting to hosts of cluster", 0) \
-    M(String, cluster_password, "", "Password to use when connecting to hosts of cluster", 0) \
-    M(Bool, cluster_secure_connection, false, "Enable TLS when connecting to hosts of cluster", 0) \
+    M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \

 DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS)


@@ -96,6 +96,7 @@ private:
            case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE:
                return "REMOTE_FS_READ_AND_PUT_IN_CACHE";
        }
+       __builtin_unreachable();
    }

    size_t first_offset = 0;
};
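
Context for the one-line addition: even when a switch covers every enumerator, C++ allows an enum to hold values outside its enumerators, so the compiler cannot prove a non-void function always returns and warns accordingly. The added __builtin_unreachable() both silences the warning and tells the optimizer the fall-through is impossible. A minimal standalone illustration of the pattern:

enum class ReadType { CACHED, REMOTE };

const char * toString(ReadType type)
{
    switch (type)
    {
        case ReadType::CACHED: return "CACHED";
        case ReadType::REMOTE: return "REMOTE";
    }
    // Without this line, -Wreturn-type fires even though the switch is
    // exhaustive over the declared enumerators.
    __builtin_unreachable();
}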


@@ -20,6 +20,7 @@
 #include <Common/getRandomASCIIString.h>

 #include <Interpreters/Context.h>
+#include <Interpreters/threadPoolCallbackRunner.h>
 #include <IO/ReadBufferFromS3.h>
 #include <IO/ReadBufferFromString.h>
 #include <IO/ReadHelpers.h>

@@ -264,32 +265,6 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
     LOG_TRACE(log, "{} to file by path: {}. S3 path: {}",
         mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name);

-    ScheduleFunc schedule = [pool = &getThreadPoolWriter(), thread_group = CurrentThread::getGroup()](auto callback)
-    {
-        pool->scheduleOrThrow([callback = std::move(callback), thread_group]()
-        {
-            if (thread_group)
-                CurrentThread::attachTo(thread_group);
-
-            SCOPE_EXIT_SAFE(
-                if (thread_group)
-                    CurrentThread::detachQueryIfNotDetached();
-
-                /// After we detach from the thread_group, the parent for memory_tracker inside ThreadStatus will be reset to its parent.
-                /// Typically, it may change from Process to User.
-                /// Usually that is fine, because the thread pool task is executed before the user-level memory tracker is destroyed.
-                /// However, the thread could stay alive inside the thread pool, and its ThreadStatus as well.
-                /// When, finally, we destroy the thread (and the ThreadStatus),
-                /// it can use the memory tracker in ~ThreadStatus in order to alloc/free untracked_memory,
-                /// and by this time the user-level memory tracker may already be destroyed.
-                ///
-                /// As a work-around, reset the memory tracker to total_memory_tracker, which is always alive.
-                CurrentThread::get().memory_tracker.setParent(&total_memory_tracker);
-            );
-            callback();
-        });
-    };
-
     auto s3_buffer = std::make_unique<WriteBufferFromS3>(
         settings->client,
         bucket,

@@ -299,7 +274,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
         settings->s3_upload_part_size_multiply_parts_count_threshold,
         settings->s3_max_single_part_upload_size,
         std::move(object_metadata),
-        buf_size, std::move(schedule));
+        buf_size, threadPoolCallbackRunner(getThreadPoolWriter()));

     auto create_metadata_callback = [this, path, blob_name, mode] (size_t count)
     {
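
The deleted lambda is exactly the boilerplate that threadPoolCallbackRunner now owns: wrap a callback so it attaches to the caller's thread group on entry and detaches (resetting the memory-tracker parent) on exit, then schedule it on a pool. A simplified sketch of the shape of such a helper, with std::thread standing in for ClickHouse's ThreadPool and the tracker bookkeeping elided (names are illustrative):

#include <functional>
#include <thread>
#include <utility>

using ScheduleFunc = std::function<void(std::function<void()>)>;

ScheduleFunc makeCallbackRunnerSketch()
{
    return [](std::function<void()> callback)
    {
        std::thread([callback = std::move(callback)]
        {
            // Real helper: CurrentThread::attachTo(thread_group) here, plus
            // the SCOPE_EXIT_SAFE teardown shown in the removed code above.
            callback();
        }).detach();
    };
}

Centralizing this in one factory function means every async writer gets the same, correct attach/detach sequence instead of each call site copying it.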


@@ -89,10 +89,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
     format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
     format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
     format_settings.null_as_default = settings.input_format_null_as_default;
-    format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name;
     format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
     format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
     format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
+    format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
     format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
     format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
     format_settings.pretty.color = settings.output_format_pretty_color;
@@ -123,9 +123,11 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
     format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary;
     format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
     format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns;
+    format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching;
     format_settings.orc.import_nested = settings.input_format_orc_import_nested;
     format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
     format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
+    format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
     format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
     format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
     format_settings.seekable_read = settings.input_format_allow_seeks;
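
These per-format case_insensitive_column_matching settings replace the single coarse use_lowercase_column_name flag removed above. A rough sketch of the comparison such a flag enables when mapping file columns to table columns (illustrative only, not the actual matching code):

#include <algorithm>
#include <cctype>
#include <string>

static std::string toLower(std::string s)
{
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return std::tolower(c); });
    return s;
}

bool columnNamesMatch(const std::string & schema_name, const std::string & file_name, bool case_insensitive)
{
    return case_insensitive ? toLower(schema_name) == toLower(file_name)
                            : schema_name == file_name;
}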


@@ -32,7 +32,6 @@ struct FormatSettings
     bool null_as_default = true;
     bool decimal_trailing_zeros = false;
     bool defaults_for_omitted_fields = true;
-    bool use_lowercase_column_name = false;
     bool seekable_read = true;
     UInt64 max_rows_to_read_for_schema_inference = 100;
@@ -75,6 +74,7 @@ struct FormatSettings
         bool low_cardinality_as_dictionary = false;
         bool import_nested = false;
         bool allow_missing_columns = false;
+        bool case_insensitive_column_matching = false;
     } arrow;

     struct
@@ -137,6 +137,7 @@ struct FormatSettings
         UInt64 row_group_size = 1000000;
         bool import_nested = false;
         bool allow_missing_columns = false;
+        bool case_insensitive_column_matching = false;
     } parquet;

     struct Pretty
@@ -217,6 +218,7 @@ struct FormatSettings
         bool import_nested = false;
         bool allow_missing_columns = false;
         int64_t row_batch_size = 100'000;
+        bool case_insensitive_column_matching = false;
     } orc;

     /// For capnProto format we should determine how to


@@ -13,6 +13,7 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory);
 void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
 void registerFileSegmentationEngineRegexp(FormatFactory & factory);
 void registerFileSegmentationEngineJSONAsString(FormatFactory & factory);
+void registerFileSegmentationEngineJSONAsObject(FormatFactory & factory);
 void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory);

 /// Formats for both input/output.
@@ -103,6 +104,7 @@ void registerProtobufSchemaReader(FormatFactory & factory);
 void registerProtobufListSchemaReader(FormatFactory & factory);
 void registerLineAsStringSchemaReader(FormatFactory & factory);
 void registerJSONAsStringSchemaReader(FormatFactory & factory);
+void registerJSONAsObjectSchemaReader(FormatFactory & factory);
 void registerRawBLOBSchemaReader(FormatFactory & factory);
 void registerMsgPackSchemaReader(FormatFactory & factory);
 void registerCapnProtoSchemaReader(FormatFactory & factory);
@@ -123,6 +125,7 @@ void registerFormats()
     registerFileSegmentationEngineJSONEachRow(factory);
     registerFileSegmentationEngineRegexp(factory);
     registerFileSegmentationEngineJSONAsString(factory);
+    registerFileSegmentationEngineJSONAsObject(factory);
     registerFileSegmentationEngineJSONCompactEachRow(factory);

     registerInputFormatNative(factory);
@@ -207,6 +210,7 @@ void registerFormats()
     registerProtobufListSchemaReader(factory);
     registerLineAsStringSchemaReader(factory);
     registerJSONAsStringSchemaReader(factory);
+    registerJSONAsObjectSchemaReader(factory);
     registerRawBLOBSchemaReader(factory);
     registerMsgPackSchemaReader(factory);
     registerCapnProtoSchemaReader(factory);


@@ -43,6 +43,9 @@ public:
         for (size_t i = 2; i < args.size() - 1; i += 2)
             dst_array_types.push_back(args[i]);

+        // Type of the ELSE branch
+        dst_array_types.push_back(args.back());
+
         return getLeastSupertype(dst_array_types);
     }
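
Before this fix the ELSE branch was left out, so the function's return type was the least supertype of the THEN branches only, and a wider ELSE value could not be represented. A toy model of the corrected computation, with a "type" reduced to a numeric width and the supertype to a maximum (purely illustrative):

#include <algorithm>
#include <vector>

int leastSupertype(const std::vector<int> & widths)
{
    return *std::max_element(widths.begin(), widths.end());
}

int caseReturnType(std::vector<int> then_branch_widths, int else_branch_width)
{
    // The fix: the ELSE branch participates in the supertype as well.
    then_branch_widths.push_back(else_branch_width);
    return leastSupertype(then_branch_widths);
}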


@@ -7,9 +7,9 @@ namespace DB
 {

 /// An O(1) time and space consistent hash algorithm by Konstantin Oblakov
-struct YandexConsistentHashImpl
+struct KostikConsistentHashImpl
 {
-    static constexpr auto name = "yandexConsistentHash";
+    static constexpr auto name = "kostikConsistentHash";
     using HashType = UInt64;
     /// Actually it supports UInt64, but it is efficient only if n <= 32768
@@ -23,12 +23,12 @@ struct YandexConsistentHashImpl
     }
 };

-using FunctionYandexConsistentHash = FunctionConsistentHashImpl<YandexConsistentHashImpl>;
+using FunctionKostikConsistentHash = FunctionConsistentHashImpl<KostikConsistentHashImpl>;

-void registerFunctionYandexConsistentHash(FunctionFactory & factory)
+void registerFunctionKostikConsistentHash(FunctionFactory & factory)
 {
-    factory.registerFunction<FunctionYandexConsistentHash>();
+    factory.registerFunction<FunctionKostikConsistentHash>();
+    factory.registerAlias("yandexConsistentHash", "kostikConsistentHash");
 }

 }

src/Functions/makeDate.cpp (new file, 155 lines)

@@ -0,0 +1,155 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include <Interpreters/castColumn.h>
#include <Common/DateLUT.h>
#include <Common/typeid_cast.h>

#include <array>

namespace DB
{

namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

namespace
{

// A helper function to simplify comparisons of valid YYYY-MM-DD values for <,>,=
inline constexpr Int64 YearMonthDayToSingleInt(Int64 year, Int64 month, Int64 day)
{
    return year * 512 + month * 32 + day;
}

// Common implementation for makeDate, makeDate32
template <typename Traits>
class FunctionMakeDate : public IFunction
{
private:
    static constexpr std::array<const char*, 3> argument_names = {"year", "month", "day"};

public:
    static constexpr auto name = Traits::name;

    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMakeDate>(); }

    String getName() const override { return name; }

    bool isVariadic() const override { return false; }

    size_t getNumberOfArguments() const override { return argument_names.size(); }

    bool isInjective(const ColumnsWithTypeAndName &) const override
    {
        return false; // {year,month,day} that are out of supported range are converted into a default value
    }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    bool useDefaultImplementationForNulls() const override { return true; }

    bool useDefaultImplementationForConstants() const override { return true; }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (arguments.size() != argument_names.size())
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                "Function {} requires 3 arguments, but {} given", getName(), arguments.size());

        for (size_t i = 0; i < argument_names.size(); ++i)
        {
            DataTypePtr argument_type = arguments[i];
            if (!isNumber(argument_type))
                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                    "Argument '{}' for function {} must be number", std::string(argument_names[i]), getName());
        }

        return std::make_shared<typename Traits::ReturnDataType>();
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        const DataTypePtr converted_argument_type = std::make_shared<DataTypeFloat32>();
        Columns converted_arguments;
        converted_arguments.reserve(arguments.size());
        for (const auto & argument : arguments)
        {
            ColumnPtr argument_column = castColumn(argument, converted_argument_type);
            argument_column = argument_column->convertToFullColumnIfConst();
            converted_arguments.push_back(argument_column);
        }

        auto res_column = Traits::ReturnColumnType::create(input_rows_count);
        auto & result_data = res_column->getData();

        const auto & year_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[0]).getData();
        const auto & month_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[1]).getData();
        const auto & day_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[2]).getData();

        const auto & date_lut = DateLUT::instance();

        for (size_t i = 0; i < input_rows_count; ++i)
        {
            const auto year = year_data[i];
            const auto month = month_data[i];
            const auto day = day_data[i];

            Int32 day_num = 0;

            if (year >= Traits::MIN_YEAR &&
                year <= Traits::MAX_YEAR &&
                month >= 1 && month <= 12 &&
                day >= 1 && day <= 31 &&
                YearMonthDayToSingleInt(year, month, day) <= Traits::MAX_DATE)
            {
                day_num = date_lut.makeDayNum(year, month, day);
            }

            result_data[i] = day_num;
        }

        return res_column;
    }
};

// makeDate(year, month, day)
struct MakeDateTraits
{
    static constexpr auto name = "makeDate";
    using ReturnDataType = DataTypeDate;
    using ReturnColumnType = ColumnUInt16;

    static constexpr auto MIN_YEAR = 1970;
    static constexpr auto MAX_YEAR = 2149;
    // This date has the maximum day number that fits in 16-bit uint
    static constexpr auto MAX_DATE = YearMonthDayToSingleInt(MAX_YEAR, 6, 6);
};

// makeDate32(year, month, day)
struct MakeDate32Traits
{
    static constexpr auto name = "makeDate32";
    using ReturnDataType = DataTypeDate32;
    using ReturnColumnType = ColumnInt32;

    static constexpr auto MIN_YEAR = 1925;
    static constexpr auto MAX_YEAR = 2283;
    static constexpr auto MAX_DATE = YearMonthDayToSingleInt(MAX_YEAR, 11, 11);
};

}

void registerFunctionsMakeDate(FunctionFactory & factory)
{
    factory.registerFunction<FunctionMakeDate<MakeDateTraits>>();
    factory.registerFunction<FunctionMakeDate<MakeDate32Traits>>();
}

}
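
A note on the packing: YearMonthDayToSingleInt is order-preserving for valid dates because month * 32 + day is at most 12 * 32 + 31 = 415, which can never carry into the year digit (512). That is why the single MAX_DATE comparison in executeImpl is a sound upper-bound check, and why out-of-range inputs collapse to day number 0 (1970-01-01 for Date). A few standalone compile-time checks of the property (sketch, not part of the diff):

#include <cstdint>

constexpr int64_t pack(int64_t year, int64_t month, int64_t day)
{
    return year * 512 + month * 32 + day;   // same scheme as YearMonthDayToSingleInt
}

// Order is preserved across month and year boundaries for valid dates.
static_assert(pack(2000, 12, 31) < pack(2001, 1, 1));
static_assert(pack(2000, 1, 31) < pack(2000, 2, 1));

// 2149-06-06 is the last representable Date: the packed comparison rejects
// 2149-06-07 and everything after it.
static_assert(pack(2149, 6, 6) < pack(2149, 6, 7));

int main() {}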


@@ -8,6 +8,7 @@ namespace DB
 void registerFunctionsArithmetic(FunctionFactory &);
 void registerFunctionsArray(FunctionFactory &);
 void registerFunctionsTuple(FunctionFactory &);
+void registerFunctionsMakeDate(FunctionFactory &);
 void registerFunctionsMap(FunctionFactory &);
 void registerFunctionsBitmap(FunctionFactory &);
 void registerFunctionsBinaryRepr(FunctionFactory &);
@@ -73,6 +74,7 @@ void registerFunctions()
     registerFunctionsArithmetic(factory);
     registerFunctionsArray(factory);
     registerFunctionsTuple(factory);
+    registerFunctionsMakeDate(factory);
     registerFunctionsMap(factory);
     registerFunctionsBitmap(factory);
     registerFunctionsBinaryRepr(factory);


@@ -2,12 +2,12 @@ namespace DB
 {

 class FunctionFactory;

-void registerFunctionYandexConsistentHash(FunctionFactory & factory);
+void registerFunctionKostikConsistentHash(FunctionFactory & factory);
 void registerFunctionJumpConsistentHash(FunctionFactory & factory);

 void registerFunctionsConsistentHashing(FunctionFactory & factory)
 {
-    registerFunctionYandexConsistentHash(factory);
+    registerFunctionKostikConsistentHash(factory);
     registerFunctionJumpConsistentHash(factory);
 }


@@ -39,6 +39,10 @@ public:
    {
    }

+   virtual ~ReadBufferFromFileDescriptor() override
+   {
+   }
+
    int getFD() const
    {
        return fd;
@@ -80,6 +84,9 @@ public:
    {
        use_pread = true;
    }
+   virtual ~ReadBufferFromFileDescriptorPRead() override
+   {
+   }
};

}


@@ -15,4 +15,11 @@ public:
     explicit ReadBufferFromString(std::string_view s) : ReadBufferFromMemory(s.data(), s.size()) {}
 };

+class ReadBufferFromOwnString : public String, public ReadBufferFromString
+{
+public:
+    explicit ReadBufferFromOwnString(const String & s_): String(s_), ReadBufferFromString(*this) {}
+};
+
 }
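
Base-class order is what makes this one-liner safe: the String subobject is listed first, so it is fully constructed (holding its own copy of the data) before ReadBufferFromString(*this) takes a view over it, and the copy lives exactly as long as the reader. A standalone sketch of the same idiom with illustrative names:

#include <iostream>
#include <memory>
#include <string>

struct View
{
    explicit View(const std::string & s) : data(s.data()), size(s.size()) {}
    const char * data;
    size_t size;
};

// std::string is the first base, so it is initialized before View, and
// View(*this) points into memory owned by this very object.
struct OwningView : public std::string, public View
{
    explicit OwningView(const std::string & s) : std::string(s), View(*this) {}
};

int main()
{
    std::unique_ptr<OwningView> v;
    {
        std::string temp = "short-lived source";
        v = std::make_unique<OwningView>(temp);
    }   // temp is gone, but the view reads the owned copy
    std::cout << std::string(v->data, v->size) << '\n';
}

The new test further below exercises precisely this: the source string goes out of scope before the buffer is read.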


@@ -663,7 +663,7 @@ public:
         Range range{from, to};
         from = to;

-        return std::move(range);
+        return range;
     }

 private:
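
This is the classic pessimizing move: wrapping a local in std::move on return suppresses copy elision (NRVO), while the implicit move would apply anyway, so the cast can only make things slower. Clang and GCC flag it with -Wpessimizing-move. A minimal illustration:

#include <string>
#include <utility>

std::string good()
{
    std::string s = "value";
    return s;             // NRVO, or at worst an implicit move
}

std::string bad()
{
    std::string s = "value";
    return std::move(s);  // blocks NRVO; compilers warn: pessimizing-move
}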


@@ -2,18 +2,15 @@
 #include <iostream>

-#include <base/types.h>
-#include <IO/ReadHelpers.h>
+#include <IO/ReadBuffer.h>
 #include <IO/ReadBufferFromString.h>
+#include <IO/ReadHelpers.h>
+#include <base/types.h>

-int main(int, char **)
+int readAndPrint(DB::ReadBuffer & in)
 {
     try
     {
-        std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
-        DB::ReadBufferFromString in(s);
-
         DB::Int64 a;
         DB::Float64 b;
         DB::String c, d;
@@ -31,12 +28,32 @@ int main(int, char **)
         std::cout << a << ' ' << b << ' ' << c << '\t' << '\'' << d << '\'' << std::endl;
         std::cout << in.count() << std::endl;
+        return 0;
     }
     catch (const DB::Exception & e)
     {
         std::cerr << e.what() << ", " << e.displayText() << std::endl;
         return 1;
     }
+}
+
+int main(int, char **)
+{
+    {
+        std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
+        DB::ReadBufferFromString in(s);
+        if (readAndPrint(in))
+            std::cout << "readAndPrint from ReadBufferFromString failed" << std::endl;
+    }
+
+    std::shared_ptr<DB::ReadBufferFromOwnString> in;
+    {
+        std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
+        in = std::make_shared<DB::ReadBufferFromOwnString>(s);
+    }
+    if (readAndPrint(*in))
+        std::cout << "readAndPrint from ReadBufferFromOwnString failed" << std::endl;
+
     return 0;
 }


@@ -32,6 +32,7 @@ namespace CurrentMetrics
 namespace ProfileEvents
 {
     extern const Event AsyncInsertQuery;
+    extern const Event AsyncInsertBytes;
 }

 namespace DB
@@ -222,7 +223,9 @@ void AsynchronousInsertQueue::pushImpl(InsertData::EntryPtr entry, QueueIterator
     if (!data)
         data = std::make_unique<InsertData>();

-    data->size += entry->bytes.size();
+    size_t entry_data_size = entry->bytes.size();
+
+    data->size += entry_data_size;
     data->last_update = std::chrono::steady_clock::now();
     data->entries.emplace_back(entry);
@@ -239,6 +242,7 @@ void AsynchronousInsertQueue::pushImpl(InsertData::EntryPtr entry, QueueIterator
     CurrentMetrics::add(CurrentMetrics::PendingAsyncInsert);
     ProfileEvents::increment(ProfileEvents::AsyncInsertQuery);
+    ProfileEvents::increment(ProfileEvents::AsyncInsertBytes, entry_data_size);
 }

 void AsynchronousInsertQueue::waitForProcessingQuery(const String & query_id, const Milliseconds & timeout)


@@ -12,6 +12,7 @@
 #include <Storages/MarkCache.h>
 #include <Storages/StorageMergeTree.h>
 #include <Storages/StorageReplicatedMergeTree.h>
+#include <Storages/MergeTree/MergeTreeMetadataCache.h>
 #include <IO/UncompressedCache.h>
 #include <IO/MMappedFileCache.h>
 #include <IO/ReadHelpers.h>

@@ -607,6 +608,15 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_time)
         }
     }

+#if USE_ROCKSDB
+    {
+        if (auto metadata_cache = getContext()->tryGetMergeTreeMetadataCache())
+        {
+            new_values["MergeTreeMetadataCacheSize"] = metadata_cache->getEstimateNumKeys();
+        }
+    }
+#endif
+
 #if USE_EMBEDDED_COMPILER
     {
         if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache())

@@ -617,6 +627,7 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_time)
     }
 #endif

+
     new_values["Uptime"] = getContext()->getUptimeSeconds();

     /// Process process memory usage according to OS
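
getEstimateNumKeys() itself is not shown in this diff; given the USE_ROCKSDB guard, it plausibly wraps RocksDB's standard key-count property, which returns an estimate rather than an exact count. A hedged sketch of such a wrapper (hypothetical helper, not the actual implementation):

#include <rocksdb/db.h>

#include <cstdint>

// "rocksdb.estimate-num-keys" is a documented RocksDB property; the value is
// an approximation, which is fine for a monitoring metric.
uint64_t estimateNumKeys(rocksdb::DB & db)
{
    uint64_t keys = 0;
    db.GetIntProperty("rocksdb.estimate-num-keys", &keys);
    return keys;
}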

Some files were not shown because too many files have changed in this diff.