Merge branch 'master' into lwd_mutation_always_sync

This commit is contained in:
Alexander Gololobov 2023-01-05 21:20:14 +01:00 committed by GitHub
commit 102dd8bd45
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
494 changed files with 11983 additions and 5047 deletions

View File

@ -18,5 +18,21 @@ tests/ci/run_check.py
### Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
...
### Documentation entry for user-facing changes
<!---
Directly edit documentation source files in the "docs" folder with the same pull-request as code changes
or
Add a user-readable short description of the changes that should be added to docs.clickhouse.com below.
At a minimum, the following information should be added (but add more as needed).
- Motivation: Why is this function, table engine, etc. useful to ClickHouse users?
- Parameters: If the feature being added takes arguments, options or is influenced by settings, please list them below with a brief explanation.
- Example use: A query or command.
-->
> Information about CI checks: https://clickhouse.com/docs/en/development/continuous-integration/

View File

@ -141,37 +141,6 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
SharedBuildSmokeTest:
needs: [BuilderDebShared]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/split_build_check
REPO_COPY=${{runner.temp}}/split_build_check/ClickHouse
REPORTS_PATH=${{runner.temp}}/reports_dir
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Shared build check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 split_build_smoke_check.py
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
@ -508,47 +477,6 @@ jobs:
##########################################################################################
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderDebShared:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_shared
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinClangTidy:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
@ -968,7 +896,6 @@ jobs:
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
- BuilderDebShared
runs-on: [self-hosted, style-checker]
if: ${{ success() || failure() }}
steps:
@ -3139,7 +3066,6 @@ jobs:
- UnitTestsMsan
- UnitTestsUBsan
- UnitTestsReleaseClang
- SharedBuildSmokeTest
- SQLancerTestRelease
- SQLancerTestDebug
runs-on: [self-hosted, style-checker]

View File

@ -203,37 +203,6 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
SharedBuildSmokeTest:
needs: [BuilderDebShared]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/split_build_check
REPO_COPY=${{runner.temp}}/split_build_check/ClickHouse
REPORTS_PATH=${{runner.temp}}/reports_dir
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Shared build check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 split_build_smoke_check.py
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
@ -570,47 +539,6 @@ jobs:
##########################################################################################
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderDebShared:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_shared
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinClangTidy:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
@ -1018,12 +946,10 @@ jobs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
- BuilderBinFreeBSD
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
- BuilderDebShared
runs-on: [self-hosted, style-checker]
if: ${{ success() || failure() }}
steps:
@ -2603,7 +2529,7 @@ jobs:
sudo rm -fr "$TEMP_PATH"
TestsBugfixCheck:
needs: [CheckLabels, StyleCheck]
runs-on: [self-hosted, stress-tester]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
@ -2639,7 +2565,7 @@ jobs:
python3 functional_test_check.py "Stateless $CHECK_NAME" "$KILL_TIMEOUT" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv"
python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/functional_commit_status.tsv" "${TEMP_PATH}/integration/integration_commit_status.tsv"
- name: Cleanup
if: always()
run: |
@ -4448,7 +4374,6 @@ jobs:
- UnitTestsMsan
- UnitTestsUBsan
- UnitTestsReleaseClang
- SharedBuildSmokeTest
- CompatibilityCheck
- IntegrationTestsFlakyCheck
- SQLancerTestRelease

37
.gitmodules vendored
View File

@ -104,13 +104,13 @@
url = https://github.com/ClickHouse/aws-sdk-cpp.git
[submodule "aws-c-event-stream"]
path = contrib/aws-c-event-stream
url = https://github.com/ClickHouse/aws-c-event-stream.git
url = https://github.com/awslabs/aws-c-event-stream.git
[submodule "aws-c-common"]
path = contrib/aws-c-common
url = https://github.com/ClickHouse/aws-c-common.git
[submodule "aws-checksums"]
path = contrib/aws-checksums
url = https://github.com/ClickHouse/aws-checksums.git
url = https://github.com/awslabs/aws-checksums.git
[submodule "contrib/curl"]
path = contrib/curl
url = https://github.com/curl/curl.git
@ -284,6 +284,9 @@
[submodule "contrib/xxHash"]
path = contrib/xxHash
url = https://github.com/Cyan4973/xxHash.git
[submodule "contrib/crc32-s390x"]
path = contrib/crc32-s390x
url = https://github.com/linux-on-ibm-z/crc32-s390x.git
[submodule "contrib/openssl"]
path = contrib/openssl
url = https://github.com/openssl/openssl
@ -294,3 +297,33 @@
[submodule "contrib/libdivide"]
path = contrib/libdivide
url = https://github.com/ridiculousfish/libdivide.git
[submodule "contrib/aws-crt-cpp"]
path = contrib/aws-crt-cpp
url = https://github.com/ClickHouse/aws-crt-cpp.git
[submodule "contrib/aws-c-io"]
path = contrib/aws-c-io
url = https://github.com/ClickHouse/aws-c-io.git
[submodule "contrib/aws-c-mqtt"]
path = contrib/aws-c-mqtt
url = https://github.com/awslabs/aws-c-mqtt.git
[submodule "contrib/aws-c-auth"]
path = contrib/aws-c-auth
url = https://github.com/awslabs/aws-c-auth.git
[submodule "contrib/aws-c-cal"]
path = contrib/aws-c-cal
url = https://github.com/ClickHouse/aws-c-cal.git
[submodule "contrib/aws-c-sdkutils"]
path = contrib/aws-c-sdkutils
url = https://github.com/awslabs/aws-c-sdkutils.git
[submodule "contrib/aws-c-http"]
path = contrib/aws-c-http
url = https://github.com/awslabs/aws-c-http.git
[submodule "contrib/aws-c-s3"]
path = contrib/aws-c-s3
url = https://github.com/awslabs/aws-c-s3.git
[submodule "contrib/aws-c-compression"]
path = contrib/aws-c-compression
url = https://github.com/awslabs/aws-c-compression.git
[submodule "contrib/aws-s2n-tls"]
path = contrib/aws-s2n-tls
url = https://github.com/aws/s2n-tls.git

View File

@ -73,22 +73,7 @@ message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON)
# DEVELOPER ONLY.
# Faster linking if turned on.
option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files" OFF)
if (USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
message(FATAL_ERROR "SPLIT_SHARED_LIBRARIES=1 must not be used together with USE_STATIC_LIBRARIES=1")
endif()
if (NOT USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "")
endif ()
if (USE_STATIC_LIBRARIES)
list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
endif ()
list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
option (ENABLE_FUZZING "Fuzzy testing using libfuzzer" OFF)
@ -171,7 +156,7 @@ option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests"
option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF)
option(ENABLE_BENCHMARKS "Build all benchmark programs in 'benchmarks' subdirectories" OFF)
if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND USE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL)
if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND NOT USE_MUSL)
# Only for Linux, x86_64 or aarch64.
option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON)
elseif(GLIBC_COMPATIBILITY)
@ -467,22 +452,13 @@ endif ()
set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX")
if (USE_STATIC_LIBRARIES)
set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
if (OS_LINUX AND NOT ARCH_AARCH64)
# Slightly more efficient code can be generated
# It's disabled for ARM because otherwise ClickHouse cannot run on Android.
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie -Wl,-no-pie")
endif ()
else ()
set (CMAKE_POSITION_INDEPENDENT_CODE ON)
# This is required for clang on Arch linux, that uses PIE by default.
# See enable-SSP-and-PIE-by-default.patch [1].
#
# [1]: https://github.com/archlinux/svntogit-packages/blob/6e681aa860e65ad46a1387081482eb875c2200f2/trunk/enable-SSP-and-PIE-by-default.patch
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie")
set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
if (OS_LINUX AND NOT ARCH_AARCH64)
# Slightly more efficient code can be generated
# It's disabled for ARM because otherwise ClickHouse cannot run on Android.
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie -Wl,-no-pie")
endif ()
if (ENABLE_TESTS)
@ -504,10 +480,7 @@ else ()
set (CLICKHOUSE_ETC_DIR "${CMAKE_INSTALL_PREFIX}/etc")
endif ()
message (STATUS
"Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ;
USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES}
SPLIT_SHARED_LIBRARIES=${SPLIT_SHARED_LIBRARIES}")
message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE}")
include (GNUInstallDirs)
@ -553,7 +526,7 @@ macro (clickhouse_add_executable target)
# - _je_zone_register due to JEMALLOC_PRIVATE_NAMESPACE=je_ under OS X.
# - but jemalloc-cmake does not run private_namespace.sh
# so symbol name should be _zone_register
if (ENABLE_JEMALLOC AND USE_STATIC_LIBRARIES AND OS_DARWIN)
if (ENABLE_JEMALLOC AND OS_DARWIN)
set_property(TARGET ${target} APPEND PROPERTY LINK_OPTIONS -u_zone_register)
endif()
endif()

View File

@ -1,4 +1,4 @@
Copyright 2016-2022 ClickHouse, Inc.
Copyright 2016-2023 ClickHouse, Inc.
Apache License
Version 2.0, January 2004
@ -188,7 +188,7 @@ Copyright 2016-2022 ClickHouse, Inc.
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016-2022 ClickHouse, Inc.
Copyright 2016-2023 ClickHouse, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@ -39,10 +39,6 @@ endif ()
target_include_directories(common PUBLIC .. "${CMAKE_CURRENT_BINARY_DIR}/..")
if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES)
target_link_libraries(common PUBLIC -Wl,-U,_inside_main)
endif()
target_link_libraries (common
PUBLIC
ch_contrib::cityhash

View File

@ -37,7 +37,7 @@ if (GLIBC_COMPATIBILITY)
target_include_directories(glibc-compatibility PRIVATE libcxxabi ${musl_arch_include_dir})
if (( NOT USE_STATIC_LIBRARIES AND NOT USE_STATIC_LIBRARIES ) OR ENABLE_OPENSSL_DYNAMIC)
if (ENABLE_OPENSSL_DYNAMIC)
target_compile_options(glibc-compatibility PRIVATE -fPIC)
endif ()

View File

@ -102,6 +102,11 @@ elseif (ARCH_AMD64)
SET(ENABLE_AVX512_FOR_SPEC_OP 0)
endif()
# ClickHouse can be cross-compiled (e.g. on an ARM host for x86) but it is also possible to build ClickHouse on x86 w/o AVX for x86 w/
# AVX. We only check that the compiler can emit certain SIMD instructions, we don't care if the host system is able to run the binary.
# Therefore, use check_cxx_source_compiles (= does the code compile+link?) instead of check_cxx_source_runs (= does the code
# compile+link+run).
set (TEST_FLAG "-mssse3")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
check_cxx_source_compiles("

View File

@ -25,7 +25,7 @@ if (SANITIZE)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_FLAGS}")
endif()
if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan")
endif ()
if (COMPILER_GCC)
@ -50,7 +50,7 @@ if (SANITIZE)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=memory")
endif()
if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan")
endif ()
@ -71,7 +71,7 @@ if (SANITIZE)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread")
endif()
if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan")
endif ()
if (COMPILER_GCC)
@ -103,7 +103,7 @@ if (SANITIZE)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
endif()
if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan")
endif ()
if (COMPILER_GCC)

View File

@ -115,12 +115,25 @@ endif()
add_contrib (llvm-project-cmake llvm-project)
add_contrib (libfuzzer-cmake llvm-project)
add_contrib (libxml2-cmake libxml2)
add_contrib (aws-s3-cmake
add_contrib (aws-cmake
aws
aws-c-auth
aws-c-cal
aws-c-common
aws-c-compression
aws-c-event-stream
aws-c-http
aws-c-io
aws-c-mqtt
aws-c-s3
aws-c-sdkutils
aws-s2n-tls
aws-checksums
aws-crt-cpp
aws-cmake
)
add_contrib (base64-cmake base64)
add_contrib (simdjson-cmake simdjson)
add_contrib (rapidjson-cmake rapidjson)
@ -166,6 +179,10 @@ add_contrib (c-ares-cmake c-ares)
add_contrib (qpl-cmake qpl)
add_contrib (morton-nd-cmake morton-nd)
if (ARCH_S390X)
add_contrib(crc32-s390x-cmake crc32-s390x)
endif()
add_contrib (annoy-cmake annoy)
add_contrib (xxHash-cmake xxHash)

View File

@ -78,23 +78,14 @@ set(FLATBUFFERS_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/flatbuffers")
set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_SRC_DIR}/include")
# set flatbuffers CMake options
if (USE_STATIC_LIBRARIES)
set(FLATBUFFERS_BUILD_FLATLIB ON CACHE BOOL "Enable the build of the flatbuffers library")
set(FLATBUFFERS_BUILD_SHAREDLIB OFF CACHE BOOL "Disable the build of the flatbuffers shared library")
else ()
set(FLATBUFFERS_BUILD_SHAREDLIB ON CACHE BOOL "Enable the build of the flatbuffers shared library")
set(FLATBUFFERS_BUILD_FLATLIB OFF CACHE BOOL "Disable the build of the flatbuffers library")
endif ()
set(FLATBUFFERS_BUILD_FLATLIB ON CACHE BOOL "Enable the build of the flatbuffers library")
set(FLATBUFFERS_BUILD_SHAREDLIB OFF CACHE BOOL "Disable the build of the flatbuffers shared library")
set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "Skip flatbuffers tests")
add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}")
add_library(_flatbuffers INTERFACE)
if(USE_STATIC_LIBRARIES)
target_link_libraries(_flatbuffers INTERFACE flatbuffers)
else()
target_link_libraries(_flatbuffers INTERFACE flatbuffers_shared)
endif()
target_link_libraries(_flatbuffers INTERFACE flatbuffers)
target_include_directories(_flatbuffers INTERFACE ${FLATBUFFERS_INCLUDE_DIR})
# === hdfs

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit 00b03604543367d7e310cb0993973fdcb723ea79
Subproject commit 4a12641211d4dbc8e2fdb2dd0f1eea0927db9252

1
contrib/aws-c-auth vendored Submodule

@ -0,0 +1 @@
Subproject commit 30df6c407e2df43bd244e2c34c9b4a4b87372bfb

1
contrib/aws-c-cal vendored Submodule

@ -0,0 +1 @@
Subproject commit 85dd7664b786a389c6fb1a6f031ab4bb2282133d

@ -1 +1 @@
Subproject commit 736a82d1697c108b04a277e66438a7f4e19b6857
Subproject commit 324fd1d973ccb25c813aa747bf1759cfde5121c5

1
contrib/aws-c-compression vendored Submodule

@ -0,0 +1 @@
Subproject commit b517b7decd0dac30be2162f5186c250221c53aff

@ -1 +1 @@
Subproject commit 3bc33662f9ccff4f4cbcf9509cc78c26e022fde0
Subproject commit 39bfa94a14b7126bf0c1330286ef8db452d87e66

1
contrib/aws-c-http vendored Submodule

@ -0,0 +1 @@
Subproject commit 2c5a2a7d5556600b9782ffa6c9d7e09964df1abc

1
contrib/aws-c-io vendored Submodule

@ -0,0 +1 @@
Subproject commit 5d32c453560d0823df521a686bf7fbacde7f9be3

1
contrib/aws-c-mqtt vendored Submodule

@ -0,0 +1 @@
Subproject commit 882c689561a3db1466330ccfe3b63637e0a575d3

1
contrib/aws-c-s3 vendored Submodule

@ -0,0 +1 @@
Subproject commit a41255ece72a7c887bba7f9d998ca3e14f4c8a1b

1
contrib/aws-c-sdkutils vendored Submodule

@ -0,0 +1 @@
Subproject commit 25bf5cf225f977c3accc6a05a0a7a181ef2a4a30

@ -1 +1 @@
Subproject commit 519d6d9093819b6cf89ffff589a27ef8f83d0f65
Subproject commit 48e7c0e01479232f225c8044d76c84e74192889d

View File

@ -0,0 +1,114 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0.
include(CheckCSourceRuns)
option(USE_CPU_EXTENSIONS "Whenever possible, use functions optimized for CPUs with specific extensions (ex: SSE, AVX)." ON)
# In the current (11/2/21) state of mingw64, the packaged gcc is not capable of emitting properly aligned avx2 instructions under certain circumstances.
# This leads to crashes for windows builds using mingw64 when invoking the avx2-enabled versions of certain functions. Until we can find a better
# work-around, disable avx2 (and all other extensions) in mingw builds.
#
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
#
if (MINGW)
message(STATUS "MINGW detected! Disabling avx2 and other CPU extensions")
set(USE_CPU_EXTENSIONS OFF)
endif()
if(NOT CMAKE_CROSSCOMPILING)
check_c_source_runs("
#include <stdbool.h>
bool foo(int a, int b, int *c) {
return __builtin_mul_overflow(a, b, c);
}
int main() {
int out;
if (foo(1, 2, &out)) {
return 0;
}
return 0;
}" AWS_HAVE_GCC_OVERFLOW_MATH_EXTENSIONS)
if (USE_CPU_EXTENSIONS)
check_c_source_runs("
int main() {
int foo = 42;
_mulx_u32(1, 2, &foo);
return foo != 2;
}" AWS_HAVE_MSVC_MULX)
endif()
endif()
check_c_source_compiles("
#include <Windows.h>
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
int main() {
return 0;
}
#else
it's not windows desktop
#endif
" AWS_HAVE_WINAPI_DESKTOP)
check_c_source_compiles("
int main() {
#if !(defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86))
# error \"not intel\"
#endif
return 0;
}
" AWS_ARCH_INTEL)
check_c_source_compiles("
int main() {
#if !(defined(__aarch64__) || defined(_M_ARM64))
# error \"not arm64\"
#endif
return 0;
}
" AWS_ARCH_ARM64)
check_c_source_compiles("
int main() {
#if !(defined(__arm__) || defined(_M_ARM))
# error \"not arm\"
#endif
return 0;
}
" AWS_ARCH_ARM32)
check_c_source_compiles("
int main() {
int foo = 42, bar = 24;
__asm__ __volatile__(\"\":\"=r\"(foo):\"r\"(bar):\"memory\");
}" AWS_HAVE_GCC_INLINE_ASM)
check_c_source_compiles("
#include <sys/auxv.h>
int main() {
#ifdef __linux__
getauxval(AT_HWCAP);
getauxval(AT_HWCAP2);
#endif
return 0;
}" AWS_HAVE_AUXV)
string(REGEX MATCH "^(aarch64|arm)" ARM_CPU "${CMAKE_SYSTEM_PROCESSOR}")
if(NOT LEGACY_COMPILER_SUPPORT OR ARM_CPU)
check_c_source_compiles("
#include <execinfo.h>
int main() {
backtrace(NULL, 0);
return 0;
}" AWS_HAVE_EXECINFO)
endif()
check_c_source_compiles("
#include <linux/if_link.h>
int main() {
return 1;
}" AWS_HAVE_LINUX_IF_LINK_H)

View File

@ -0,0 +1,74 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0.
include(CheckCCompilerFlag)
include(CheckIncludeFile)
if (USE_CPU_EXTENSIONS)
if (MSVC)
check_c_compiler_flag("/arch:AVX2" HAVE_M_AVX2_FLAG)
if (HAVE_M_AVX2_FLAG)
set(AVX2_CFLAGS "/arch:AVX2")
endif()
else()
check_c_compiler_flag(-mavx2 HAVE_M_AVX2_FLAG)
if (HAVE_M_AVX2_FLAG)
set(AVX2_CFLAGS "-mavx -mavx2")
endif()
endif()
cmake_push_check_state()
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AVX2_CFLAGS}")
check_c_source_compiles("
#include <immintrin.h>
#include <emmintrin.h>
#include <string.h>
int main() {
__m256i vec;
memset(&vec, 0, sizeof(vec));
_mm256_shuffle_epi8(vec, vec);
_mm256_set_epi32(1,2,3,4,5,6,7,8);
_mm256_permutevar8x32_epi32(vec, vec);
return 0;
}" HAVE_AVX2_INTRINSICS)
check_c_source_compiles("
#include <immintrin.h>
#include <string.h>
int main() {
__m256i vec;
memset(&vec, 0, sizeof(vec));
return (int)_mm256_extract_epi64(vec, 2);
}" HAVE_MM256_EXTRACT_EPI64)
cmake_pop_check_state()
endif() # USE_CPU_EXTENSIONS
macro(simd_add_definition_if target definition)
if(${definition})
target_compile_definitions(${target} PRIVATE -D${definition})
endif(${definition})
endmacro(simd_add_definition_if)
# Configure private preprocessor definitions for SIMD-related features
# Does not set any processor feature codegen flags
function(simd_add_definitions target)
simd_add_definition_if(${target} HAVE_AVX2_INTRINSICS)
simd_add_definition_if(${target} HAVE_MM256_EXTRACT_EPI64)
endfunction(simd_add_definitions)
# Adds source files only if AVX2 is supported. These files will be built with
# avx2 intrinsics enabled.
# Usage: simd_add_source_avx2(target file1.c file2.c ...)
function(simd_add_source_avx2 target)
foreach(file ${ARGN})
target_sources(${target} PRIVATE ${file})
set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "${AVX2_CFLAGS}")
endforeach()
endfunction(simd_add_source_avx2)

View File

@ -0,0 +1,50 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0.
include(CheckSymbolExists)
# Check if the platform supports setting thread affinity
# (important for hitting full NIC entitlement on NUMA architectures)
function(aws_set_thread_affinity_method target)
# Non-POSIX, Android, and Apple platforms do not support thread affinity.
if (NOT UNIX OR ANDROID OR APPLE)
target_compile_definitions(${target} PRIVATE
-DAWS_AFFINITY_METHOD=AWS_AFFINITY_METHOD_NONE)
return()
endif()
cmake_push_check_state()
list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
list(APPEND CMAKE_REQUIRED_LIBRARIES pthread)
set(headers "pthread.h")
# BSDs put nonportable pthread declarations in a separate header.
if(CMAKE_SYSTEM_NAME MATCHES BSD)
set(headers "${headers};pthread_np.h")
endif()
# Using pthread attrs is the preferred method, but is glibc-specific.
check_symbol_exists(pthread_attr_setaffinity_np "${headers}" USE_PTHREAD_ATTR_SETAFFINITY)
if (USE_PTHREAD_ATTR_SETAFFINITY)
target_compile_definitions(${target} PRIVATE
-DAWS_AFFINITY_METHOD=AWS_AFFINITY_METHOD_PTHREAD_ATTR)
return()
endif()
# This method is still nonportable, but is supported by musl and BSDs.
check_symbol_exists(pthread_setaffinity_np "${headers}" USE_PTHREAD_SETAFFINITY)
if (USE_PTHREAD_SETAFFINITY)
target_compile_definitions(${target} PRIVATE
-DAWS_AFFINITY_METHOD=AWS_AFFINITY_METHOD_PTHREAD)
return()
endif()
# If we got here, we expected thread affinity support but didn't find it.
# We still build with degraded NUMA performance, but show a warning.
message(WARNING "No supported method for setting thread affinity")
target_compile_definitions(${target} PRIVATE
-DAWS_AFFINITY_METHOD=AWS_AFFINITY_METHOD_NONE)
cmake_pop_check_state()
endfunction()

View File

@ -0,0 +1,61 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0.
include(CheckSymbolExists)
# Check how the platform supports setting thread name
function(aws_set_thread_name_method target)
if (WINDOWS)
# On Windows we do a runtime check, instead of compile-time check
return()
elseif (APPLE)
# All Apple platforms we support have the same function, so no need for compile-time check.
return()
endif()
cmake_push_check_state()
list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
list(APPEND CMAKE_REQUIRED_LIBRARIES pthread)
# The start of the test program
set(c_source_start "
#define _GNU_SOURCE
#include <pthread.h>
#if defined(__FreeBSD__) || defined(__NETBSD__)
#include <pthread_np.h>
#endif
int main() {
pthread_t thread_id;
")
# The end of the test program
set(c_source_end "}")
# pthread_setname_np() usually takes 2 args
check_c_source_compiles("
${c_source_start}
pthread_setname_np(thread_id, \"asdf\");
${c_source_end}"
PTHREAD_SETNAME_TAKES_2ARGS)
if (PTHREAD_SETNAME_TAKES_2ARGS)
target_compile_definitions(${target} PRIVATE -DAWS_PTHREAD_SETNAME_TAKES_2ARGS)
return()
endif()
# But on NetBSD it takes 3!
check_c_source_compiles("
${c_source_start}
pthread_setname_np(thread_id, \"asdf\", NULL);
${c_source_end}
" PTHREAD_SETNAME_TAKES_3ARGS)
if (PTHREAD_SETNAME_TAKES_3ARGS)
target_compile_definitions(${target} PRIVATE -DAWS_PTHREAD_SETNAME_TAKES_3ARGS)
return()
endif()
# And on many older/weirder platforms it's just not supported
cmake_pop_check_state()
endfunction()

View File

@ -0,0 +1,376 @@
set(ENABLE_AWS_S3_DEFAULT OFF)
if(ENABLE_LIBRARIES AND (OS_LINUX OR OS_DARWIN) AND TARGET OpenSSL::Crypto)
set(ENABLE_AWS_S3_DEFAULT ON)
endif()
option(ENABLE_AWS_S3 "Enable AWS S3" ${ENABLE_AWS_S3_DEFAULT})
if(ENABLE_AWS_S3)
if(NOT TARGET OpenSSL::Crypto)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use AWS SDK without OpenSSL")
elseif(NOT (OS_LINUX OR OS_DARWIN))
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use AWS SDK with platform ${CMAKE_SYSTEM_NAME}")
endif()
endif()
if(NOT ENABLE_AWS_S3)
message(STATUS "Not using AWS S3")
return()
endif()
# Utilities.
include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsFeatureTests.cmake")
include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsThreadAffinity.cmake")
include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsThreadName.cmake")
include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsSIMD.cmake")
# Gather sources and options.
set(AWS_SOURCES)
set(AWS_PUBLIC_INCLUDES)
set(AWS_PRIVATE_INCLUDES)
set(AWS_PUBLIC_COMPILE_DEFS)
set(AWS_PRIVATE_COMPILE_DEFS)
set(AWS_PRIVATE_LIBS)
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DDEBUG_BUILD")
endif()
set(ENABLE_OPENSSL_ENCRYPTION ON)
if (ENABLE_OPENSSL_ENCRYPTION)
list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DENABLE_OPENSSL_ENCRYPTION")
endif()
set(USE_S2N ON)
if (USE_S2N)
list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DUSE_S2N")
endif()
# Directories.
SET(AWS_SDK_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws")
SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-core")
SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-s3")
SET(AWS_AUTH_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-auth")
SET(AWS_CAL_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-cal")
SET(AWS_CHECKSUMS_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-checksums")
SET(AWS_COMMON_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-common")
SET(AWS_COMPRESSION_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-compression")
SET(AWS_CRT_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-crt-cpp")
SET(AWS_EVENT_STREAM_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-event-stream")
SET(AWS_HTTP_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-http")
SET(AWS_IO_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-io")
SET(AWS_MQTT_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-mqtt")
SET(AWS_S2N_TLS_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-s2n-tls")
SET(AWS_S3_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-s3")
SET(AWS_SDKUTILS_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-sdkutils")
# aws-cpp-sdk-core
file(GLOB AWS_SDK_CORE_SRC
"${AWS_SDK_CORE_DIR}/source/*.cpp"
"${AWS_SDK_CORE_DIR}/source/auth/*.cpp"
"${AWS_SDK_CORE_DIR}/source/auth/bearer-token-provider/*.cpp"
"${AWS_SDK_CORE_DIR}/source/auth/signer/*.cpp"
"${AWS_SDK_CORE_DIR}/source/auth/signer-provider/*.cpp"
"${AWS_SDK_CORE_DIR}/source/client/*.cpp"
"${AWS_SDK_CORE_DIR}/source/config/*.cpp"
"${AWS_SDK_CORE_DIR}/source/config/defaults/*.cpp"
"${AWS_SDK_CORE_DIR}/source/endpoint/*.cpp"
"${AWS_SDK_CORE_DIR}/source/endpoint/internal/*.cpp"
"${AWS_SDK_CORE_DIR}/source/external/cjson/*.cpp"
"${AWS_SDK_CORE_DIR}/source/external/tinyxml2/*.cpp"
"${AWS_SDK_CORE_DIR}/source/http/*.cpp"
"${AWS_SDK_CORE_DIR}/source/http/standard/*.cpp"
"${AWS_SDK_CORE_DIR}/source/internal/*.cpp"
"${AWS_SDK_CORE_DIR}/source/monitoring/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/base64/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/crypto/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/crypto/openssl/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/crypto/factory/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/event/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/json/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/logging/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/memory/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/memory/stl/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/stream/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/threading/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/xml/*.cpp"
)
if(OS_LINUX OR OS_DARWIN)
file(GLOB AWS_SDK_CORE_NET_SRC "${AWS_SDK_CORE_DIR}/source/net/linux-shared/*.cpp")
file(GLOB AWS_SDK_CORE_PLATFORM_SRC "${AWS_SDK_CORE_DIR}/source/platform/linux-shared/*.cpp")
else()
file(GLOB AWS_SDK_CORE_NET_SRC "${AWS_SDK_CORE_DIR}/source/net/*.cpp")
set(AWS_SDK_CORE_PLATFORM_SRC)
endif()
OPTION(USE_AWS_MEMORY_MANAGEMENT "Aws memory management" OFF)
configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY)
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MAJOR=1")
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MINOR=10")
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_PATCH=36")
list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC})
list(APPEND AWS_PUBLIC_INCLUDES
"${AWS_SDK_CORE_DIR}/include/"
"${CMAKE_CURRENT_BINARY_DIR}/include"
)
# aws-cpp-sdk-s3
file(GLOB AWS_SDK_S3_SRC
"${AWS_SDK_S3_DIR}/source/*.cpp"
"${AWS_SDK_S3_DIR}/source/model/*.cpp"
)
list(APPEND AWS_SOURCES ${AWS_SDK_S3_SRC})
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDK_S3_DIR}/include/")
# aws-c-auth
file(GLOB AWS_AUTH_SRC
"${AWS_AUTH_DIR}/source/*.c"
)
list(APPEND AWS_SOURCES ${AWS_AUTH_SRC})
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_AUTH_DIR}/include/")
# aws-c-cal
file(GLOB AWS_CAL_SRC
"${AWS_CAL_DIR}/source/*.c"
)
if (ENABLE_OPENSSL_ENCRYPTION)
file(GLOB AWS_CAL_OS_SRC
"${AWS_CAL_DIR}/source/unix/*.c"
)
list(APPEND AWS_PRIVATE_LIBS OpenSSL::Crypto)
endif()
list(APPEND AWS_SOURCES ${AWS_CAL_SRC} ${AWS_CAL_OS_SRC})
list(APPEND AWS_PRIVATE_INCLUDES "${AWS_CAL_DIR}/include/")
# aws-c-event-stream
file(GLOB AWS_EVENT_STREAM_SRC
"${AWS_EVENT_STREAM_DIR}/source/*.c"
)
list(APPEND AWS_SOURCES ${AWS_EVENT_STREAM_SRC})
list(APPEND AWS_PRIVATE_INCLUDES "${AWS_EVENT_STREAM_DIR}/include/")
# aws-c-common
file(GLOB AWS_COMMON_SRC
"${AWS_COMMON_DIR}/source/*.c"
"${AWS_COMMON_DIR}/source/external/*.c"
"${AWS_COMMON_DIR}/source/posix/*.c"
)
file(GLOB AWS_COMMON_ARCH_SRC
"${AWS_COMMON_DIR}/source/arch/generic/*.c"
)
if (AWS_ARCH_INTEL)
file(GLOB AWS_COMMON_ARCH_SRC
"${AWS_COMMON_DIR}/source/arch/intel/cpuid.c"
"${AWS_COMMON_DIR}/source/arch/intel/asm/*.c"
)
elseif (AWS_ARCH_ARM64 OR AWS_ARCH_ARM32)
if (AWS_HAVE_AUXV)
file(GLOB AWS_COMMON_ARCH_SRC
"${AWS_COMMON_DIR}/source/arch/arm/asm/*.c"
)
endif()
endif()
set(AWS_COMMON_AVX2_SRC)
if (HAVE_AVX2_INTRINSICS)
list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DUSE_SIMD_ENCODING")
set(AWS_COMMON_AVX2_SRC "${AWS_COMMON_DIR}/source/arch/intel/encoding_avx2.c")
set_source_files_properties(${AWS_COMMON_AVX2_SRC} PROPERTIES COMPILE_FLAGS "${AVX2_CFLAGS}")
endif()
configure_file("${AWS_COMMON_DIR}/include/aws/common/config.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/include/aws/common/config.h" @ONLY)
list(APPEND AWS_SOURCES ${AWS_COMMON_SRC} ${AWS_COMMON_ARCH_SRC} ${AWS_COMMON_AVX2_SRC})
list(APPEND AWS_PUBLIC_INCLUDES
"${AWS_COMMON_DIR}/include/"
"${CMAKE_CURRENT_BINARY_DIR}/include"
)
# aws-checksums
file(GLOB AWS_CHECKSUMS_SRC
"${AWS_CHECKSUMS_DIR}/source/*.c"
"${AWS_CHECKSUMS_DIR}/source/intel/*.c"
"${AWS_CHECKSUMS_DIR}/source/intel/asm/*.c"
"${AWS_CHECKSUMS_DIR}/source/arm/*.c"
)
if(AWS_ARCH_INTEL AND AWS_HAVE_GCC_INLINE_ASM)
file(GLOB AWS_CHECKSUMS_ARCH_SRC
"${AWS_CHECKSUMS_DIR}/source/intel/asm/*.c"
)
endif()
if (AWS_ARCH_ARM64)
file(GLOB AWS_CHECKSUMS_ARCH_SRC
"${AWS_CHECKSUMS_DIR}/source/arm/*.c"
)
set_source_files_properties("${AWS_CHECKSUMS_DIR}/source/arm/crc32c_arm.c" PROPERTIES COMPILE_FLAGS -march=armv8-a+crc)
elseif (AWS_ARCH_ARM32)
if (AWS_ARM32_CRC)
file(GLOB AWS_CHECKSUMS_ARCH_SRC
"${AWS_CHECKSUMS_DIR}/source/arm/*.c"
"${AWS_CHECKSUMS_DIR}/source/arm/asm/*.c"
)
set_source_files_properties(source/arm/crc32c_arm.c PROPERTIES COMPILE_FLAGS -march=armv8-a+crc)
endif()
endif()
list(APPEND AWS_SOURCES ${AWS_CHECKSUMS_SRC} ${AWS_CHECKSUMS_ARCH_SRC})
list(APPEND AWS_PRIVATE_INCLUDES "${AWS_CHECKSUMS_DIR}/include/")
# aws-c-io
file(GLOB AWS_IO_SRC
"${AWS_IO_DIR}/source/*.c"
)
if (OS_LINUX)
file(GLOB AWS_IO_OS_SRC
"${AWS_IO_DIR}/source/linux/*.c"
"${AWS_IO_DIR}/source/posix/*.c"
)
elseif (OS_DARWIN)
file(GLOB AWS_IO_OS_SRC
"${AWS_IO_DIR}/source/bsd/*.c"
"${AWS_IO_DIR}/source/posix/*.c"
)
endif()
set(AWS_IO_TLS_SRC)
if (USE_S2N)
file(GLOB AWS_IO_TLS_SRC
"${AWS_IO_DIR}/source/s2n/*.c"
)
endif()
list(APPEND AWS_SOURCES ${AWS_IO_SRC} ${AWS_IO_OS_SRC} ${AWS_IO_TLS_SRC})
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_IO_DIR}/include/")
# aws-s2n-tls
if (USE_S2N)
file(GLOB AWS_S2N_TLS_SRC
"${AWS_S2N_TLS_DIR}/crypto/*.c"
"${AWS_S2N_TLS_DIR}/error/*.c"
"${AWS_S2N_TLS_DIR}/stuffer/*.c"
"${AWS_S2N_TLS_DIR}/pq-crypto/*.c"
"${AWS_S2N_TLS_DIR}/pq-crypto/kyber_r3/*.c"
"${AWS_S2N_TLS_DIR}/tls/*.c"
"${AWS_S2N_TLS_DIR}/tls/extensions/*.c"
"${AWS_S2N_TLS_DIR}/utils/*.c"
)
list(APPEND AWS_SOURCES ${AWS_S2N_TLS_SRC})
list(APPEND AWS_PRIVATE_INCLUDES
"${AWS_S2N_TLS_DIR}/"
"${AWS_S2N_TLS_DIR}/api/"
)
endif()
# aws-crt-cpp
file(GLOB AWS_CRT_SRC
"${AWS_CRT_DIR}/source/*.cpp"
"${AWS_CRT_DIR}/source/auth/*.cpp"
"${AWS_CRT_DIR}/source/crypto/*.cpp"
"${AWS_CRT_DIR}/source/endpoints/*.cpp"
"${AWS_CRT_DIR}/source/external/*.cpp"
"${AWS_CRT_DIR}/source/http/*.cpp"
"${AWS_CRT_DIR}/source/io/*.cpp"
)
list(APPEND AWS_SOURCES ${AWS_CRT_SRC})
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_CRT_DIR}/include/")
# aws-c-mqtt
file(GLOB AWS_MQTT_SRC
"${AWS_MQTT_DIR}/source/*.c"
)
list(APPEND AWS_SOURCES ${AWS_MQTT_SRC})
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_MQTT_DIR}/include/")
# aws-c-http
file(GLOB AWS_HTTP_SRC
"${AWS_HTTP_DIR}/source/*.c"
)
list(APPEND AWS_SOURCES ${AWS_HTTP_SRC})
list(APPEND AWS_PRIVATE_INCLUDES "${AWS_HTTP_DIR}/include/")
# aws-c-compression
file(GLOB AWS_COMPRESSION_SRC
"${AWS_COMPRESSION_DIR}/source/*.c"
)
list(APPEND AWS_SOURCES ${AWS_COMPRESSION_SRC})
list(APPEND AWS_PRIVATE_INCLUDES "${AWS_COMPRESSION_DIR}/include/")
# aws-c-s3
file(GLOB AWS_S3_SRC
"${AWS_S3_DIR}/source/*.c"
)
list(APPEND AWS_SOURCES ${AWS_S3_SRC})
list(APPEND AWS_PRIVATE_INCLUDES "${AWS_S3_DIR}/include/")
# aws-c-sdkutils
file(GLOB AWS_SDKUTILS_SRC
"${AWS_SDKUTILS_DIR}/source/*.c"
)
list(APPEND AWS_SOURCES ${AWS_SDKUTILS_SRC})
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDKUTILS_DIR}/include/")
# Add library.
add_library(_aws ${AWS_SOURCES})
target_include_directories(_aws SYSTEM BEFORE PUBLIC ${AWS_PUBLIC_INCLUDES})
target_include_directories(_aws SYSTEM BEFORE PRIVATE ${AWS_PRIVATE_INCLUDES})
target_compile_definitions(_aws PUBLIC ${AWS_PUBLIC_COMPILE_DEFS})
target_compile_definitions(_aws PRIVATE ${AWS_PRIVATE_COMPILE_DEFS})
target_link_libraries(_aws PRIVATE ${AWS_PRIVATE_LIBS})
aws_set_thread_affinity_method(_aws)
aws_set_thread_name_method(_aws)
# The library is large - avoid bloat.
if (OMIT_HEAVY_DEBUG_SYMBOLS)
target_compile_options (_aws PRIVATE -g0)
endif()
add_library(ch_contrib::aws_s3 ALIAS _aws)

1
contrib/aws-crt-cpp vendored Submodule

@ -0,0 +1 @@
Subproject commit ec0bea288f451d884c0d80d534bc5c66241c39a4

1
contrib/aws-s2n-tls vendored Submodule

@ -0,0 +1 @@
Subproject commit 15d534e8a9ca1eda6bacee514e37d08b4f38a526

View File

@ -1,122 +0,0 @@
if(NOT OS_FREEBSD)
option(ENABLE_S3 "Enable S3" ${ENABLE_LIBRARIES})
elseif(ENABLE_S3)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use S3 on FreeBSD")
endif()
if(NOT ENABLE_S3)
message(STATUS "Not using S3")
return()
endif()
SET(AWS_S3_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3")
SET(AWS_CORE_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-core")
SET(AWS_CHECKSUMS_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-checksums")
SET(AWS_COMMON_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-common")
SET(AWS_EVENT_STREAM_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-event-stream")
OPTION(USE_AWS_MEMORY_MANAGEMENT "Aws memory management" OFF)
configure_file("${AWS_CORE_LIBRARY_DIR}/include/aws/core/SDKConfig.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY)
configure_file("${AWS_COMMON_LIBRARY_DIR}/include/aws/common/config.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/include/aws/common/config.h" @ONLY)
file(GLOB AWS_CORE_SOURCES
"${AWS_CORE_LIBRARY_DIR}/source/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/auth/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/client/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/http/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/http/standard/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/config/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/external/cjson/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/external/tinyxml2/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/internal/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/monitoring/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/net/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/linux-shared/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/platform/linux-shared/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/base64/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/event/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/openssl/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/factory/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/json/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/logging/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/memory/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/memory/stl/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/stream/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/threading/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/xml/*.cpp"
)
file(GLOB AWS_S3_SOURCES
"${AWS_S3_LIBRARY_DIR}/source/*.cpp"
)
file(GLOB AWS_S3_MODEL_SOURCES
"${AWS_S3_LIBRARY_DIR}/source/model/*.cpp"
)
file(GLOB AWS_EVENT_STREAM_SOURCES
"${AWS_EVENT_STREAM_LIBRARY_DIR}/source/*.c"
)
file(GLOB AWS_COMMON_SOURCES
"${AWS_COMMON_LIBRARY_DIR}/source/*.c"
"${AWS_COMMON_LIBRARY_DIR}/source/posix/*.c"
)
file(GLOB AWS_CHECKSUMS_SOURCES
"${AWS_CHECKSUMS_LIBRARY_DIR}/source/*.c"
"${AWS_CHECKSUMS_LIBRARY_DIR}/source/intel/*.c"
"${AWS_CHECKSUMS_LIBRARY_DIR}/source/arm/*.c"
)
file(GLOB S3_UNIFIED_SRC
${AWS_EVENT_STREAM_SOURCES}
${AWS_COMMON_SOURCES}
${AWS_S3_SOURCES}
${AWS_S3_MODEL_SOURCES}
${AWS_CORE_SOURCES}
)
set(S3_INCLUDES
"${AWS_COMMON_LIBRARY_DIR}/include/"
"${AWS_EVENT_STREAM_LIBRARY_DIR}/include/"
"${AWS_S3_LIBRARY_DIR}/include/"
"${AWS_CORE_LIBRARY_DIR}/include/"
"${CMAKE_CURRENT_BINARY_DIR}/include/"
)
add_library(_aws_s3_checksums ${AWS_CHECKSUMS_SOURCES})
target_include_directories(_aws_s3_checksums SYSTEM PUBLIC "${AWS_CHECKSUMS_LIBRARY_DIR}/include/")
if(CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
target_compile_definitions(_aws_s3_checksums PRIVATE "-DDEBUG_BUILD")
endif()
set_target_properties(_aws_s3_checksums PROPERTIES LINKER_LANGUAGE C)
set_property(TARGET _aws_s3_checksums PROPERTY C_STANDARD 99)
add_library(_aws_s3 ${S3_UNIFIED_SRC})
target_compile_definitions(_aws_s3 PUBLIC "AWS_SDK_VERSION_MAJOR=1")
target_compile_definitions(_aws_s3 PUBLIC "AWS_SDK_VERSION_MINOR=7")
target_compile_definitions(_aws_s3 PUBLIC "AWS_SDK_VERSION_PATCH=231")
target_include_directories(_aws_s3 SYSTEM BEFORE PUBLIC ${S3_INCLUDES})
if (TARGET OpenSSL::SSL)
target_compile_definitions(_aws_s3 PUBLIC -DENABLE_OPENSSL_ENCRYPTION)
target_link_libraries(_aws_s3 PRIVATE OpenSSL::Crypto OpenSSL::SSL)
endif()
target_link_libraries(_aws_s3 PRIVATE _aws_s3_checksums)
# The library is large - avoid bloat.
if (OMIT_HEAVY_DEBUG_SYMBOLS)
target_compile_options (_aws_s3 PRIVATE -g0)
target_compile_options (_aws_s3_checksums PRIVATE -g0)
endif()
add_library(ch_contrib::aws_s3 ALIAS _aws_s3)

View File

@ -139,13 +139,6 @@ if(NOT OPENSSL_NO_ASM)
endif()
endif()
if(BUILD_SHARED_LIBS)
add_definitions(-DBORINGSSL_SHARED_LIBRARY)
# Enable position-independent code globally. This is needed because
# some library targets are OBJECT libraries.
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
endif()
set(
CRYPTO_ios_aarch64_SOURCES

View File

@ -63,13 +63,8 @@ SET(SRCS
"${LIBRARY_DIR}/src/lib/windows_port.c"
)
if (USE_STATIC_LIBRARIES)
add_library(_c-ares STATIC ${SRCS})
target_compile_definitions(_c-ares PUBLIC CARES_STATICLIB)
else()
add_library(_c-ares SHARED ${SRCS})
target_compile_definitions(_c-ares PUBLIC CARES_BUILDING_LIBRARY)
endif()
add_library(_c-ares STATIC ${SRCS})
target_compile_definitions(_c-ares PUBLIC CARES_STATICLIB)
target_compile_definitions(_c-ares PRIVATE HAVE_CONFIG_H=1)

1
contrib/crc32-s390x vendored Submodule

@ -0,0 +1 @@
Subproject commit 30980583bf9ed3fa193abb83a1849705ff457f70

View File

@ -0,0 +1,27 @@
if(ARCH_S390X)
option (ENABLE_CRC32_S390X "Enable crc32 on s390x platform" ON)
endif()
if (NOT ENABLE_CRC32_S390X)
return()
endif()
set(CRC32_S390X_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/crc32-s390x)
set(CRC32_S390X_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/crc32-s390x)
set(CRC32_SRCS
"${CRC32_S390X_SOURCE_DIR}/crc32-s390x.c"
"${CRC32_S390X_SOURCE_DIR}/crc32be-vx.S"
"${CRC32_S390X_SOURCE_DIR}/crc32le-vx.S"
)
set(CRC32_HDRS
"${CRC32_S390X_INCLUDE_DIR}/crc32-s390x.h"
)
add_library(_crc32_s390x ${CRC32_SRCS} ${CRC32_HDRS})
target_include_directories(_crc32_s390x SYSTEM PUBLIC "${CRC32_S390X_INCLUDE_DIR}")
target_compile_definitions(_crc32_s390x PUBLIC)
add_library(ch_contrib::crc32_s390x ALIAS _crc32_s390x)

2
contrib/googletest vendored

@ -1 +1 @@
Subproject commit e7e591764baba0a0c3c9ad0014430e7a27331d16
Subproject commit 71140c3ca7a87bb1b5b9c9f1500fea8858cce344

View File

@ -136,11 +136,6 @@ add_library(ch_contrib::uv ALIAS _uv)
target_compile_definitions(_uv PRIVATE ${uv_defines})
target_include_directories(_uv SYSTEM PUBLIC ${SOURCE_DIR}/include PRIVATE ${SOURCE_DIR}/src)
target_link_libraries(_uv ${uv_libraries})
if (NOT USE_STATIC_LIBRARIES)
target_compile_definitions(_uv
INTERFACE USING_UV_SHARED=1
PRIVATE BUILDING_UV_SHARED=1)
endif()
if(UNIX)
# Now for some gibbering horrors from beyond the stars...

View File

@ -6,8 +6,6 @@ endif()
option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
# If USE_STATIC_LIBRARIES=0 was passed to CMake, we'll still build LLVM statically to keep complexity minimal.
if (NOT ENABLE_EMBEDDED_COMPILER)
message(STATUS "Not using LLVM")
return()

View File

@ -1,4 +1,4 @@
if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILER_CLANG))
if (NOT OS_FREEBSD AND NOT (OS_DARWIN AND COMPILER_CLANG))
option (ENABLE_SENTRY "Enable Sentry" ${ENABLE_LIBRARIES})
else()
option (ENABLE_SENTRY "Enable Sentry" OFF)
@ -51,11 +51,7 @@ endif()
add_library(_sentry ${SRCS})
if(BUILD_SHARED_LIBS)
target_compile_definitions(_sentry PRIVATE SENTRY_BUILD_SHARED)
else()
target_compile_definitions(_sentry PUBLIC SENTRY_BUILD_STATIC)
endif()
target_compile_definitions(_sentry PUBLIC SENTRY_BUILD_STATIC)
target_link_libraries(_sentry PRIVATE ch_contrib::curl pthread)
target_include_directories(_sentry PUBLIC "${SRC_DIR}/include" PRIVATE "${SRC_DIR}/src")

2
contrib/sysroot vendored

@ -1 +1 @@
Subproject commit 0f41651860fa4a530ecd68b93a15b8fd77397adf
Subproject commit f0081b2649b94837855f3bc7d05ef326b100bad8

View File

@ -2,7 +2,6 @@
"docker/packager/binary": {
"name": "clickhouse/binary-builder",
"dependent": [
"docker/test/split_build_smoke_test",
"docker/test/codebrowser"
]
},
@ -55,10 +54,6 @@
"name": "clickhouse/stress-test",
"dependent": []
},
"docker/test/split_build_smoke_test": {
"name": "clickhouse/split-build-smoke-test",
"dependent": []
},
"docker/test/codebrowser": {
"name": "clickhouse/codebrowser",
"dependent": []

View File

@ -107,8 +107,6 @@ fi
mv ./programs/clickhouse* /output
[ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
find . -name '*.so' -print -exec mv '{}' /output \;
find . -name '*.so.*' -print -exec mv '{}' /output \;
prepare_combined_output () {
local OUTPUT
@ -165,7 +163,7 @@ then
)
fi
# May be set for split build or for performance test.
# May be set for performance test.
if [ "" != "$COMBINED_OUTPUT" ]
then
prepare_combined_output /output

View File

@ -100,12 +100,11 @@ def run_docker_image_with_env(
subprocess.check_call(cmd, shell=True)
def is_release_build(build_type, package_type, sanitizer, shared_libraries):
def is_release_build(build_type, package_type, sanitizer):
return (
build_type == ""
and package_type == "deb"
and sanitizer == ""
and not shared_libraries
)
@ -116,7 +115,6 @@ def parse_env_variables(
package_type,
cache,
distcc_hosts,
shared_libraries,
clang_tidy,
version,
author,
@ -218,7 +216,7 @@ def parse_env_variables(
cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
if is_release_build(build_type, package_type, sanitizer, shared_libraries):
if is_release_build(build_type, package_type, sanitizer):
cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON")
result.append("WITH_PERFORMANCE=1")
if is_cross_arm:
@ -231,12 +229,10 @@ def parse_env_variables(
cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}")
cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}")
# Create combined output archive for shared library build and for performance tests.
# Create combined output archive for performance tests.
if package_type == "coverity":
result.append("COMBINED_OUTPUT=coverity")
result.append('COVERITY_TOKEN="$COVERITY_TOKEN"')
elif shared_libraries:
result.append("COMBINED_OUTPUT=shared_build")
if sanitizer:
result.append(f"SANITIZER={sanitizer}")
@ -285,15 +281,6 @@ def parse_env_variables(
result.append("BINARY_OUTPUT=tests")
cmake_flags.append("-DENABLE_TESTS=1")
if shared_libraries:
cmake_flags.append("-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1")
# We can't always build utils because it requires too much space, but
# we have to build them at least in some way in CI. The shared library
# build is probably the least heavy disk-wise.
cmake_flags.append("-DENABLE_UTILS=1")
# utils are not included into clickhouse-bundle, so build everything
build_target = "all"
if clang_tidy:
cmake_flags.append("-DENABLE_CLANG_TIDY=1")
cmake_flags.append("-DENABLE_TESTS=1")
@ -371,7 +358,6 @@ if __name__ == "__main__":
default="",
)
parser.add_argument("--shared-libraries", action="store_true")
parser.add_argument("--clang-tidy", action="store_true")
parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="")
parser.add_argument(
@ -424,7 +410,6 @@ if __name__ == "__main__":
args.package_type,
args.cache,
args.distcc_hosts,
args.shared_libraries,
args.clang_tidy,
args.version,
args.author,

View File

@ -157,7 +157,7 @@ function fuzz
mkdir -p /var/run/clickhouse-server
# NOTE: we use process substitution here to preserve keep $! as a pid of clickhouse-server
clickhouse-server --config-file db/config.xml --pid-file /var/run/clickhouse-server/clickhouse-server.pid -- --path db 2>&1 | pigz > server.log.gz &
clickhouse-server --config-file db/config.xml --pid-file /var/run/clickhouse-server/clickhouse-server.pid -- --path db > server.log 2>&1 &
server_pid=$!
kill -0 $server_pid
@ -262,12 +262,12 @@ quit
if [ "$server_died" == 1 ]
then
# The server has died.
if ! zgrep --text -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log.gz > description.txt
if ! grep --text -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt
then
echo "Lost connection to server. See the logs." > description.txt
fi
if grep -F --text 'Sanitizer: out-of-memory' description.txt
if grep -E --text 'Sanitizer: (out-of-memory|failed to allocate)' description.txt
then
# OOM of sanitizer is not a problem we can handle - treat it as success, but preserve the description.
task_exit_code=0
@ -342,24 +342,28 @@ case "$stage" in
time fuzz
;&
"report")
CORE_LINK=''
if [ -f core.gz ]; then
CORE_LINK='<a href="core.gz">core.gz</a>'
fi
grep --text -F '<Fatal>' server.log > fatal.log ||:
pigz server.log
cat > report.html <<EOF ||:
<!DOCTYPE html>
<html lang="en">
<style>
body { font-family: "DejaVu Sans", "Noto Sans", Arial, sans-serif; background: #EEE; }
h1 { margin-left: 10px; }
th, td { border: 0; padding: 5px 10px 5px 10px; text-align: left; vertical-align: top; line-height: 1.5; background-color: #FFF;
td { white-space: pre; font-family: Monospace, Courier New; }
border: 0; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }
th, td { border: 0; padding: 5px 10px 5px 10px; text-align: left; vertical-align: top; line-height: 1.5; background-color: #FFF; }
td { white-space: pre; font-family: Monospace, Courier New; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }
a { color: #06F; text-decoration: none; }
a:hover, a:active { color: #F40; text-decoration: underline; }
table { border: 0; }
p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-space: nowrap; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }
th { cursor: pointer; }
</style>
<title>AST Fuzzer for PR #${PR_TO_TEST} @ ${SHA_TO_TEST}</title>
@ -384,7 +388,14 @@ th { cursor: pointer; }
<tr>
<td>AST Fuzzer</td>
<td>$(cat status.txt)</td>
<td style="white-space: pre;">$(clickhouse-local --input-format RawBLOB --output-format RawBLOB --query "SELECT encodeXMLComponent(*) FROM table" < description.txt || cat description.txt)</td>
<td>$(
clickhouse-local --input-format RawBLOB --output-format RawBLOB --query "SELECT encodeXMLComponent(*) FROM table" < description.txt || cat description.txt
)</td>
</tr>
<tr>
<td colspan="3" style="white-space: pre-wrap;">$(
clickhouse-local --input-format RawBLOB --output-format RawBLOB --query "SELECT encodeXMLComponent(*) FROM table" < fatal.log || cat fatal.log
)</td>
</tr>
</table>
</body>

View File

@ -57,14 +57,17 @@ RUN arch=${TARGETARCH:-amd64} \
# ZooKeeper is not started by default, but consumes some space in containers.
# 777 perms used to allow anybody to start/stop ZooKeeper
ENV ZOOKEEPER_VERSION='3.6.3'
RUN curl -O "https://dlcdn.apache.org/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
RUN tar -zxvf apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz && mv apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper && chmod -R 777 /opt/zookeeper && rm apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz
RUN echo $'tickTime=2500 \n\
RUN curl "https://archive.apache.org/dist/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz" | \
tar -C opt -zxv && \
mv /opt/apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper && \
chmod -R 777 /opt/zookeeper && \
echo $'tickTime=2500 \n\
tickTime=2500 \n\
dataDir=/zookeeper \n\
clientPort=2181 \n\
maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg
RUN mkdir /zookeeper && chmod -R 777 /zookeeper
maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg && \
mkdir /zookeeper && \
chmod -R 777 /zookeeper
ENV TZ=Etc/UTC
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

View File

@ -83,6 +83,7 @@ RUN python3 -m pip install \
pytest \
pytest-order==1.0.0 \
pytest-timeout \
pytest-random \
pytest-xdist \
pytest-repeat \
pytz \

View File

@ -0,0 +1,5 @@
version: '2.3'
# Used to pre-pull images with docker-compose
services:
clickhouse1:
image: clickhouse/integration-test

View File

@ -5,10 +5,10 @@ services:
hostname: hdfs1
restart: always
expose:
- ${HDFS_NAME_PORT}
- ${HDFS_DATA_PORT}
- ${HDFS_NAME_PORT:-50070}
- ${HDFS_DATA_PORT:-50075}
entrypoint: /etc/bootstrap.sh -d
volumes:
- type: ${HDFS_FS:-tmpfs}
source: ${HDFS_LOGS:-}
target: /usr/local/hadoop/logs
target: /usr/local/hadoop/logs

View File

@ -15,7 +15,7 @@ services:
image: confluentinc/cp-kafka:5.2.0
hostname: kafka1
ports:
- ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT}
- ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081}
environment:
KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092
KAFKA_ADVERTISED_HOST_NAME: kafka1
@ -35,7 +35,7 @@ services:
image: confluentinc/cp-schema-registry:5.2.0
hostname: schema-registry
ports:
- ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_INTERNAL_PORT}
- ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313}
environment:
SCHEMA_REGISTRY_HOST_NAME: schema-registry
SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT

View File

@ -15,8 +15,8 @@ services:
source: ${KERBERIZED_HDFS_LOGS:-}
target: /var/log/hadoop-hdfs
expose:
- ${KERBERIZED_HDFS_NAME_PORT}
- ${KERBERIZED_HDFS_DATA_PORT}
- ${KERBERIZED_HDFS_NAME_PORT:-50070}
- ${KERBERIZED_HDFS_DATA_PORT:-1006}
depends_on:
- hdfskerberos
entrypoint: /etc/bootstrap.sh -d

View File

@ -23,7 +23,7 @@ services:
# restart: always
hostname: kerberized_kafka1
ports:
- ${KERBERIZED_KAFKA_EXTERNAL_PORT}:${KERBERIZED_KAFKA_EXTERNAL_PORT}
- ${KERBERIZED_KAFKA_EXTERNAL_PORT:-19092}:${KERBERIZED_KAFKA_EXTERNAL_PORT:-19092}
environment:
KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://0.0.0.0:${KERBERIZED_KAFKA_EXTERNAL_PORT}
KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:${KERBERIZED_KAFKA_EXTERNAL_PORT}
@ -41,7 +41,7 @@ services:
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true"
volumes:
- ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
- ${KERBERIZED_KAFKA_DIR:-}/secrets:/etc/kafka/secrets
- /dev/urandom:/dev/random
depends_on:
- kafka_kerberized_zookeeper

View File

@ -4,13 +4,13 @@ services:
image: getmeili/meilisearch:v0.27.0
restart: always
ports:
- ${MEILI_EXTERNAL_PORT}:${MEILI_INTERNAL_PORT}
- ${MEILI_EXTERNAL_PORT:-7700}:${MEILI_INTERNAL_PORT:-7700}
meili_secure:
image: getmeili/meilisearch:v0.27.0
restart: always
ports:
- ${MEILI_SECURE_EXTERNAL_PORT}:${MEILI_SECURE_INTERNAL_PORT}
- ${MEILI_SECURE_EXTERNAL_PORT:-7700}:${MEILI_SECURE_INTERNAL_PORT:-7700}
environment:
MEILI_MASTER_KEY: "password"

View File

@ -9,7 +9,7 @@ services:
- data1-1:/data1
- ${MINIO_CERTS_DIR:-}:/certs
expose:
- ${MINIO_PORT}
- ${MINIO_PORT:-9001}
environment:
MINIO_ACCESS_KEY: minio
MINIO_SECRET_KEY: minio123

View File

@ -7,11 +7,11 @@ services:
MONGO_INITDB_ROOT_USERNAME: root
MONGO_INITDB_ROOT_PASSWORD: clickhouse
ports:
- ${MONGO_EXTERNAL_PORT}:${MONGO_INTERNAL_PORT}
- ${MONGO_EXTERNAL_PORT:-27017}:${MONGO_INTERNAL_PORT:-27017}
command: --profile=2 --verbose
mongo2:
image: mongo:5.0
restart: always
ports:
- ${MONGO_NO_CRED_EXTERNAL_PORT}:${MONGO_NO_CRED_INTERNAL_PORT}
- ${MONGO_NO_CRED_EXTERNAL_PORT:-27017}:${MONGO_NO_CRED_INTERNAL_PORT:-27017}

View File

@ -7,7 +7,7 @@ services:
MONGO_INITDB_ROOT_USERNAME: root
MONGO_INITDB_ROOT_PASSWORD: clickhouse
volumes:
- ${MONGO_CONFIG_PATH}:/mongo/
- ${MONGO_CONFIG_PATH:-}:/mongo/
ports:
- ${MONGO_EXTERNAL_PORT}:${MONGO_INTERNAL_PORT}
- ${MONGO_EXTERNAL_PORT:-27017}:${MONGO_INTERNAL_PORT:-27017}
command: --config /mongo/mongo_secure.conf --profile=2 --verbose

View File

@ -8,7 +8,7 @@ services:
MYSQL_ROOT_HOST: ${MYSQL_ROOT_HOST}
DATADIR: /mysql/
expose:
- ${MYSQL_PORT}
- ${MYSQL_PORT:-3306}
command: --server_id=100
--log-bin='mysql-bin-1.log'
--default-time-zone='+3:00'

View File

@ -1,21 +0,0 @@
version: '2.3'
services:
mysql1:
image: mysql:5.7
restart: 'no'
environment:
MYSQL_ROOT_PASSWORD: clickhouse
ports:
- 3308:3306
command: --server_id=100 --log-bin='mysql-bin-1.log'
--default-time-zone='+3:00'
--gtid-mode="ON"
--enforce-gtid-consistency
--log-error-verbosity=3
--log-error=/var/log/mysqld/error.log
--general-log=ON
--general-log-file=/var/log/mysqld/general.log
volumes:
- type: ${MYSQL_LOGS_FS:-tmpfs}
source: ${MYSQL_LOGS:-}
target: /var/log/mysqld/

View File

@ -8,7 +8,7 @@ services:
MYSQL_ROOT_HOST: ${MYSQL_ROOT_HOST}
DATADIR: /mysql/
expose:
- ${MYSQL8_PORT}
- ${MYSQL8_PORT:-3306}
command: --server_id=100 --log-bin='mysql-bin-1.log'
--default_authentication_plugin='mysql_native_password'
--default-time-zone='+3:00' --gtid-mode="ON"

View File

@ -8,7 +8,7 @@ services:
MYSQL_ROOT_HOST: ${MYSQL_CLUSTER_ROOT_HOST}
DATADIR: /mysql/
expose:
- ${MYSQL_CLUSTER_PORT}
- ${MYSQL_CLUSTER_PORT:-3306}
command: --server_id=100
--log-bin='mysql-bin-2.log'
--default-time-zone='+3:00'
@ -30,7 +30,7 @@ services:
MYSQL_ROOT_HOST: ${MYSQL_CLUSTER_ROOT_HOST}
DATADIR: /mysql/
expose:
- ${MYSQL_CLUSTER_PORT}
- ${MYSQL_CLUSTER_PORT:-3306}
command: --server_id=100
--log-bin='mysql-bin-3.log'
--default-time-zone='+3:00'
@ -52,7 +52,7 @@ services:
MYSQL_ROOT_HOST: ${MYSQL_CLUSTER_ROOT_HOST}
DATADIR: /mysql/
expose:
- ${MYSQL_CLUSTER_PORT}
- ${MYSQL_CLUSTER_PORT:-3306}
command: --server_id=100
--log-bin='mysql-bin-4.log'
--default-time-zone='+3:00'

View File

@ -3,9 +3,9 @@ services:
nats1:
image: nats
ports:
- "${NATS_EXTERNAL_PORT}:${NATS_INTERNAL_PORT}"
- "${NATS_EXTERNAL_PORT:-4444}:${NATS_INTERNAL_PORT:-4444}"
command: "-p 4444 --user click --pass house --tls --tlscert=/etc/certs/server-cert.pem --tlskey=/etc/certs/server-key.pem"
volumes:
- type: bind
source: "${NATS_CERT_DIR}/nats"
source: "${NATS_CERT_DIR:-}/nats"
target: /etc/certs

View File

@ -5,7 +5,7 @@ services:
command: ["postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=2", "-c", "logging_collector=on", "-c", "log_directory=/postgres/logs", "-c", "log_filename=postgresql.log", "-c", "log_statement=all", "-c", "max_connections=200"]
restart: always
expose:
- ${POSTGRES_PORT}
- ${POSTGRES_PORT:-5432}
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 10s

View File

@ -9,7 +9,7 @@ services:
POSTGRES_PASSWORD: mysecretpassword
PGDATA: /postgres/data
expose:
- ${POSTGRES_PORT}
- ${POSTGRES_PORT:-5432}
volumes:
- type: ${POSTGRES_LOGS_FS:-tmpfs}
source: ${POSTGRES2_DIR:-}
@ -23,7 +23,7 @@ services:
POSTGRES_PASSWORD: mysecretpassword
PGDATA: /postgres/data
expose:
- ${POSTGRES_PORT}
- ${POSTGRES_PORT:-5432}
volumes:
- type: ${POSTGRES_LOGS_FS:-tmpfs}
source: ${POSTGRES3_DIR:-}
@ -37,7 +37,7 @@ services:
POSTGRES_PASSWORD: mysecretpassword
PGDATA: /postgres/data
expose:
- ${POSTGRES_PORT}
- ${POSTGRES_PORT:-5432}
volumes:
- type: ${POSTGRES_LOGS_FS:-tmpfs}
source: ${POSTGRES4_DIR:-}

View File

@ -5,7 +5,7 @@ services:
image: rabbitmq:3.8-management-alpine
hostname: rabbitmq1
expose:
- ${RABBITMQ_PORT}
- ${RABBITMQ_PORT:-5672}
environment:
RABBITMQ_DEFAULT_USER: "root"
RABBITMQ_DEFAULT_PASS: "clickhouse"

View File

@ -4,5 +4,5 @@ services:
image: redis
restart: always
ports:
- ${REDIS_EXTERNAL_PORT}:${REDIS_INTERNAL_PORT}
- ${REDIS_EXTERNAL_PORT:-6379}:${REDIS_INTERNAL_PORT:-6379}
command: redis-server --requirepass "clickhouse" --databases 32

View File

@ -297,6 +297,7 @@ if not args.use_existing_tables:
# Let's sync the data to avoid writeback affects performance
os.system("sync")
reportStageEnd("sync")
# By default, test all queries.
queries_to_run = range(0, len(test_queries))

View File

@ -1,9 +0,0 @@
# rebuild in #33610
# docker build -t clickhouse/split-build-smoke-test .
ARG FROM_TAG=latest
FROM clickhouse/binary-builder:$FROM_TAG
COPY run.sh /run.sh
COPY process_split_build_smoke_test_result.py /
CMD /run.sh

View File

@ -1,64 +0,0 @@
#!/usr/bin/env python3
import os
import logging
import argparse
import csv
RESULT_LOG_NAME = "run.log"
def process_result(result_folder):
status = "success"
description = "Server started and responded"
summary = [("Smoke test", "OK")]
with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
lines = run_log.read().split("\n")
if not lines or lines[0].strip() != "OK":
status = "failure"
logging.info("Lines is not ok: %s", str("\n".join(lines)))
summary = [("Smoke test", "FAIL")]
description = "Server failed to respond, see result in logs"
result_logs = []
server_log_path = os.path.join(result_folder, "clickhouse-server.log")
stderr_log_path = os.path.join(result_folder, "stderr.log")
client_stderr_log_path = os.path.join(result_folder, "clientstderr.log")
if os.path.exists(server_log_path):
result_logs.append(server_log_path)
if os.path.exists(stderr_log_path):
result_logs.append(stderr_log_path)
if os.path.exists(client_stderr_log_path):
result_logs.append(client_stderr_log_path)
return status, description, summary, result_logs
def write_results(results_file, status_file, results, status):
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of split build smoke test"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir)
logging.info("Result parsed")
status = (state, description)
write_results(args.out_results_file, args.out_status_file, test_results, status)
logging.info("Result written")

View File

@ -1,22 +0,0 @@
#!/bin/bash
set -x
install_and_run_server() {
mkdir /unpacked
tar -xzf /package_folder/shared_build.tgz -C /unpacked --strip 1
LD_LIBRARY_PATH=/unpacked /unpacked/clickhouse-server --config /unpacked/config/config.xml >/test_output/stderr.log 2>&1 &
}
run_client() {
for i in {1..100}; do
sleep 1
LD_LIBRARY_PATH=/unpacked /unpacked/clickhouse-client --query "select 'OK'" > /test_output/run.log 2> /test_output/clientstderr.log && break
[[ $i == 100 ]] && echo 'FAIL'
done
}
install_and_run_server
run_client
mv /var/log/clickhouse-server/clickhouse-server.log /test_output/clickhouse-server.log
/process_split_build_smoke_test_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv

View File

@ -1,90 +1,151 @@
#!/bin/bash
USAGE='Usage for local run:
set -euxf -o pipefail
./docker/test/stateless/setup_minio.sh { stateful | stateless } ./tests/
export MINIO_ROOT_USER=${MINIO_ROOT_USER:-clickhouse}
export MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-clickhouse}
'
usage() {
echo $"Usage: $0 <stateful|stateless> <test_path> (default path: /usr/share/clickhouse-test)"
exit 1
}
set -e -x -a -u
TEST_TYPE="$1"
shift
case $TEST_TYPE in
stateless) QUERY_DIR=0_stateless ;;
stateful) QUERY_DIR=1_stateful ;;
*) echo "unknown test type $TEST_TYPE"; echo "${USAGE}"; exit 1 ;;
esac
ls -lha
mkdir -p ./minio_data
if [ ! -f ./minio ]; then
MINIO_SERVER_VERSION=${MINIO_SERVER_VERSION:-2022-09-07T22-25-02Z}
MINIO_CLIENT_VERSION=${MINIO_CLIENT_VERSION:-2022-08-28T20-08-11Z}
case $(uname -m) in
x86_64) BIN_ARCH=amd64 ;;
aarch64) BIN_ARCH=arm64 ;;
*) echo "unknown architecture $(uname -m)"; exit 1 ;;
esac
echo 'MinIO binary not found, downloading...'
BINARY_TYPE=$(uname -s | tr '[:upper:]' '[:lower:]')
wget "https://dl.min.io/server/minio/release/${BINARY_TYPE}-${BIN_ARCH}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -O ./minio \
&& wget "https://dl.min.io/client/mc/release/${BINARY_TYPE}-${BIN_ARCH}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -O ./mc \
&& chmod +x ./mc ./minio
fi
MINIO_ROOT_USER=${MINIO_ROOT_USER:-clickhouse}
MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-clickhouse}
./minio --version
./minio server --address ":11111" ./minio_data &
i=0
while ! curl -v --silent http://localhost:11111 2>&1 | grep AccessDenied
do
if [[ $i == 60 ]]; then
echo "Failed to setup minio"
exit 0
check_arg() {
local query_dir
if [ ! $# -eq 1 ]; then
if [ ! $# -eq 2 ]; then
echo "ERROR: need either one or two arguments, <stateful|stateless> <test_path> (default path: /usr/share/clickhouse-test)"
usage
fi
fi
echo "Trying to connect to minio"
sleep 1
i=$((i + 1))
done
case "$1" in
stateless)
query_dir="0_stateless"
;;
stateful)
query_dir="1_stateful"
;;
*)
echo "unknown test type ${test_type}"
usage
;;
esac
echo ${query_dir}
}
lsof -i :11111
find_arch() {
local arch
case $(uname -m) in
x86_64)
arch="amd64"
;;
aarch64)
arch="arm64"
;;
*)
echo "unknown architecture $(uname -m)";
exit 1
;;
esac
echo ${arch}
}
sleep 5
find_os() {
local os
os=$(uname -s | tr '[:upper:]' '[:lower:]')
echo "${os}"
}
./mc alias set clickminio http://localhost:11111 clickhouse clickhouse
./mc admin user add clickminio test testtest
./mc admin policy set clickminio readwrite user=test
./mc mb clickminio/test
if [ "$TEST_TYPE" = "stateless" ]; then
./mc policy set public clickminio/test
fi
download_minio() {
local os
local arch
local minio_server_version=${MINIO_SERVER_VERSION:-2022-09-07T22-25-02Z}
local minio_client_version=${MINIO_CLIENT_VERSION:-2022-08-28T20-08-11Z}
os=$(find_os)
arch=$(find_arch)
wget "https://dl.min.io/server/minio/release/${os}-${arch}/archive/minio.RELEASE.${minio_server_version}" -O ./minio
wget "https://dl.min.io/client/mc/release/${os}-${arch}/archive/mc.RELEASE.${minio_client_version}" -O ./mc
chmod +x ./mc ./minio
}
# Upload data to Minio. By default after unpacking all tests will in
# /usr/share/clickhouse-test/queries
start_minio() {
mkdir -p ./minio_data
./minio --version
./minio server --address ":11111" ./minio_data &
wait_for_it
lsof -i :11111
sleep 5
}
TEST_PATH=${1:-/usr/share/clickhouse-test}
MINIO_DATA_PATH=${TEST_PATH}/queries/${QUERY_DIR}/data_minio
setup_minio() {
local test_type=$1
./mc alias set clickminio http://localhost:11111 clickhouse clickhouse
./mc admin user add clickminio test testtest
./mc admin policy set clickminio readwrite user=test
./mc mb clickminio/test
if [ "$test_type" = "stateless" ]; then
./mc policy set public clickminio/test
fi
}
# Iterating over globs will cause redudant FILE variale to be a path to a file, not a filename
# shellcheck disable=SC2045
for FILE in $(ls "${MINIO_DATA_PATH}"); do
echo "$FILE";
./mc cp "${MINIO_DATA_PATH}"/"$FILE" clickminio/test/"$FILE";
done
# uploads data to minio, by default after unpacking all tests
# will be in /usr/share/clickhouse-test/queries
upload_data() {
local query_dir=$1
local test_path=$2
local data_path=${test_path}/queries/${query_dir}/data_minio
mkdir -p ~/.aws
cat <<EOT >> ~/.aws/credentials
# iterating over globs will cause redundant file variable to be
# a path to a file, not a filename
# shellcheck disable=SC2045
for file in $(ls "${data_path}"); do
echo "${file}";
./mc cp "${data_path}"/"${file}" clickminio/test/"${file}";
done
}
setup_aws_credentials() {
local minio_root_user=${MINIO_ROOT_USER:-clickhouse}
local minio_root_password=${MINIO_ROOT_PASSWORD:-clickhouse}
mkdir -p ~/.aws
cat <<EOT >> ~/.aws/credentials
[default]
aws_access_key_id=${MINIO_ROOT_USER}
aws_secret_access_key=${MINIO_ROOT_PASSWORD}
aws_access_key_id=${minio_root_user}
aws_secret_access_key=${minio_root_password}
EOT
}
wait_for_it() {
local counter=0
local max_counter=60
local url="http://localhost:11111"
local params=(
--silent
--verbose
)
while ! curl "${params[@]}" "${url}" 2>&1 | grep AccessDenied
do
if [[ ${counter} == "${max_counter}" ]]; then
echo "failed to setup minio"
exit 0
fi
echo "trying to connect to minio"
sleep 1
counter=$((counter + 1))
done
}
main() {
local query_dir
query_dir=$(check_arg "$@")
if [ ! -f ./minio ]; then
download_minio
fi
start_minio
setup_minio "$1"
upload_data "${query_dir}" "${2:-/usr/share/clickhouse-test}"
setup_aws_credentials
}
main "$@"

View File

@ -11,31 +11,6 @@ set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
# Thread Fuzzer allows to check more permutations of possible thread scheduling
# and find more potential issues.
# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540
is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'")
if [ "$is_tsan_build" -eq "0" ]; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
fi
function install_packages()
{
@ -54,7 +29,7 @@ function configure()
# we mount tests folder from repo to /usr/share
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
ln -s /usr/share/clickhouse-test/ci/download_release_packets.py /usr/bin/download_release_packets
ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
# avoid too slow startup
@ -78,6 +53,7 @@ function configure()
local total_mem
total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
total_mem=$(( total_mem*1024 )) # bytes
# Set maximum memory usage as half of total memory (less chance of OOM).
#
# But not via max_server_memory_usage but via max_memory_usage_for_user,
@ -90,16 +66,17 @@ function configure()
# max_server_memory_usage will be hard limit, and queries that should be
# executed regardless memory limits will use max_memory_usage_for_user=0,
# instead of relying on max_untracked_memory
local max_server_mem
max_server_mem=$((total_mem*75/100)) # 75%
echo "Setting max_server_memory_usage=$max_server_mem"
max_server_memory_usage_to_ram_ratio=0.5
echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}"
cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
<clickhouse>
<max_server_memory_usage>${max_server_mem}</max_server_memory_usage>
<max_server_memory_usage_to_ram_ratio>${max_server_memory_usage_to_ram_ratio}</max_server_memory_usage_to_ram_ratio>
</clickhouse>
EOL
local max_users_mem
max_users_mem=$((total_mem*50/100)) # 50%
max_users_mem=$((total_mem*30/100)) # 30%
echo "Setting max_memory_usage_for_user=$max_users_mem"
cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
<clickhouse>
@ -122,6 +99,13 @@ EOL
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL
# Let OOM killer terminate other processes before clickhouse-server:
cat > /etc/clickhouse-server/config.d/oom_score.xml <<EOL
<clickhouse>
<oom_score>-1000</oom_score>
</clickhouse>
EOL
# Analyzer is not yet ready for testing
@ -226,6 +210,31 @@ quit
install_packages package_folder
# Thread Fuzzer allows to check more permutations of possible thread scheduling
# and find more potential issues.
# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540
is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'")
if [ "$is_tsan_build" -eq "0" ]; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
fi
export ZOOKEEPER_FAULT_INJECTION=1
configure
@ -360,10 +369,10 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
echo "Clone previous release repository"
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
echo "Download previous release server"
echo "Download clickhouse-server from the previous release"
mkdir previous_release_package_folder
echo $previous_release_tag | download_release_packets && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
echo $previous_release_tag | download_release_packages && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
@ -380,10 +389,10 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
echo -e "Backward compatibility check: Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv
echo -e "Backward compatibility check: Failed to download previous release packages\tFAIL" >> /test_output/test_results.tsv
else
echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages\tOK" >> /test_output/test_results.tsv
# Uninstall current packages
dpkg --remove clickhouse-client

View File

@ -14,9 +14,6 @@ def get_options(i, backward_compatibility_check):
if 0 < i:
options.append("--order=random")
if i % 3 == 1:
options.append("--db-engine=Ordinary")
if i % 3 == 2 and not backward_compatibility_check:
options.append(
'''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i)

View File

@ -1,82 +0,0 @@
# docker build -t clickhouse/testflows-runner .
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
ca-certificates \
bash \
btrfs-progs \
e2fsprogs \
iptables \
xfsprogs \
tar \
pigz \
wget \
git \
iproute2 \
cgroupfs-mount \
python3-pip \
tzdata \
libicu-dev \
bsdutils \
curl \
liblua5.1-dev \
luajit \
libssl-dev \
libcurl4-openssl-dev \
gdb \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
/tmp/* \
&& apt-get clean
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.2 docker==5.0.0 dicttoxml kazoo tzlocal==2.1 pytz python-dateutil numpy
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 20.10.6
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
# Install docker
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& set -eux \
&& if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/${rarch}/docker-${DOCKER_VERSION}.tgz"; then \
echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${rarch}'" \
&& exit 1; \
fi \
&& tar --extract \
--file docker.tgz \
--strip-components 1 \
--directory /usr/local/bin/ \
&& rm docker.tgz \
&& dockerd --version \
&& docker --version
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/
COPY process_testflows_result.py /usr/local/bin/
RUN set -x \
&& addgroup --system dockremap \
&& adduser --system dockremap \
&& adduser dockremap dockremap \
&& echo 'dockremap:165536:65536' >> /etc/subuid \
&& echo 'dockremap:165536:65536' >> /etc/subgid
VOLUME /var/lib/docker
EXPOSE 2375
ENTRYPOINT ["dockerd-entrypoint.sh"]
CMD ["sh", "-c", "python3 regression.py --no-color -o new-fails --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json; /usr/local/bin/process_testflows_result.py || echo -e 'failure\tCannot parse results' > check_status.tsv; find * -type f | grep _instances | grep clickhouse-server | xargs -n1 tar -rvf clickhouse_logs.tar; gzip -9 clickhouse_logs.tar"]

View File

@ -1,39 +0,0 @@
#!/bin/bash
set -e
echo "Configure to use Yandex dockerhub-proxy"
mkdir -p /etc/docker/
cat > /etc/docker/daemon.json << EOF
{
"insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"],
"registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
}
EOF
# In case of test hung it is convenient to use pytest --pdb to debug it,
# and on hung you can simply press Ctrl-C and it will spawn a python pdb,
# but on SIGINT dockerd will exit, so ignore it to preserve the daemon.
trap '' INT
dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 &>/var/log/somefile &
set +e
reties=0
while true; do
docker info &>/dev/null && break
reties=$((reties+1))
if [[ $reties -ge 100 ]]; then # 10 sec max
echo "Can't start docker daemon, timeout exceeded." >&2
exit 1;
fi
sleep 0.1
done
set -e
echo "Start tests"
export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse
export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/clickhouse
export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/clickhouse-config
export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge
cd /ClickHouse/tests/testflows
exec "$@"

View File

@ -1,20 +0,0 @@
#!/bin/sh
set -eu
# "modprobe" without modprobe
# https://twitter.com/lucabruno/status/902934379835662336
# this isn't 100% fool-proof, but it'll have a much higher success rate than simply using the "real" modprobe
# Docker often uses "modprobe -va foo bar baz"
# so we ignore modules that start with "-"
for module; do
if [ "${module#-}" = "$module" ]; then
ip link show "$module" || true
lsmod | grep "$module" || true
fi
done
# remove /usr/local/... from PATH so we can exec the real modprobe as a last resort
export PATH='/usr/sbin:/usr/bin:/sbin:/bin'
exec modprobe "$@"

View File

@ -1,71 +0,0 @@
#!/usr/bin/env python3
import os
import logging
import argparse
import csv
import json
def process_result(result_folder):
json_path = os.path.join(result_folder, "results.json")
if not os.path.exists(json_path):
return "success", "No testflows in branch", None, []
test_binary_log = os.path.join(result_folder, "test.log")
with open(json_path) as source:
results = json.loads(source.read())
total_tests = 0
total_ok = 0
total_fail = 0
total_other = 0
test_results = []
for test in results["tests"]:
test_name = test["test"]["test_name"]
test_result = test["result"]["result_type"].upper()
test_time = str(test["result"]["message_rtime"])
total_tests += 1
if test_result == "OK":
total_ok += 1
elif test_result == "FAIL" or test_result == "ERROR":
total_fail += 1
else:
total_other += 1
test_results.append((test_name, test_result, test_time))
if total_fail != 0:
status = "failure"
else:
status = "success"
description = "failed: {}, passed: {}, other: {}".format(
total_fail, total_ok, total_other
)
return status, description, test_results, [json_path, test_binary_log]
def write_results(results_file, status_file, results, status):
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of Testflows tests"
)
parser.add_argument("--in-results-dir", default="./")
parser.add_argument("--out-results-file", default="./test_results.tsv")
parser.add_argument("--out-status-file", default="./check_status.tsv")
args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir)
logging.info("Result parsed")
status = (state, description)
write_results(args.out_results_file, args.out_status_file, test_results, status)
logging.info("Result written")

View File

@ -118,7 +118,6 @@ Builds ClickHouse in various configurations for use in further steps. You have t
- **Compiler**: `gcc-9` or `clang-10` (or `clang-10-xx` for other architectures e.g. `clang-10-freebsd`).
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Split** `splitted` is a [split build](../development/build.md#split-build)
- **Status**: `success` or `fail`
- **Build log**: link to the building and files copying log, useful when build failed.
- **Build time**.
@ -130,7 +129,6 @@ Builds ClickHouse in various configurations for use in further steps. You have t
- `clickhouse`: Main built binary.
- `clickhouse-odbc-bridge`
- `unit_tests_dbms`: GoogleTest binary with ClickHouse unit tests.
- `shared_build.tgz`: build with shared libraries.
- `performance.tgz`: Special package for performance tests.
@ -169,16 +167,6 @@ concurrency-related errors. If it fails:
of error.
## Split Build Smoke Test
Checks that the server build in [split build](../development/developer-instruction.md#split-build)
configuration can start and run simple queries. If it fails:
* Fix other test errors first;
* Build the server in [split build](../development/developer-instruction.md#split-build) configuration
locally and check whether it can start and run `select 1`.
## Compatibility Check
Checks that `clickhouse` binary runs on distributions with old libc versions. If it fails, ask a maintainer for help.

View File

@ -202,7 +202,7 @@ Google OSS-Fuzz can be found at `docker/fuzz`.
We also use simple fuzz test to generate random SQL queries and to check that the server does not die executing them.
You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
We also use sophisticated AST-based query fuzzer that is able to find huge amount of corner cases. It does random permutations and substitutions in queries AST. It remembers AST nodes from previous tests to use them for fuzzing of subsequent tests while processing them in random order. You can learn more about this fuzzer in [this blog article](https://clickhouse.com/blog/en/2021/fuzzing-clickhouse/).
We also use sophisticated AST-based query fuzzer that is able to find huge amount of corner cases. It does random permutations and substitutions in queries AST. It remembers AST nodes from previous tests to use them for fuzzing of subsequent tests while processing them in random order. You can learn more about this fuzzer in [this blog article](https://clickhouse.com/blog/fuzzing-click-house).
## Stress test

View File

@ -34,7 +34,14 @@ SETTINGS
[kafka_max_block_size = 0,]
[kafka_skip_broken_messages = N,]
[kafka_commit_every_batch = 0,]
[kafka_thread_per_consumer = 0]
[kafka_client_id = '',]
[kafka_poll_timeout_ms = 0,]
[kafka_poll_max_batch_size = 0,]
[kafka_flush_interval_ms = 0,]
[kafka_thread_per_consumer = 0,]
[kafka_handle_error_mode = 'default',]
[kafka_commit_on_select = false,]
[kafka_max_rows_per_message = 1];
```
Required parameters:
@ -46,13 +53,20 @@ Required parameters:
Optional parameters:
- `kafka_row_delimiter` — Delimiter character, which ends the message.
- `kafka_row_delimiter` — Delimiter character, which ends the message. **This setting is deprecated and is no longer used, not left for compatibility reasons.**
- `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Capn Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
- `kafka_num_consumers` — The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed.
- `kafka_max_block_size` — The maximum batch size (in messages) for poll (default: `max_block_size`).
- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data).
- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`).
- `kafka_thread_per_consumer` — Provide independent thread for each consumer (default: `0`). When enabled, every consumer flush the data independently, in parallel (otherwise — rows from several consumers squashed to form one block).
- `kafka_num_consumers` — The number of consumers per table. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed. Default: `1`.
- `kafka_max_block_size` — The maximum batch size (in messages) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size).
- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). Default: `0`.
- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block. Default: `0`.
- `kafka_client_id` — Client identifier. Empty by default.
- `kafka_poll_timeout_ms` — Timeout for single poll from Kafka. Default: [stream_poll_timeout_ms](../../../operations/settings/settings.md#stream_poll_timeout_ms).
- `kafka_poll_max_batch_size` — Maximum amount of messages to be polled in a single Kafka poll. Default: [max_block_size](../../../operations/settings/settings.md#setting-max_block_size).
- `kafka_flush_interval_ms` — Timeout for flushing data from Kafka. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms).
- `kafka_thread_per_consumer` — Provide independent thread for each consumer. When enabled, every consumer flush the data independently, in parallel (otherwise — rows from several consumers squashed to form one block). Default: `0`.
- `kafka_handle_error_mode` — How to handle errors for Kafka engine. Possible values: default, stream.
- `kafka_commit_on_select` — Commit messages when select query is made. Default: `false`.
- `kafka_max_rows_per_message` — The maximum number of rows written in one kafka message for row-based formats. Default : `1`.
Examples:
@ -94,7 +108,7 @@ Do not use this method in new projects. If possible, switch old projects to the
``` sql
Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
[, kafka_row_delimiter, kafka_schema, kafka_num_consumers, kafka_skip_broken_messages])
[, kafka_row_delimiter, kafka_schema, kafka_num_consumers, kafka_max_block_size, kafka_skip_broken_messages, kafka_commit_every_batch, kafka_client_id, kafka_poll_timeout_ms, kafka_poll_max_batch_size, kafka_flush_interval_ms, kafka_thread_per_consumer, kafka_handle_error_mode, kafka_commit_on_select, kafka_max_rows_per_message]);
```
</details>
@ -193,6 +207,14 @@ Example:
- `_headers.name` — Array of message's headers keys.
- `_headers.value` — Array of message's headers values.
## Data formats support {#data-formats-support}
Kafka engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse.
The number of rows in one Kafka message depends on whether the format is row-based or block-based:
- For row-based formats the number of rows in one Kafka message can be controlled by setting `kafka_max_rows_per_message`.
- For block-based formats we cannot divide block into smaller parts, but the number of rows in one block can be controlled by general setting [max_block_size](../../../operations/settings/settings.md#setting-max_block_size).
**See Also**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)

View File

@ -37,8 +37,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[nats_max_block_size = N,]
[nats_flush_interval_ms = N,]
[nats_username = 'user',]
[nats_password = 'password']
[redis_password = 'clickhouse']
[nats_password = 'password',]
[nats_token = 'clickhouse',]
[nats_startup_connect_tries = '5']
[nats_max_rows_per_message = 1]
```
Required parameters:
@ -49,7 +51,7 @@ Required parameters:
Optional parameters:
- `nats_row_delimiter` Delimiter character, which ends the message.
- `nats_row_delimiter` Delimiter character, which ends the message. **This setting is deprecated and is no longer used, not left for compatibility reasons.**
- `nats_schema` Parameter that must be used if the format requires a schema definition. For example, [Capn Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
- `nats_num_consumers` The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient.
- `nats_queue_group` Name for queue group of NATS subscribers. Default is the table name.
@ -57,11 +59,13 @@ Optional parameters:
- `nats_reconnect_wait` Amount of time in milliseconds to sleep between each reconnect attempt. Default: `5000`.
- `nats_server_list` - Server list for connection. Can be specified to connect to NATS cluster.
- `nats_skip_broken_messages` - NATS message parser tolerance to schema-incompatible messages per block. Default: `0`. If `nats_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data).
- `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS.
- `nats_flush_interval_ms` - Timeout for flushing data read from NATS.
- `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size).
- `nats_flush_interval_ms` - Timeout for flushing data read from NATS. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms).
- `nats_username` - NATS username.
- `nats_password` - NATS password.
- `nats_token` - NATS auth token.
- `nats_startup_connect_tries` - Number of connect tries at startup. Default: `5`.
- `nats_max_rows_per_message` — The maximum number of rows written in one NATS message for row-based formats. (default : `1`).
SSL connection:
@ -159,6 +163,14 @@ If you want to change the target table by using `ALTER`, we recommend disabling
## Virtual Columns {#virtual-columns}
- `_subject` - NATS message subject.
- `_subject` - NATS message subject.
## Data formats support {#data-formats-support}
NATS engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse.
The number of rows in one NATS message depends on whether the format is row-based or block-based:
- For row-based formats the number of rows in one NATS message can be controlled by setting `nats_max_rows_per_message`.
- For block-based formats we cannot divide block into smaller parts, but the number of rows in one block can be controlled by general setting [max_block_size](../../../operations/settings/settings.md#setting-max_block_size).
[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/nats/) <!--hide-->

View File

@ -37,8 +37,16 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[rabbitmq_persistent = 0,]
[rabbitmq_skip_broken_messages = N,]
[rabbitmq_max_block_size = N,]
[rabbitmq_flush_interval_ms = N]
[rabbitmq_queue_settings_list = 'x-dead-letter-exchange=my-dlx,x-max-length=10,x-overflow=reject-publish']
[rabbitmq_flush_interval_ms = N,]
[rabbitmq_queue_settings_list = 'x-dead-letter-exchange=my-dlx,x-max-length=10,x-overflow=reject-publish',]
[rabbitmq_queue_consume = false,]
[rabbitmq_address = '',]
[rabbitmq_vhost = '/',]
[rabbitmq_queue_consume = false,]
[rabbitmq_username = '',]
[rabbitmq_password = '',]
[rabbitmq_commit_on_select = false,]
[rabbitmq_max_rows_per_message = 1]
```
Required parameters:
@ -49,19 +57,27 @@ Required parameters:
Optional parameters:
- `rabbitmq_exchange_type` The type of RabbitMQ exchange: `direct`, `fanout`, `topic`, `headers`, `consistent_hash`. Default: `fanout`.
- `rabbitmq_routing_key_list` A comma-separated list of routing keys.
- `rabbitmq_row_delimiter` Delimiter character, which ends the message.
- `rabbitmq_schema` Parameter that must be used if the format requires a schema definition. For example, [Capn Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
- `rabbitmq_num_consumers` The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient.
- `rabbitmq_num_queues` Total number of queues. Default: `1`. Increasing this number can significantly improve performance.
- `rabbitmq_queue_base` - Specify a hint for queue names. Use cases of this setting are described below.
- `rabbitmq_deadletter_exchange` - Specify name for a [dead letter exchange](https://www.rabbitmq.com/dlx.html). You can create another table with this exchange name and collect messages in cases when they are republished to dead letter exchange. By default dead letter exchange is not specified.
- `rabbitmq_persistent` - If set to 1 (true), in insert query delivery mode will be set to 2 (marks messages as 'persistent'). Default: `0`.
- `rabbitmq_skip_broken_messages` RabbitMQ message parser tolerance to schema-incompatible messages per block. Default: `0`. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data).
- `rabbitmq_max_block_size`
- `rabbitmq_flush_interval_ms`
- `rabbitmq_queue_settings_list` - allows to set RabbitMQ settings when creating a queue. Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue.
- `rabbitmq_exchange_type` The type of RabbitMQ exchange: `direct`, `fanout`, `topic`, `headers`, `consistent_hash`. Default: `fanout`.
- `rabbitmq_routing_key_list` A comma-separated list of routing keys.
- `rabbitmq_row_delimiter` Delimiter character, which ends the message. **This setting is deprecated and is no longer used, not left for compatibility reasons.**
- `rabbitmq_schema` Parameter that must be used if the format requires a schema definition. For example, [Capn Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
- `rabbitmq_num_consumers` The number of consumers per table. Specify more consumers if the throughput of one consumer is insufficient. Default: `1`
- `rabbitmq_num_queues` Total number of queues. Increasing this number can significantly improve performance. Default: `1`.
- `rabbitmq_queue_base` - Specify a hint for queue names. Use cases of this setting are described below.
- `rabbitmq_deadletter_exchange` - Specify name for a [dead letter exchange](https://www.rabbitmq.com/dlx.html). You can create another table with this exchange name and collect messages in cases when they are republished to dead letter exchange. By default dead letter exchange is not specified.
- `rabbitmq_persistent` - If set to 1 (true), in insert query delivery mode will be set to 2 (marks messages as 'persistent'). Default: `0`.
- `rabbitmq_skip_broken_messages` RabbitMQ message parser tolerance to schema-incompatible messages per block. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). Default: `0`.
- `rabbitmq_max_block_size` - Number of row collected before flushing data from RabbitMQ. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size).
- `rabbitmq_flush_interval_ms` - Timeout for flushing data from RabbitMQ. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms).
- `rabbitmq_queue_settings_list` - allows to set RabbitMQ settings when creating a queue. Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue.
- `rabbitmq_address` - Address for connection. Use ether this setting or `rabbitmq_host_port`.
- `rabbitmq_vhost` - RabbitMQ vhost. Default: `'\'`.
- `rabbitmq_queue_consume` - Use user-defined queues and do not make any RabbitMQ setup: declaring exchanges, queues, bindings. Default: `false`.
- `rabbitmq_username` - RabbitMQ username.
- `rabbitmq_password` - RabbitMQ password.
- `rabbitmq_commit_on_select` - Commit messages when select query is made. Default: `false`.
- `rabbitmq_max_rows_per_message` — The maximum number of rows written in one RabbitMQ message for row-based formats. Default : `1`.
SSL connection:
@ -166,11 +182,20 @@ Example:
## Virtual Columns {#virtual-columns}
- `_exchange_name` - RabbitMQ exchange name.
- `_channel_id` - ChannelID, on which consumer, who received the message, was declared.
- `_delivery_tag` - DeliveryTag of the received message. Scoped per channel.
- `_redelivered` - `redelivered` flag of the message.
- `_message_id` - messageID of the received message; non-empty if was set, when message was published.
- `_timestamp` - timestamp of the received message; non-empty if was set, when message was published.
- `_exchange_name` - RabbitMQ exchange name.
- `_channel_id` - ChannelID, on which consumer, who received the message, was declared.
- `_delivery_tag` - DeliveryTag of the received message. Scoped per channel.
- `_redelivered` - `redelivered` flag of the message.
- `_message_id` - messageID of the received message; non-empty if was set, when message was published.
- `_timestamp` - timestamp of the received message; non-empty if was set, when message was published.
## Data formats support {#data-formats-support}
RabbitMQ engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse.
The number of rows in one RabbitMQ message depends on whether the format is row-based or block-based:
- For row-based formats the number of rows in one RabbitMQ message can be controlled by setting `rabbitmq_max_rows_per_message`.
- For block-based formats we cannot divide block into smaller parts, but the number of rows in one block can be controlled by general setting [max_block_size](../../../operations/settings/settings.md#setting-max_block_size).
[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/rabbitmq/) <!--hide-->

View File

@ -621,7 +621,7 @@ CREATE TABLE example_table
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY d
TTL d + INTERVAL 1 MONTH [DELETE],
TTL d + INTERVAL 1 MONTH DELETE,
d + INTERVAL 1 WEEK TO VOLUME 'aaa',
d + INTERVAL 2 WEEK TO DISK 'bbb';
```

View File

@ -6,6 +6,17 @@ sidebar_label: Data Replication
# Data Replication
:::note
In ClickHouse Cloud replication is managed for you. Please create your tables without adding arguments. For example, in the text below you would replace:
```
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
```
with:
```
ENGINE = ReplicatedReplacingMergeTree
```
:::
Replication is only supported for tables in the MergeTree family:
- ReplicatedMergeTree
@ -85,15 +96,15 @@ Example of setting the addresses of the auxiliary ZooKeeper cluster:
</auxiliary_zookeepers>
```
To store table metadata in an auxiliary ZooKeeper cluster instead of default ZooKeeper cluster, we can use the SQL to create table with
ReplicatedMergeTree engine as follow:
To store table metadata in an auxiliary ZooKeeper cluster instead of the default ZooKeeper cluster, we can use SQL to create the table with
ReplicatedMergeTree engine as follows:
```
CREATE TABLE table_name ( ... ) ENGINE = ReplicatedMergeTree('zookeeper_name_configured_in_auxiliary_zookeepers:path', 'replica_name') ...
```
You can specify any existing ZooKeeper cluster and the system will use a directory on it for its own data (the directory is specified when creating a replicatable table).
If ZooKeeper isnt set in the config file, you cant create replicated tables, and any existing replicated tables will be read-only.
If ZooKeeper is not set in the config file, you cant create replicated tables, and any existing replicated tables will be read-only.
ZooKeeper is not used in `SELECT` queries because replication does not affect the performance of `SELECT` and queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max_replica_delay_for_distributed_queries](/docs/en/operations/settings/settings.md/#settings-max_replica_delay_for_distributed_queries) and [fallback_to_stale_replicas_for_distributed_queries](/docs/en/operations/settings/settings.md/#settings-fallback_to_stale_replicas_for_distributed_queries).
@ -119,8 +130,23 @@ The system monitors data synchronicity on replicas and is able to recover after
## Creating Replicated Tables {#creating-replicated-tables}
:::note
In ClickHouse Cloud replication is managed for you. Please create your tables without adding arguments. For example, in the text below you would replace:
```
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
```
with:
```
ENGINE = ReplicatedReplacingMergeTree
```
:::
The `Replicated` prefix is added to the table engine name. For example:`ReplicatedMergeTree`.
:::tip
Adding `Replicated` is optional in ClickHouse Cloud, as all of the tables are replicated.
:::
### Replicated\*MergeTree parameters
#### zoo_path
@ -144,7 +170,7 @@ CREATE TABLE table_name
CounterID UInt32,
UserID UInt32,
ver UInt16
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', ver)
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);
@ -160,7 +186,7 @@ CREATE TABLE table_name
EventDate DateTime,
CounterID UInt32,
UserID UInt32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192);
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192);
```
</details>
@ -171,7 +197,6 @@ Example:
``` xml
<macros>
<layer>05</layer>
<shard>02</shard>
<replica>example05-02-1</replica>
</macros>
@ -182,12 +207,12 @@ In this case, the path consists of the following parts:
`/clickhouse/tables/` is the common prefix. We recommend using exactly this one.
`{layer}-{shard}` is the shard identifier. In this example it consists of two parts, since the example cluster uses bi-level sharding. For most tasks, you can leave just the {shard} substitution, which will be expanded to the shard identifier.
`{shard}` will be expanded to the shard identifier.
`table_name` is the name of the node for the table in ClickHouse Keeper. It is a good idea to make it the same as the table name. It is defined explicitly, because in contrast to the table name, it does not change after a RENAME query.
*HINT*: you could add a database name in front of `table_name` as well. E.g. `db_name.table_name`
The two built-in substitutions `{database}` and `{table}` can be used, they expand into the table name and the database name respectively (unless these macros are defined in the `macros` section). So the zookeeper path can be specified as `'/clickhouse/tables/{layer}-{shard}/{database}/{table}'`.
The two built-in substitutions `{database}` and `{table}` can be used, they expand into the table name and the database name respectively (unless these macros are defined in the `macros` section). So the zookeeper path can be specified as `'/clickhouse/tables/{shard}/{database}/{table}'`.
Be careful with table renames when using these built-in substitutions. The path in ClickHouse Keeper cannot be changed, and when the table is renamed, the macros will expand into a different path, the table will refer to a path that does not exist in ClickHouse Keeper, and will go into read-only mode.
The replica name identifies different replicas of the same table. You can use the server name for this, as in the example. The name only needs to be unique within each shard.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -9,6 +9,29 @@ slug: /en/operations/backup
- [Backup/restore using an S3 disk](#backuprestore-using-an-s3-disk)
- [Alternatives](#alternatives)
## Command summary
```bash
BACKUP|RESTORE
TABLE [db.]table_name [AS [db.]table_name_in_backup]
[PARTITION[S] partition_expr [,...]] |
DICTIONARY [db.]dictionary_name [AS [db.]name_in_backup] |
DATABASE database_name [AS database_name_in_backup]
[EXCEPT TABLES ...] |
TEMPORARY TABLE table_name [AS table_name_in_backup] |
VIEW view_name [AS view_name_in_backup]
ALL TEMPORARY TABLES [EXCEPT ...] |
ALL DATABASES [EXCEPT ...] } [,...]
[ON CLUSTER 'cluster_name']
TO|FROM File('<path>/<filename>') | Disk('<disk_name>', '<path>/') | S3('<S3 endpoint>/<path>', '<Access key ID>', '<Secret access key>')
[SETTINGS base_backup = File('<path>/<filename>') | Disk(...) | S3('<S3 endpoint>/<path>', '<Access key ID>', '<Secret access key>')]
```
:::note ALL
`ALL` is only applicable to the `RESTORE` command.
:::
## Background
While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you cant just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards do not cover all possible cases and can be circumvented.

View File

@ -127,6 +127,13 @@ Default value: 100000.
A large number of parts in a table reduces performance of ClickHouse queries and increases ClickHouse boot time. Most often this is a consequence of an incorrect design (mistakes when choosing a partitioning strategy - too small partitions).
## simultaneous_parts_removal_limit {#simultaneous-parts-removal-limit}
If there are a lot of outdated parts cleanup thread will try to delete up to `simultaneous_parts_removal_limit` parts during one iteration.
`simultaneous_parts_removal_limit` set to `0` means unlimited.
Default value: 0.
## replicated_deduplication_window {#replicated-deduplication-window}
The number of most recently inserted blocks for which ClickHouse Keeper stores hash sums to check for duplicates.

View File

@ -6,6 +6,39 @@ slug: /en/operations/settings/settings
# Settings
## additional_table_filters
An additional filter expression that is applied after reading
from the specified table.
Default value: 0.
**Example**
``` sql
insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
```
```response
┌─x─┬─y────┐
│ 1 │ a │
│ 2 │ bb │
│ 3 │ ccc │
│ 4 │ dddd │
└───┴──────┘
```
```sql
SELECT *
FROM table_1
SETTINGS additional_table_filters = (('table_1', 'x != 2'))
```
```response
┌─x─┬─y────┐
│ 1 │ a │
│ 3 │ ccc │
│ 4 │ dddd │
└───┴──────┘
```
## allow_nondeterministic_mutations {#allow_nondeterministic_mutations}
User-level setting that allows mutations on replicated tables to make use of non-deterministic functions such as `dictGet`.
@ -1011,6 +1044,12 @@ The default value is 7500.
The smaller the value, the more often data is flushed into the table. Setting the value too low leads to poor performance.
## stream_poll_timeout_ms {#stream_poll_timeout_ms}
Timeout for polling data from/to streaming storages.
Default value: 500.
## load_balancing {#settings-load_balancing}
Specifies the algorithm of replicas selection that is used for distributed query processing.
@ -3625,7 +3664,7 @@ z IPv4
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference.
Default value: `false`.
Default value: `true`.
## input_format_try_infer_integers {#input_format_try_infer_integers}

View File

@ -0,0 +1,70 @@
---
slug: /en/operations/system-tables/schema_inference_cache
---
# Schema inference cache
Contains information about all cached file schemas.
Columns:
- `storage` ([String](/docs/en/sql-reference/data-types/string.md)) — Storage name: File, URL, S3 or HDFS.
- `source` ([String](/docs/en/sql-reference/data-types/string.md)) — File source.
- `format` ([String](/docs/en/sql-reference/data-types/string.md)) — Format name.
- `additional_format_info` ([String](/docs/en/sql-reference/data-types/string.md)) - Additional information required to identify the schema. For example, format specific settings.
- `registration_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — Timestamp when schema was added in cache.
- `schema` ([String](/docs/en/sql-reference/data-types/string.md)) - Cached schema.
**Example**
Let's say we have a file `data.jsonl` with this content:
```json
{"id" : 1, "age" : 25, "name" : "Josh", "hobbies" : ["football", "cooking", "music"]}
{"id" : 2, "age" : 19, "name" : "Alan", "hobbies" : ["tennis", "art"]}
{"id" : 3, "age" : 32, "name" : "Lana", "hobbies" : ["fitness", "reading", "shopping"]}
{"id" : 4, "age" : 47, "name" : "Brayan", "hobbies" : ["movies", "skydiving"]}
```
:::tip
Place `data.jsonl` in the `user_files_path` directory. You can find this by looking
in your ClickHouse configuration files. The default is:
```
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
```
:::
Open `clickhouse-client` and run the `DESCRIBE` query:
```sql
DESCRIBE file('data.jsonl') SETTINGS input_format_try_infer_integers=0;
```
```response
┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ Nullable(Float64) │ │ │ │ │ │
│ age │ Nullable(Float64) │ │ │ │ │ │
│ name │ Nullable(String) │ │ │ │ │ │
│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
Let's see the content of the `system.schema_inference_cache` table:
```sql
SELECT *
FROM system.schema_inference_cache
FORMAT Vertical
```
```response
Row 1:
──────
storage: File
source: /home/droscigno/user_files/data.jsonl
format: JSONEachRow
additional_format_info: schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, schema_inference_make_columns_nullable=true, try_infer_integers=false, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=true, read_bools_as_numbers=true, try_infer_objects=false
registration_time: 2022-12-29 17:49:52
schema: id Nullable(Float64), age Nullable(Float64), name Nullable(String), hobbies Array(Nullable(String))
```
**See also**
- [Automatic schema inference from input data](/docs/en/interfaces/schema-inference.md)

View File

@ -0,0 +1,41 @@
---
slug: /en/sql-reference/aggregate-functions/reference/grouparraylast
sidebar_position: 110
---
# groupArrayLast
Syntax: `groupArrayLast(max_size)(x)`
Creates an array of last argument values.
For example, `groupArrayLast(1)(x)` is equivalent to `[anyLast (x)]`.
In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
**Example**
Query:
```sql
select groupArrayLast(2)(number+1) numbers from numbers(10)
```
Result:
```text
┌─numbers─┐
│ [9,10] │
└─────────┘
```
In compare to `groupArray`:
```sql
select groupArray(2)(number+1) numbers from numbers(10)
```
```text
┌─numbers─┐
│ [1,2] │
└─────────┘
```

View File

@ -32,6 +32,7 @@ ClickHouse-specific aggregate functions:
- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
- [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md)
- [groupArrayLast](../../../sql-reference/aggregate-functions/reference/grouparraylast.md)
- [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md)
- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md)

View File

@ -65,10 +65,27 @@ An exception is thrown when dividing by zero or when dividing a minimal negative
Differs from [modulo](#modulo) in that it returns zero when the divisor is zero.
## positive_modulo(a, b)
Calculates the remainder when dividing `a` by `b`. Similar to function `modulo` except that `positive_modulo` always return non-negative number.
## positiveModulo(a, b), positive_modulo(a, b), pmod(a, b)
Calculates the remainder when dividing `a` by `b`. Similar to the function `modulo` except that `positive_modulo` always returns a non-negative number.
Notice that `positive_modulo` is 4-5 times slower than `modulo`. You should not use `positive_modulo` unless you want to get positive result and don't care about performance too much.
Notice that `positive_modulo` is 4-5 times slower than `modulo`. You should not use `positive_modulo` unless you want to get a positive result and don't care about performance too much.
**Example**
Query:
```sql
SELECT positiveModulo(-1, 10)
```
Result:
```text
┌─positiveModulo(-1, 10)─┐
│ 9 │
└────────────────────────┘
```
## negate(a), -a operator

View File

@ -302,16 +302,23 @@ Returns the date.
The behavior of parsing incorrect dates is implementation specific. ClickHouse may return zero date, throw an exception or do “natural” overflow.
:::
## toLastDayOfMonth
Rounds a date, or date with time, to the last day of the month.
Returns the date.
Alias: `LAST_DAY`.
If `toLastDayOfMonth` is called with an argument of type `Date` greater then 2149-05-31, the result will be calculated from the argument 2149-05-31 instead.
## toMonday
Rounds down a date or date with time to the nearest Monday.
Rounds down a date, or date with time, to the nearest Monday.
Returns the date.
## toStartOfWeek(t\[,mode\])
Rounds down a date or date with time to the nearest Sunday or Monday by mode.
Rounds down a date, or date with time, to the nearest Sunday or Monday by mode.
Returns the date.
The mode argument works exactly like the mode argument to toWeek(). For the single-argument syntax, a mode value of 0 is used.
@ -410,43 +417,43 @@ Converts a date with time to a certain fixed date, while preserving the time.
## toRelativeYearNum
Converts a date with time or date to the number of the year, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of the year, starting from a certain fixed point in the past.
## toRelativeQuarterNum
Converts a date with time or date to the number of the quarter, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of the quarter, starting from a certain fixed point in the past.
## toRelativeMonthNum
Converts a date with time or date to the number of the month, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of the month, starting from a certain fixed point in the past.
## toRelativeWeekNum
Converts a date with time or date to the number of the week, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of the week, starting from a certain fixed point in the past.
## toRelativeDayNum
Converts a date with time or date to the number of the day, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of the day, starting from a certain fixed point in the past.
## toRelativeHourNum
Converts a date with time or date to the number of the hour, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of the hour, starting from a certain fixed point in the past.
## toRelativeMinuteNum
Converts a date with time or date to the number of the minute, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of the minute, starting from a certain fixed point in the past.
## toRelativeSecondNum
Converts a date with time or date to the number of the second, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of the second, starting from a certain fixed point in the past.
## toISOYear
Converts a date or date with time to a UInt16 number containing the ISO Year number.
Converts a date, or date with time, to a UInt16 number containing the ISO Year number.
## toISOWeek
Converts a date or date with time to a UInt8 number containing the ISO Week number.
Converts a date, or date with time, to a UInt8 number containing the ISO Week number.
## toWeek(date\[,mode\])
@ -517,6 +524,154 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
└────────────┴───────────┴───────────┴───────────┘
```
## age
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second.
E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit.
**Syntax**
``` sql
age('unit', startdate, enddate, [timezone])
```
**Arguments**
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values:
- `second` (possible abbreviations: `ss`, `s`)
- `minute` (possible abbreviations: `mi`, `n`)
- `hour` (possible abbreviations: `hh`, `h`)
- `day` (possible abbreviations: `dd`, `d`)
- `week` (possible abbreviations: `wk`, `ww`)
- `month` (possible abbreviations: `mm`, `m`)
- `quarter` (possible abbreviations: `qq`, `q`)
- `year` (possible abbreviations: `yyyy`, `yy`)
- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md).
**Returned value**
Difference between `enddate` and `startdate` expressed in `unit`.
Type: [Int](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00'));
```
Result:
``` text
┌─age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00'))─┐
│ 24 │
└───────────────────────────────────────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT
toDate('2022-01-01') AS e,
toDate('2021-12-29') AS s,
age('day', s, e) AS day_age,
age('month', s, e) AS month__age,
age('year', s, e) AS year_age;
```
Result:
``` text
┌──────────e─┬──────────s─┬─day_age─┬─month__age─┬─year_age─┐
│ 2022-01-01 │ 2021-12-29 │ 3 │ 0 │ 0 │
└────────────┴────────────┴─────────┴────────────┴──────────┘
```
## date\_diff
Returns the count of the specified `unit` boundaries crossed between the `startdate` and `enddate`.
The difference is calculated using relative units, e.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)).
**Syntax**
``` sql
date_diff('unit', startdate, enddate, [timezone])
```
Aliases: `dateDiff`, `DATE_DIFF`.
**Arguments**
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values:
- `second` (possible abbreviations: `ss`, `s`)
- `minute` (possible abbreviations: `mi`, `n`)
- `hour` (possible abbreviations: `hh`, `h`)
- `day` (possible abbreviations: `dd`, `d`)
- `week` (possible abbreviations: `wk`, `ww`)
- `month` (possible abbreviations: `mm`, `m`)
- `quarter` (possible abbreviations: `qq`, `q`)
- `year` (possible abbreviations: `yyyy`, `yy`)
- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md).
**Returned value**
Difference between `enddate` and `startdate` expressed in `unit`.
Type: [Int](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'));
```
Result:
``` text
┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐
│ 25 │
└────────────────────────────────────────────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT
toDate('2022-01-01') AS e,
toDate('2021-12-29') AS s,
dateDiff('day', s, e) AS day_diff,
dateDiff('month', s, e) AS month__diff,
dateDiff('year', s, e) AS year_diff;
```
Result:
``` text
┌──────────e─┬──────────s─┬─day_diff─┬─month__diff─┬─year_diff─┐
│ 2022-01-01 │ 2021-12-29 │ 3 │ 1 │ 1 │
└────────────┴────────────┴──────────┴─────────────┴───────────┘
```
## date\_trunc
Truncates date and time data to the specified part of date.
@ -637,80 +792,6 @@ Result:
└───────────────────────────────────────────────┘
```
## date\_diff
Returns the difference between two dates or dates with time values.
The difference is calculated using relative units, e.g. the difference between `2022-01-01` and `2021-12-29` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)).
**Syntax**
``` sql
date_diff('unit', startdate, enddate, [timezone])
```
Aliases: `dateDiff`, `DATE_DIFF`.
**Arguments**
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values:
- `second`
- `minute`
- `hour`
- `day`
- `week`
- `month`
- `quarter`
- `year`
- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md).
**Returned value**
Difference between `enddate` and `startdate` expressed in `unit`.
Type: [Int](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'));
```
Result:
``` text
┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐
│ 25 │
└────────────────────────────────────────────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT
toDate('2022-01-01') AS e,
toDate('2021-12-29') AS s,
dateDiff('day', s, e) AS day_diff,
dateDiff('month', s, e) AS month__diff,
dateDiff('year', s, e) AS year_diff;
```
Result:
``` text
┌──────────e─┬──────────s─┬─day_diff─┬─month__diff─┬─year_diff─┐
│ 2022-01-01 │ 2021-12-29 │ 3 │ 1 │ 1 │
└────────────┴────────────┴──────────┴─────────────┴───────────┘
```
## date\_sub
Subtracts the time interval or date interval from the provided date or date with time.
@ -1085,6 +1166,8 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64
Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column.
formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
**Syntax**
``` sql
@ -1158,6 +1241,64 @@ Result:
└─────────────────────────────────────────────────────────────────────┘
```
**See Also**
- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax)
## formatDateTimeInJodaSyntax
Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
**Replacement fields**
Using replacement fields, you can define a pattern for the resulting string.
| Placeholder | Description | Presentation | Examples |
| ----------- | ----------- | ------------- | -------- |
| G | era | text | AD |
| C | century of era (>=0) | number | 20 |
| Y | year of era (>=0) | year | 1996 |
| x | weekyear(not supported yet) | year | 1996 |
| w | week of weekyear(not supported yet) | number | 27 |
| e | day of week | number | 2 |
| E | day of week | text | Tuesday; Tue |
| y | year | year | 1996 |
| D | day of year | number | 189 |
| M | month of year | month | July; Jul; 07 |
| d | day of month | number | 10 |
| a | halfday of day | text | PM |
| K | hour of halfday (0~11) | number | 0 |
| h | clockhour of halfday (1~12) | number | 12 |
| H | hour of day (0~23) | number | 0 |
| k | clockhour of day (1~24) | number | 24 |
| m | minute of hour | number | 30 |
| s | second of minute | number | 55 |
| S | fraction of second(not supported yet) | number | 978 |
| z | time zone(short name not supported yet) | text | Pacific Standard Time; PST |
| Z | time zone offset/id(not supported yet) | zone | -0800; -08:00; America/Los_Angeles |
| ' | escape for text | delimiter| |
| '' | single quote | literal | ' |
**Example**
Query:
``` sql
SELECT formatDateTimeInJodaSyntax(toDateTime('2010-01-04 12:34:56'), 'yyyy-MM-dd HH:mm:ss')
```
Result:
```
┌─formatDateTimeInJodaSyntax(toDateTime('2010-01-04 12:34:56'), 'yyyy-MM-dd HH:mm:ss')─┐
│ 2010-01-04 12:34:56 │
└─────────────────────────────────────────────────────────────────────────────────────────┘
```
## dateName
Returns specified part of date.
@ -1241,6 +1382,8 @@ Result:
Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type.
FROM_UNIXTIME uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
Alias: `fromUnixTimestamp`.
**Example:**
@ -1273,6 +1416,28 @@ SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
└─────────────────────┘
```
**See Also**
- [fromUnixTimestampInJodaSyntax](##fromUnixTimestampInJodaSyntax)
## fromUnixTimestampInJodaSyntax
Similar to FROM_UNIXTIME, except that it formats time in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
**Example:**
Query:
``` sql
SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC');
```
Result:
```
┌─fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC')─┐
│ 2022-11-30 10:41:12 │
└────────────────────────────────────────────────────────────────────────────┘
```
## toModifiedJulianDay
Converts a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) date in text form `YYYY-MM-DD` to a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) number in Int32. This function supports date from `0000-01-01` to `9999-12-31`. It raises an exception if the argument cannot be parsed as a date, or the date is invalid.

View File

@ -1027,6 +1027,186 @@ Result:
└─────────────┘
```
## h3PointDistM
Returns the "great circle" or "haversine" distance between pairs of GeoCoord points (latitude/longitude) pairs in meters.
**Syntax**
``` sql
h3PointDistM(lat1, lon1, lat2, lon2)
```
**Arguments**
- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
**Returned values**
- Haversine or great circle distance in meters.
Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
select h3PointDistM(-10.0 ,0.0, 10.0, 0.0) as h3PointDistM;
```
Result:
``` text
┌──────h3PointDistM─┐
│ 2223901.039504589 │
└───────────────────┘
```
## h3PointDistKm
Returns the "great circle" or "haversine" distance between pairs of GeoCoord points (latitude/longitude) pairs in kilometers.
**Syntax**
``` sql
h3PointDistKm(lat1, lon1, lat2, lon2)
```
**Arguments**
- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
**Returned values**
- Haversine or great circle distance in kilometers.
Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
select h3PointDistKm(-10.0 ,0.0, 10.0, 0.0) as h3PointDistKm;
```
Result:
``` text
┌─────h3PointDistKm─┐
│ 2223.901039504589 │
└───────────────────┘
```
## h3PointDistRads
Returns the "great circle" or "haversine" distance between pairs of GeoCoord points (latitude/longitude) pairs in radians.
**Syntax**
``` sql
h3PointDistRads(lat1, lon1, lat2, lon2)
```
**Arguments**
- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
**Returned values**
- Haversine or great circle distance in radians.
Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
select h3PointDistRads(-10.0 ,0.0, 10.0, 0.0) as h3PointDistRads;
```
Result:
``` text
┌────h3PointDistRads─┐
│ 0.3490658503988659 │
└────────────────────┘
```
## h3GetRes0Indexes
Returns an array of all the resolution 0 H3 indexes.
**Syntax**
``` sql
h3GetRes0Indexes()
```
**Returned values**
- Array of all the resolution 0 H3 indexes.
Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
**Example**
Query:
``` sql
select h3GetRes0Indexes as indexes ;
```
Result:
``` text
┌─indexes─────────────────────────────────────┐
│ [576495936675512319,576531121047601151,....]│
└─────────────────────────────────────────────┘
```
## h3GetPentagonIndexes
Returns all the pentagon H3 indexes at the specified resolution.
**Syntax**
``` sql
h3GetPentagonIndexes(resolution)
```
**Parameter**
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- Array of all pentagon H3 indexes.
Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
**Example**
Query:
``` sql
SELECT h3GetPentagonIndexes(3) AS indexes;
```
Result:
``` text
┌─indexes────────────────────────────────────────────────────────┐
│ [590112357393367039,590464201114255359,590816044835143679,...] │
└────────────────────────────────────────────────────────────────┘
```
## h3Line
Returns the line of indices between the two indices that are provided.

File diff suppressed because it is too large Load Diff

View File

@ -38,7 +38,7 @@ INSERT INTO t_uuid SELECT generateUUIDv4()
SELECT * FROM t_uuid
```
``` text
```response
┌────────────────────────────────────x─┐
│ f4bf890f-f9dc-4332-ad5c-0c18e73f28e9 │
└──────────────────────────────────────┘
@ -89,7 +89,7 @@ SELECT empty(generateUUIDv4());
Result:
```text
```response
┌─empty(generateUUIDv4())─┐
│ 0 │
└─────────────────────────┘
@ -131,7 +131,7 @@ SELECT notEmpty(generateUUIDv4());
Result:
```text
```response
┌─notEmpty(generateUUIDv4())─┐
│ 1 │
└────────────────────────────┘
@ -155,12 +155,56 @@ The UUID type value.
SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid
```
``` text
```response
┌─────────────────────────────────uuid─┐
│ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │
└──────────────────────────────────────┘
```
## toUUIDOrDefault (x,y)
**Arguments**
- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#string).
- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](/docs/en/sql-reference/data-types/uuid.md).
**Returned value**
UUID
``` sql
toUUIDOrDefault(String, UUID)
```
**Returned value**
The UUID type value.
**Usage examples**
This first example returns the first argument converted to a UUID type as it can be converted:
``` sql
SELECT toUUIDOrDefault('61f0c404-5cb3-11e7-907b-a6006ad3dba0', cast('59f0c404-5cb3-11e7-907b-a6006ad3dba0' as UUID));
```
```response
┌─toUUIDOrDefault('61f0c404-5cb3-11e7-907b-a6006ad3dba0', CAST('59f0c404-5cb3-11e7-907b-a6006ad3dba0', 'UUID'))─┐
│ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
This second example returns the second argument (the provided default UUID) as the first argument cannot be converted to a UUID type:
```sql
SELECT toUUIDOrDefault('-----61f0c404-5cb3-11e7-907b-a6006ad3dba0', cast('59f0c404-5cb3-11e7-907b-a6006ad3dba0' as UUID));
```
```response
┌─toUUIDOrDefault('-----61f0c404-5cb3-11e7-907b-a6006ad3dba0', CAST('59f0c404-5cb3-11e7-907b-a6006ad3dba0', 'UUID'))─┐
│ 59f0c404-5cb3-11e7-907b-a6006ad3dba0 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## toUUIDOrNull (x)
It takes an argument of type String and tries to parse it into UUID. If failed, returns NULL.
@ -179,7 +223,7 @@ The Nullable(UUID) type value.
SELECT toUUIDOrNull('61f0c404-5cb3-11e7-907b-a6006ad3dba0T') AS uuid
```
``` text
```response
┌─uuid─┐
│ ᴺᵁᴸᴸ │
└──────┘
@ -203,7 +247,7 @@ The UUID type value.
SELECT toUUIDOrZero('61f0c404-5cb3-11e7-907b-a6006ad3dba0T') AS uuid
```
``` text
```response
┌─────────────────────────────────uuid─┐
│ 00000000-0000-0000-0000-000000000000 │
└──────────────────────────────────────┘
@ -236,7 +280,7 @@ SELECT
UUIDStringToNum(uuid) AS bytes
```
``` text
```response
┌─uuid─────────────────────────────────┬─bytes────────────┐
│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │
└──────────────────────────────────────┴──────────────────┘
@ -248,7 +292,7 @@ SELECT
UUIDStringToNum(uuid, 2) AS bytes
```
``` text
```response
┌─uuid─────────────────────────────────┬─bytes────────────┐
│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ @</a;]~!p{jTj={) │
└──────────────────────────────────────┴──────────────────┘
@ -281,7 +325,7 @@ SELECT
UUIDNumToString(toFixedString(bytes, 16)) AS uuid
```
``` text
```response
┌─bytes────────────┬─uuid─────────────────────────────────┐
│ a/<@];!~p{jTj={) │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │
└──────────────────┴──────────────────────────────────────┘
@ -293,7 +337,7 @@ SELECT
UUIDNumToString(toFixedString(bytes, 16), 2) AS uuid
```
``` text
```response
┌─bytes────────────┬─uuid─────────────────────────────────┐
│ @</a;]~!p{jTj={) │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │
└──────────────────┴──────────────────────────────────────┘

View File

@ -91,11 +91,11 @@ INSERT INTO t FORMAT TabSeparated
You can insert data separately from the query by using the command-line client or the HTTP interface. For more information, see the section “[Interfaces](../../interfaces)”.
### Constraints
## Constraints
If table has [constraints](../../sql-reference/statements/create/table.md#constraints), their expressions will be checked for each row of inserted data. If any of those constraints is not satisfied — server will raise an exception containing constraint name and expression, the query will be stopped.
### Inserting the Results of `SELECT`
## Inserting the Results of `SELECT`
**Syntax**
@ -114,7 +114,7 @@ However, you can delete old data using `ALTER TABLE ... DROP PARTITION`.
To insert a default value instead of `NULL` into a column with not nullable data type, enable [insert_null_as_default](../../operations/settings/settings.md#insert_null_as_default) setting.
### Inserting Data from a File
## Inserting Data from a File
**Syntax**
@ -122,14 +122,15 @@ To insert a default value instead of `NULL` into a column with not nullable data
INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name
```
Use the syntax above to insert data from a file stored on a **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause.
Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause.
Compressed files are supported. Compression type is detected by the extension of the file name. Or it can be explicitly specified in a `COMPRESSION` clause. Supported types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
Compressed files are supported. The compression type is detected by the extension of the file name. Or it can be explicitly specified in a `COMPRESSION` clause. Supported types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
This functionality is available in the [command-line client](../../interfaces/cli.md) and [clickhouse-local](../../operations/utilities/clickhouse-local.md).
**Example**
**Examples**
### Single file with FROM INFILE
Execute the following queries using [command-line client](../../interfaces/cli.md):
```bash
@ -148,7 +149,27 @@ Result:
└────┴──────┘
```
### Inserting into Table Function
### Multiple files with FROM INFILE using globs
This example is very similar to the previous one but inserts from multiple files using `FROM INFILE 'input_*.csv`.
```bash
echo 1,A > input_1.csv ; echo 2,B > input_2.csv
clickhouse-client --query="CREATE TABLE infile_globs (id UInt32, text String) ENGINE=MergeTree() ORDER BY id;"
clickhouse-client --query="INSERT INTO infile_globs FROM INFILE 'input_*.csv' FORMAT CSV;"
clickhouse-client --query="SELECT * FROM infile_globs FORMAT PrettyCompact;"
```
:::tip
In addition to selecting multiple files with `*`, you can use ranges (`{1,2}` or `{1..9}`) and other [glob substitutions](/docs/en/sql-reference/table-functions/file.md/#globs-in-path). These three all would work with the above example:
```sql
INSERT INTO infile_globs FROM INFILE 'input_*.csv' FORMAT CSV;
INSERT INTO infile_globs FROM INFILE 'input_{1,2}.csv' FORMAT CSV;
INSERT INTO infile_globs FROM INFILE 'input_?.csv' FORMAT CSV;
```
:::
## Inserting into Table Function
Data can be inserted into tables referenced by [table functions](../../sql-reference/table-functions/index.md).
@ -176,7 +197,7 @@ Result:
└─────┴───────────────────────┘
```
### Performance Considerations
## Performance Considerations
`INSERT` sorts the input data by primary key and splits them into partitions by a partition key. If you insert data into several partitions at once, it can significantly reduce the performance of the `INSERT` query. To avoid this:

View File

@ -169,12 +169,6 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
cmake -D CMAKE_BUILD_TYPE=Debug ..
В случае использования на разработческой машине старого HDD или SSD, а также при желании использовать меньше места для артефактов сборки можно использовать следующую команду:
```bash
cmake -DUSE_DEBUG_HELPERS=1 -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 ..
```
При этом надо учесть, что получаемые в результате сборки исполнимые файлы будут динамически слинкованы с библиотеками, и поэтому фактически станут непереносимыми на другие компьютеры (либо для этого нужно будет предпринять значительно больше усилий по сравнению со статической сборкой). Плюсом же в данном случае является значительно меньшее время сборки (это проявляется не на первой сборке, а на последующих, после внесения изменений в исходный код - тратится меньшее время на линковку по сравнению со статической сборкой) и значительно меньшее использование места на жёстком диске (экономия более, чем в 3 раза по сравнению со статической сборкой). Для целей разработки, когда планируются только отладочные запуски на том же компьютере, где осуществлялась сборка, это может быть наиболее удобным вариантом.
Вы можете изменить вариант сборки, выполнив новую команду в директории build.
Запустите ninja для сборки:

View File

@ -523,7 +523,7 @@ CREATE TABLE example_table
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY d
TTL d + INTERVAL 1 MONTH [DELETE],
TTL d + INTERVAL 1 MONTH DELETE,
d + INTERVAL 1 WEEK TO VOLUME 'aaa',
d + INTERVAL 2 WEEK TO DISK 'bbb';
```

View File

@ -424,23 +424,23 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d
## toRelativeYearNum {#torelativeyearnum}
Переводит дату-с-временем или дату в номер года, начиная с некоторого фиксированного момента в прошлом.
Переводит дату или дату-с-временем в номер года, начиная с некоторого фиксированного момента в прошлом.
## toRelativeQuarterNum {#torelativequarternum}
Переводит дату-с-временем или дату в номер квартала, начиная с некоторого фиксированного момента в прошлом.
Переводит дату или дату-с-временем в номер квартала, начиная с некоторого фиксированного момента в прошлом.
## toRelativeMonthNum {#torelativemonthnum}
Переводит дату-с-временем или дату в номер месяца, начиная с некоторого фиксированного момента в прошлом.
Переводит дату или дату-с-временем в номер месяца, начиная с некоторого фиксированного момента в прошлом.
## toRelativeWeekNum {#torelativeweeknum}
Переводит дату-с-временем или дату в номер недели, начиная с некоторого фиксированного момента в прошлом.
Переводит дату или дату-с-временем в номер недели, начиная с некоторого фиксированного момента в прошлом.
## toRelativeDayNum {#torelativedaynum}
Переводит дату-с-временем или дату в номер дня, начиная с некоторого фиксированного момента в прошлом.
Переводит дату или дату-с-временем в номер дня, начиная с некоторого фиксированного момента в прошлом.
## toRelativeHourNum {#torelativehournum}
@ -456,7 +456,7 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d
## toISOYear {#toisoyear}
Переводит дату-с-временем или дату в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января.
Переводит дату или дату-с-временем в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января.
**Пример**
@ -479,7 +479,7 @@ SELECT
## toISOWeek {#toisoweek}
Переводит дату-с-временем или дату в число типа UInt8, содержащее номер ISO недели.
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер ISO недели.
Начало ISO года отличается от начала обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) первая неделя года - это неделя с четырьмя или более днями в этом году.
1 Января 2017 г. - воскресение, т.е. первая ISO неделя 2017 года началась в понедельник 2 января, поэтому 1 января 2017 это последняя неделя 2016 года.
@ -503,7 +503,7 @@ SELECT
```
## toWeek(date\[, mode\]\[, timezone\]) {#toweek}
Переводит дату-с-временем или дату в число UInt8, содержащее номер недели. Второй аргументам mode задает режим, начинается ли неделя с воскресенья или с понедельника и должно ли возвращаемое значение находиться в диапазоне от 0 до 53 или от 1 до 53. Если аргумент mode опущен, то используется режим 0.
Переводит дату или дату-с-временем в число UInt8, содержащее номер недели. Второй аргументам mode задает режим, начинается ли неделя с воскресенья или с понедельника и должно ли возвращаемое значение находиться в диапазоне от 0 до 53 или от 1 до 53. Если аргумент mode опущен, то используется режим 0.
`toISOWeek() ` эквивалентно `toWeek(date,3)`.
@ -569,6 +569,132 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
└────────────┴───────────┴───────────┴───────────┘
```
## age
Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду.
Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`.
**Синтаксис**
``` sql
age('unit', startdate, enddate, [timezone])
```
**Аргументы**
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
Возможные значения:
- `second` (возможные сокращения: `ss`, `s`)
- `minute` (возможные сокращения: `mi`, `n`)
- `hour` (возможные сокращения: `hh`, `h`)
- `day` (возможные сокращения: `dd`, `d`)
- `week` (возможные сокращения: `wk`, `ww`)
- `month` (возможные сокращения: `mm`, `m`)
- `quarter` (возможные сокращения: `qq`, `q`)
- `year` (возможные сокращения: `yyyy`, `yy`)
- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
Разница между `enddate` и `startdate`, выраженная в `unit`.
Тип: [Int](../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00'));
```
Результат:
``` text
┌─age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00'))─┐
│ 24 │
└───────────────────────────────────────────────────────────────────────────────────┘
```
Запрос:
``` sql
SELECT
toDate('2022-01-01') AS e,
toDate('2021-12-29') AS s,
age('day', s, e) AS day_age,
age('month', s, e) AS month__age,
age('year', s, e) AS year_age;
```
Результат:
``` text
┌──────────e─┬──────────s─┬─day_age─┬─month__age─┬─year_age─┐
│ 2022-01-01 │ 2021-12-29 │ 3 │ 0 │ 0 │
└────────────┴────────────┴─────────┴────────────┴──────────┘
```
## date\_diff {#date_diff}
Вычисляет разницу указанных границ `unit` пересекаемых между `startdate` и `enddate`.
**Синтаксис**
``` sql
date_diff('unit', startdate, enddate, [timezone])
```
Синонимы: `dateDiff`, `DATE_DIFF`.
**Аргументы**
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
Возможные значения:
- `second` (возможные сокращения: `ss`, `s`)
- `minute` (возможные сокращения: `mi`, `n`)
- `hour` (возможные сокращения: `hh`, `h`)
- `day` (возможные сокращения: `dd`, `d`)
- `week` (возможные сокращения: `wk`, `ww`)
- `month` (возможные сокращения: `mm`, `m`)
- `quarter` (возможные сокращения: `qq`, `q`)
- `year` (возможные сокращения: `yyyy`, `yy`)
- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
Разница между `enddate` и `startdate`, выраженная в `unit`.
Тип: [Int](../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'));
```
Результат:
``` text
┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐
│ 25 │
└────────────────────────────────────────────────────────────────────────────────────────┘
```
## date_trunc {#date_trunc}
Отсекает от даты и времени части, меньшие чем указанная часть.
@ -689,60 +815,6 @@ SELECT date_add(YEAR, 3, toDate('2018-01-01'));
└───────────────────────────────────────────────┘
```
## date\_diff {#date_diff}
Вычисляет разницу между двумя значениями дат или дат со временем.
**Синтаксис**
``` sql
date_diff('unit', startdate, enddate, [timezone])
```
Синонимы: `dateDiff`, `DATE_DIFF`.
**Аргументы**
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
Возможные значения:
- `second`
- `minute`
- `hour`
- `day`
- `week`
- `month`
- `quarter`
- `year`
- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
Разница между `enddate` и `startdate`, выраженная в `unit`.
Тип: [Int](../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'));
```
Результат:
``` text
┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐
│ 25 │
└────────────────────────────────────────────────────────────────────────────────────────┘
```
## date\_sub {#date_sub}
Вычитает интервал времени или даты из указанной даты или даты со временем.

Some files were not shown because too many files have changed in this diff Show More