mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00

Merge remote-tracking branch 'origin/master' into sixtyfour

commit 441ff0a456

3 .github/PULL_REQUEST_TEMPLATE.md (vendored)
@@ -59,6 +59,9 @@ At a minimum, the following information should be added (but add more as needed)
- [ ] <!---ci_exclude_tsan|msan|ubsan|coverage--> Exclude: All with TSAN, MSAN, UBSAN, Coverage
- [ ] <!---ci_exclude_aarch64|release|debug--> Exclude: All with aarch64, release, debug
---
- [ ] <!---ci_include_fuzzer--> Run only fuzzers related jobs (libFuzzer fuzzers, AST fuzzers, etc.)
- [ ] <!---ci_exclude_ast--> Exclude: AST fuzzers
---
- [ ] <!---do_not_test--> Do not test
- [ ] <!---woolen_wolfdog--> Woolen Wolfdog
- [ ] <!---upload_all--> Upload binaries for special builds
18 .github/actions/debug/action.yml (vendored, new file)
@@ -0,0 +1,18 @@
name: DebugInfo
description: Prints workflow debug info

runs:
  using: "composite"
  steps:
    - name: Print envs
      shell: bash
      run: |
        echo "::group::Envs"
        env
        echo "::endgroup::"
    - name: Print Event.json
      shell: bash
      run: |
        echo "::group::Event.json"
        python3 -m json.tool "$GITHUB_EVENT_PATH"
        echo "::endgroup::"
109 .github/workflows/auto_releases.yml (vendored, new file)
@@ -0,0 +1,109 @@
name: AutoReleases

env:
  PYTHONUNBUFFERED: 1

concurrency:
  group: autoreleases

on:
  # schedule:
  #   - cron: '0 9 * * *'
  workflow_dispatch:
    inputs:
      dry-run:
        description: 'Dry run'
        required: false
        default: true
        type: boolean

jobs:
  AutoReleaseInfo:
    runs-on: [self-hosted, style-checker-aarch64]
    outputs:
      data: ${{ steps.info.outputs.AUTO_RELEASE_PARAMS }}
      dry_run: ${{ steps.info.outputs.DRY_RUN }}
    steps:
      - name: Debug Info
        uses: ./.github/actions/debug
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
          ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
          RCSK
          EOF
          echo "DRY_RUN=true" >> "$GITHUB_ENV"
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
      - name: Prepare Info
        id: info
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 auto_release.py --prepare
          echo "::group::Auto Release Info"
          python3 -m json.tool /tmp/autorelease_info.json
          echo "::endgroup::"
          {
              echo 'AUTO_RELEASE_PARAMS<<EOF'
              cat /tmp/autorelease_info.json
              echo 'EOF'
          } >> "$GITHUB_ENV"
          {
              echo 'AUTO_RELEASE_PARAMS<<EOF'
              cat /tmp/autorelease_info.json
              echo 'EOF'
          } >> "$GITHUB_OUTPUT"
          echo "DRY_RUN=true" >> "$GITHUB_OUTPUT"
      - name: Post Release Branch statuses
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 auto_release.py --post-status
      - name: Clean up
        uses: ./.github/actions/clean

  Release_0:
    needs: AutoReleaseInfo
    name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].release_branch }}
    if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].ready }}
    uses: ./.github/workflows/create_release.yml
    with:
      ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }}
      type: patch
      dry-run: ${{ needs.AutoReleaseInfo.outputs.dry_run }}
  #
  # Release_1:
  #   needs: [AutoReleaseInfo, Release_0]
  #   name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].release_branch }}
  #   if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].ready }}
  #   uses: ./.github/workflows/create_release.yml
  #   with:
  #     ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].commit_sha }}
  #     type: patch
  #     dry-run: ${{ env.DRY_RUN }}
  #
  # Release_2:
  #   needs: [AutoReleaseInfo, Release_1]
  #   name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].release_branch }}
  #   if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].ready }}
  #   uses: ./.github/workflow/create_release.yml
  #   with:
  #     ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }}
  #     type: patch
  #     dry-run: ${{ env.DRY_RUN }}
  #
  # Release_3:
  #   needs: [AutoReleaseInfo, Release_2]
  #   name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].release_branch }}
  #   if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].ready }}
  #   uses: ./.github/workflow/create_release.yml
  #   with:
  #     ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].commit_sha }}
  #     type: patch
  #     dry-run: ${{ env.DRY_RUN }}

  # - name: Post Slack Message
  #   if: ${{ !cancelled() }}
  #   run: |
  #     cd "$GITHUB_WORKSPACE/tests/ci"
  #     python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }}
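Note on the "Set envs" and "Prepare Info" steps above: multi-line values reach later steps through the documented GitHub Actions convention of appending a NAME<<DELIMITER ... DELIMITER block to the $GITHUB_ENV or $GITHUB_OUTPUT file. A minimal bash sketch of that mechanism, runnable outside of Actions (the temp files and the JSON value are stand-ins, not part of this workflow):

# Stand-ins for the files Actions exposes as $GITHUB_ENV / $GITHUB_OUTPUT.
GITHUB_ENV=$(mktemp)
GITHUB_OUTPUT=$(mktemp)
params='{"releases": []}'   # stand-in for /tmp/autorelease_info.json

{
    echo 'AUTO_RELEASE_PARAMS<<EOF'   # start of a multi-line value
    echo "$params"
    echo 'EOF'                        # closing delimiter; any unique token works
} >> "$GITHUB_ENV"

echo "DRY_RUN=true" >> "$GITHUB_OUTPUT"   # single-line values use plain KEY=value

cat "$GITHUB_ENV" "$GITHUB_OUTPUT"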
7 .github/workflows/backport_branches.yml (vendored)
@@ -260,13 +260,18 @@ jobs:
      - name: Finish label
        if: ${{ !failure() }}
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
          ${{ toJson(needs) }}
          EOF
          cd "$GITHUB_WORKSPACE/tests/ci"
          # update mergeable check
          python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
          python3 merge_pr.py --set-ci-status
          # update overall ci report
          python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
          python3 merge_pr.py
      - name: Check Workflow results
        if: ${{ !cancelled() }}
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
24 .github/workflows/create_release.yml (vendored)
@@ -2,6 +2,7 @@ name: CreateRelease

concurrency:
  group: release

'on':
  workflow_dispatch:
    inputs:
@@ -26,6 +27,26 @@ concurrency:
        required: false
        default: false
        type: boolean
  workflow_call:
    inputs:
      ref:
        description: 'Git reference (branch or commit sha) from which to create the release'
        required: true
        type: string
      type:
        description: 'The type of release: "new" for a new release or "patch" for a patch release'
        required: true
        type: string
      only-repo:
        description: 'Run only repos updates including docker (repo-recovery, tests)'
        required: false
        default: false
        type: boolean
      dry-run:
        description: 'Dry run'
        required: false
        default: false
        type: boolean

jobs:
  CreateRelease:
@@ -101,6 +122,7 @@ jobs:
            --volume=".:/wd" --workdir="/wd" \
            clickhouse/style-test \
            ./tests/ci/changelog.py -v --debug-helpers \
            --gh-user-or-token ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} \
            --jobs=5 \
            --output="./docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
          git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
@@ -129,9 +151,9 @@ jobs:
        if: ${{ inputs.type == 'patch' && ! inputs.only-repo }}
        shell: bash
        run: |
          python3 ./tests/ci/create_release.py --set-progress-completed
          git reset --hard HEAD
          git checkout "$GITHUB_REF_NAME"
          python3 ./tests/ci/create_release.py --set-progress-completed
      - name: Create GH Release
        if: ${{ inputs.type == 'patch' && ! inputs.only-repo }}
        shell: bash
3 .github/workflows/jepsen.yml (vendored)
@@ -64,9 +64,10 @@ jobs:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
      - name: Check Workflow results
        if: ${{ !cancelled() }}
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
          cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
          ${{ toJson(needs) }}
          EOF
          python3 ./tests/ci/ci_buddy.py --check-wf-status
7 .github/workflows/merge_queue.yml (vendored)
@@ -103,9 +103,14 @@ jobs:
      - name: Check and set merge status
        if: ${{ needs.StyleCheck.result == 'success' }}
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
          ${{ toJson(needs) }}
          EOF
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
          python3 merge_pr.py --set-ci-status
      - name: Check Workflow results
        if: ${{ !cancelled() }}
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
1 .github/workflows/nightly.yml (vendored)
@@ -52,6 +52,7 @@ jobs:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
      - name: Check Workflow results
        if: ${{ !cancelled() }}
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
6 .github/workflows/pull_request.yml (vendored)
@@ -170,7 +170,11 @@ jobs:
        if: ${{ needs.StyleCheck.result == 'success' }}
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
          ${{ toJson(needs) }}
          EOF
          python3 merge_pr.py --set-ci-status
      - name: Check Workflow results
        uses: ./.github/actions/check_workflow
        with:
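Several of the steps above write `${{ toJson(needs) }}` into a workflow-results file through a quoted heredoc (<< 'EOF'), so the JSON lands in the file verbatim with no shell expansion, and the Python helpers in tests/ci then decide the overall status from it. A hedged illustration of the file shape and an equivalent-in-spirit check (the job names and the jq one-liner are illustrative only; CI actually uses merge_pr.py / ci_buddy.py):

WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"

# Quoted 'EOF' keeps the body literal; in CI the body is ${{ toJson(needs) }}.
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
{
  "StyleCheck":   { "result": "success" },
  "SomeBuildJob": { "result": "failure" }
}
EOF

# Treat the workflow as failed if any needed job reported "failure".
if jq -e '[.[].result] | any(. == "failure")' "$WORKFLOW_RESULT_FILE" > /dev/null; then
    echo "workflow status: failure"
else
    echo "workflow status: success"
fi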
4 .github/workflows/release_branches.yml (vendored)
@@ -481,12 +481,10 @@ jobs:
      - name: Finish label
        if: ${{ !failure() }}
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          # update mergeable check
          python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
          # update overall ci report
          python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
      - name: Check Workflow results
        if: ${{ !cancelled() }}
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
8 .gitmodules (vendored)
@@ -230,9 +230,6 @@
[submodule "contrib/minizip-ng"]
    path = contrib/minizip-ng
    url = https://github.com/zlib-ng/minizip-ng
[submodule "contrib/annoy"]
    path = contrib/annoy
    url = https://github.com/ClickHouse/annoy
[submodule "contrib/qpl"]
    path = contrib/qpl
    url = https://github.com/intel/qpl
@@ -341,16 +338,13 @@
    url = https://github.com/graphitemaster/incbin.git
[submodule "contrib/usearch"]
    path = contrib/usearch
    url = https://github.com/unum-cloud/usearch.git
    url = https://github.com/ClickHouse/usearch.git
[submodule "contrib/SimSIMD"]
    path = contrib/SimSIMD
    url = https://github.com/ashvardanian/SimSIMD.git
[submodule "contrib/FP16"]
    path = contrib/FP16
    url = https://github.com/Maratyszcza/FP16.git
[submodule "contrib/robin-map"]
    path = contrib/robin-map
    url = https://github.com/Tessil/robin-map.git
[submodule "contrib/aklomp-base64"]
    path = contrib/aklomp-base64
    url = https://github.com/aklomp/base64.git
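After .gitmodules edits like the usearch URL switch and the annoy removal above, an existing checkout usually has to re-sync its submodule remotes. A typical sequence, shown for context (standard git commands, not part of this commit):

git submodule sync --recursive                        # propagate new URLs from .gitmodules
git submodule update --init --recursive contrib/usearch
# clean up the removed contrib/annoy entry, if it was previously initialized
git submodule deinit -f contrib/annoy 2>/dev/null || true
rm -rf contrib/annoy .git/modules/contrib/annoy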
@@ -64,6 +64,7 @@
* The setting `optimize_trivial_insert_select` is disabled by default. In most cases, it should be beneficial. Nevertheless, if you are seeing slower INSERT SELECT or increased memory usage, you can enable it back or `SET compatibility = '24.6'`. [#58970](https://github.com/ClickHouse/ClickHouse/pull/58970) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Print stacktrace and diagnostic info if `clickhouse-client` or `clickhouse-local` crashes. [#61109](https://github.com/ClickHouse/ClickHouse/pull/61109) ([Alexander Tokmakov](https://github.com/tavplubix)).
* The result of `SHOW INDEX | INDEXES | INDICES | KEYS` was previously sorted by the primary key column names. Since this was unintuitive, the result is now sorted by the position of the primary key columns within the primary key. [#61131](https://github.com/ClickHouse/ClickHouse/pull/61131) ([Robert Schulze](https://github.com/rschu1ze)).
* Change how deduplication for Materialized Views works. This fixes a number of cases, such as: on the destination table, data split into two or more blocks being treated as duplicates when those blocks are inserted in parallel; on the MV destination table, equal blocks being deduplicated (which happens when an MV often produces equal data for different input due to aggregation); on the MV destination table, equal blocks coming from different MVs being deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)).
* Support reading partitioned DeltaLake data. Infer the DeltaLake schema by reading metadata instead of data. [#63201](https://github.com/ClickHouse/ClickHouse/pull/63201) ([Kseniia Sumarokova](https://github.com/kssenii)).
* In composable protocols, the TLS layer accepted only the `certificateFile` and `privateKeyFile` parameters. https://clickhouse.com/docs/en/operations/settings/composable-protocols. [#63985](https://github.com/ClickHouse/ClickHouse/pull/63985) ([Anton Ivashkin](https://github.com/ianton-ru)).
* Added profile event `SelectQueriesWithPrimaryKeyUsage` which indicates how many SELECT queries use the primary key to evaluate the WHERE clause. [#64492](https://github.com/ClickHouse/ClickHouse/pull/64492) ([0x01f](https://github.com/0xfei)).
@@ -187,14 +187,6 @@ else ()
    set(NO_WHOLE_ARCHIVE --no-whole-archive)
endif ()

if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
    # Can be lld or ld-lld or lld-13 or /path/to/lld.
    if (LINKER_NAME MATCHES "lld")
        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
        message (STATUS "Adding .gdb-index via --gdb-index linker option.")
    endif ()
endif()

if (NOT (SANITIZE_COVERAGE OR WITH_COVERAGE)
    AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
        OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
@@ -330,17 +322,21 @@ if (DISABLE_OMIT_FRAME_POINTER)
    set (CMAKE_ASM_FLAGS_ADD "${CMAKE_ASM_FLAGS_ADD} -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer")
endif()

# Before you start hating your debugger because it refuses to show variables ('<optimized out>'), try building with -DDEBUG_O_LEVEL="0"
# https://stackoverflow.com/questions/63386189/whats-the-difference-between-a-compilers-o0-option-and-og-option/63386263#63386263
set(DEBUG_O_LEVEL "g" CACHE STRING "The -Ox level used for debug builds")

set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")

set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}")

set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
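The DEBUG_O_LEVEL cache variable introduced here replaces the hard-coded -Og in debug builds with a configurable -O<level>. A hedged configure-time example (the build directory name is arbitrary):

# Default keeps the previous behaviour (-Og); "0" gives plain -O0 so the debugger
# stops reporting variables as <optimized out>.
cmake -S . -B build-debug -DCMAKE_BUILD_TYPE=Debug -DDEBUG_O_LEVEL="0"
cmake --build build-debug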
if (OS_DARWIN)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
@@ -402,7 +398,7 @@ if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
    set(ENABLE_GWP_ASAN OFF)
endif ()

option (ENABLE_FIU "Enable Fiu" ON)
option (ENABLE_LIBFIU "Enable libfiu" ON)

option(WERROR "Enable -Werror compiler option" ON)
@@ -428,12 +424,17 @@ if (NOT SANITIZE)
    set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
endif()

if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X) AND NOT SANITIZE)
    # Slightly more efficient code can be generated
    # It's disabled for ARM because otherwise ClickHouse cannot run on Android.
if (NOT OS_ANDROID AND OS_LINUX AND NOT ARCH_S390X AND NOT SANITIZE)
    # Using '-no-pie' builds executables with fixed addresses, resulting in slightly more efficient code
    # and keeping binary addresses constant even with ASLR enabled.
    # Disabled on Android as it requires PIE: https://source.android.com/docs/security/enhancements#android-5
    # Disabled on IBM S390X due to build issues with 'no-pie'
    # Disabled with sanitizers to avoid issues with maximum relocation size: https://github.com/ClickHouse/ClickHouse/pull/49145
    set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
    set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie")
    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie -Wl,-no-pie")
else ()
    message (WARNING "ClickHouse is built as PIE, system.trace_log will contain invalid addresses after server restart.")
endif ()
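The -no-pie branch above produces position-dependent executables so that addresses recorded in system.trace_log stay valid across restarts even with ASLR. An illustrative way to confirm which kind of binary a build produced (the binary path depends on your build directory; this check is not part of the build system):

# Non-PIE builds report "Type: EXEC", PIE builds report "Type: DYN".
readelf -h build/programs/clickhouse | grep 'Type:'
# `file` draws the same distinction as "executable" vs "pie executable".
file build/programs/clickhouse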
if (ENABLE_TESTS)
@@ -1,4 +1,4 @@
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")

if (USE_CLANG_TIDY)
    set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
@@ -27,27 +27,6 @@ bool cgroupsV2Enabled()
#endif
}

bool cgroupsV2MemoryControllerEnabled()
{
#if defined(OS_LINUX)
    chassert(cgroupsV2Enabled());
    /// According to https://docs.kernel.org/admin-guide/cgroup-v2.html, file "cgroup.controllers" defines which controllers are available
    /// for the current + child cgroups. The set of available controllers can be restricted from level to level using file
    /// "cgroups.subtree_control". It is therefore sufficient to check the bottom-most nested "cgroup.controllers" file.
    fs::path cgroup_dir = cgroupV2PathOfProcess();
    if (cgroup_dir.empty())
        return false;
    std::ifstream controllers_file(cgroup_dir / "cgroup.controllers");
    if (!controllers_file.is_open())
        return false;
    std::string controllers;
    std::getline(controllers_file, controllers);
    return controllers.find("memory") != std::string::npos;
#else
    return false;
#endif
}

fs::path cgroupV2PathOfProcess()
{
#if defined(OS_LINUX)
@@ -71,3 +50,28 @@ fs::path cgroupV2PathOfProcess()
    return {};
#endif
}

std::optional<std::string> getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name)
{
#if defined(OS_LINUX)
    if (!cgroupsV2Enabled())
        return {};

    fs::path current_cgroup = cgroupV2PathOfProcess();
    if (current_cgroup.empty())
        return {};

    /// Return the bottom-most nested file. If there is no such file at the current
    /// level, try again at the parent level as settings are inherited.
    while (current_cgroup != default_cgroups_mount.parent_path())
    {
        const auto path = current_cgroup / file_name;
        if (fs::exists(path))
            return {current_cgroup};
        current_cgroup = current_cgroup.parent_path();
    }
    return {};
#else
    return {};
#endif
}
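getCgroupsV2PathContainingFile() walks from the process's own cgroup upward until it finds a directory that actually contains the requested file, since cgroup v2 settings are inherited from parent levels. A rough shell equivalent for manual inspection (assumes cgroups v2 mounted at /sys/fs/cgroup; memory.max is just an example file name):

file_name="memory.max"
# Under cgroups v2, /proc/self/cgroup contains a single "0::<path>" line.
cg="/sys/fs/cgroup$(grep '^0::' /proc/self/cgroup | cut -d: -f3-)"
cg="${cg%/}"                      # normalize a possible trailing slash (root cgroup)
while [ "$cg" != "/sys/fs/cgroup" ] && [ ! -e "$cg/$file_name" ]; do
    cg=$(dirname "$cg")           # move to the parent level, mirroring the C++ loop
done
if [ -e "$cg/$file_name" ]; then echo "found in: $cg"; else echo "not found"; fi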
@@ -1,6 +1,7 @@
#pragma once

#include <filesystem>
#include <string_view>

#if defined(OS_LINUX)
/// I think it is possible to mount the cgroups hierarchy somewhere else (e.g. when in containers).
@@ -11,11 +12,11 @@ static inline const std::filesystem::path default_cgroups_mount = "/sys/fs/cgrou
/// Is cgroups v2 enabled on the system?
bool cgroupsV2Enabled();

/// Is the memory controller of cgroups v2 enabled on the system?
/// Assumes that cgroupsV2Enabled() is enabled.
bool cgroupsV2MemoryControllerEnabled();

/// Detects which cgroup v2 the process belongs to and returns the filesystem path to the cgroup.
/// Returns an empty path if the cgroup cannot be determined.
/// Assumes that cgroupsV2Enabled() is enabled.
std::filesystem::path cgroupV2PathOfProcess();

/// Returns the most nested cgroup dir containing the specified file.
/// If cgroups v2 is not enabled - returns an empty optional.
std::optional<std::string> getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name);
@@ -19,9 +19,6 @@ std::optional<uint64_t> getCgroupsV2MemoryLimit()
    if (!cgroupsV2Enabled())
        return {};

    if (!cgroupsV2MemoryControllerEnabled())
        return {};

    std::filesystem::path current_cgroup = cgroupV2PathOfProcess();
    if (current_cgroup.empty())
        return {};
@ -21,6 +21,7 @@
|
||||
#include "Poco/Exception.h"
|
||||
#include "Poco/Foundation.h"
|
||||
#include "Poco/Mutex.h"
|
||||
#include "Poco/Message.h"
|
||||
|
||||
|
||||
namespace Poco
|
||||
@ -78,6 +79,10 @@ public:
|
||||
///
|
||||
/// The default implementation just breaks into the debugger.
|
||||
|
||||
virtual void logMessageImpl(Message::Priority priority, const std::string & msg) {}
|
||||
/// Write a messages to the log
|
||||
/// Useful for logging from Poco
|
||||
|
||||
static void handle(const Exception & exc);
|
||||
/// Invokes the currently registered ErrorHandler.
|
||||
|
||||
@ -87,6 +92,9 @@ public:
|
||||
static void handle();
|
||||
/// Invokes the currently registered ErrorHandler.
|
||||
|
||||
static void logMessage(Message::Priority priority, const std::string & msg);
|
||||
/// Invokes the currently registered ErrorHandler to log a message.
|
||||
|
||||
static ErrorHandler * set(ErrorHandler * pHandler);
|
||||
/// Registers the given handler as the current error handler.
|
||||
///
|
||||
|
@ -8,7 +8,7 @@
|
||||
// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
//
|
||||
|
||||
|
||||
@ -35,79 +35,91 @@ ErrorHandler::~ErrorHandler()
|
||||
|
||||
void ErrorHandler::exception(const Exception& exc)
|
||||
{
|
||||
poco_debugger_msg(exc.what());
|
||||
poco_debugger_msg(exc.what());
|
||||
}
|
||||
|
||||
|
||||
|
||||
void ErrorHandler::exception(const std::exception& exc)
|
||||
{
|
||||
poco_debugger_msg(exc.what());
|
||||
poco_debugger_msg(exc.what());
|
||||
}
|
||||
|
||||
|
||||
void ErrorHandler::exception()
|
||||
{
|
||||
poco_debugger_msg("unknown exception");
|
||||
poco_debugger_msg("unknown exception");
|
||||
}
|
||||
|
||||
|
||||
void ErrorHandler::handle(const Exception& exc)
|
||||
{
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
try
|
||||
{
|
||||
_pHandler->exception(exc);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
try
|
||||
{
|
||||
_pHandler->exception(exc);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void ErrorHandler::handle(const std::exception& exc)
|
||||
{
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
try
|
||||
{
|
||||
_pHandler->exception(exc);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
try
|
||||
{
|
||||
_pHandler->exception(exc);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ErrorHandler::handle()
|
||||
{
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
try
|
||||
{
|
||||
_pHandler->exception();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
try
|
||||
{
|
||||
_pHandler->exception();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
void ErrorHandler::logMessage(Message::Priority priority, const std::string & msg)
|
||||
{
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
try
|
||||
{
|
||||
_pHandler->logMessageImpl(priority, msg);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ErrorHandler* ErrorHandler::set(ErrorHandler* pHandler)
|
||||
{
|
||||
poco_check_ptr(pHandler);
|
||||
poco_check_ptr(pHandler);
|
||||
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
ErrorHandler* pOld = _pHandler;
|
||||
_pHandler = pHandler;
|
||||
return pOld;
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
ErrorHandler* pOld = _pHandler;
|
||||
_pHandler = pHandler;
|
||||
return pOld;
|
||||
}
|
||||
|
||||
|
||||
ErrorHandler* ErrorHandler::defaultHandler()
|
||||
{
|
||||
// NOTE: Since this is called to initialize the static _pHandler
|
||||
// variable, sh has to be a local static, otherwise we run
|
||||
// into static initialization order issues.
|
||||
static SingletonHolder<ErrorHandler> sh;
|
||||
return sh.get();
|
||||
// NOTE: Since this is called to initialize the static _pHandler
|
||||
// variable, sh has to be a local static, otherwise we run
|
||||
// into static initialization order issues.
|
||||
static SingletonHolder<ErrorHandler> sh;
|
||||
return sh.get();
|
||||
}
|
||||
|
||||
|
||||
|
@ -58,6 +58,10 @@ namespace Net
|
||||
|
||||
void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout);
|
||||
|
||||
size_t getKeepAliveTimeout() const { return _keepAliveTimeout.totalSeconds(); }
|
||||
|
||||
size_t getMaxKeepAliveRequests() const { return _maxKeepAliveRequests; }
|
||||
|
||||
private:
|
||||
bool _firstRequest;
|
||||
Poco::Timespan _keepAliveTimeout;
|
||||
|
@ -19,11 +19,11 @@ namespace Poco {
|
||||
namespace Net {
|
||||
|
||||
|
||||
HTTPServerSession::HTTPServerSession(const StreamSocket& socket, HTTPServerParams::Ptr pParams):
|
||||
HTTPSession(socket, pParams->getKeepAlive()),
|
||||
_firstRequest(true),
|
||||
_keepAliveTimeout(pParams->getKeepAliveTimeout()),
|
||||
_maxKeepAliveRequests(pParams->getMaxKeepAliveRequests())
|
||||
HTTPServerSession::HTTPServerSession(const StreamSocket & socket, HTTPServerParams::Ptr pParams)
|
||||
: HTTPSession(socket, pParams->getKeepAlive())
|
||||
, _firstRequest(true)
|
||||
, _keepAliveTimeout(pParams->getKeepAliveTimeout())
|
||||
, _maxKeepAliveRequests(pParams->getMaxKeepAliveRequests())
|
||||
{
|
||||
setTimeout(pParams->getTimeout());
|
||||
}
|
||||
@ -52,11 +52,12 @@ bool HTTPServerSession::hasMoreRequests()
|
||||
}
|
||||
else if (_maxKeepAliveRequests != 0 && getKeepAlive())
|
||||
{
|
||||
if (_maxKeepAliveRequests > 0)
|
||||
--_maxKeepAliveRequests;
|
||||
return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ);
|
||||
}
|
||||
else return false;
|
||||
if (_maxKeepAliveRequests > 0)
|
||||
--_maxKeepAliveRequests;
|
||||
return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ);
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "Poco/Net/StreamSocketImpl.h"
|
||||
#include "Poco/NumberFormatter.h"
|
||||
#include "Poco/Timestamp.h"
|
||||
#include "Poco/ErrorHandler.h"
|
||||
#include <string.h> // FD_SET needs memset on some platforms, so we can't use <cstring>
|
||||
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
//
|
||||
|
||||
|
||||
@ -44,190 +44,194 @@ TCPServerConnectionFilter::~TCPServerConnectionFilter()
|
||||
|
||||
|
||||
TCPServer::TCPServer(TCPServerConnectionFactory::Ptr pFactory, Poco::UInt16 portNumber, TCPServerParams::Ptr pParams):
|
||||
_socket(ServerSocket(portNumber)),
|
||||
_thread(threadName(_socket)),
|
||||
_stopped(true)
|
||||
{
|
||||
Poco::ThreadPool& pool = Poco::ThreadPool::defaultPool();
|
||||
if (pParams)
|
||||
{
|
||||
int toAdd = pParams->getMaxThreads() - pool.capacity();
|
||||
if (toAdd > 0) pool.addCapacity(toAdd);
|
||||
}
|
||||
_pDispatcher = new TCPServerDispatcher(pFactory, pool, pParams);
|
||||
|
||||
_socket(ServerSocket(portNumber)),
|
||||
_thread(threadName(_socket)),
|
||||
_stopped(true)
|
||||
{
|
||||
Poco::ThreadPool& pool = Poco::ThreadPool::defaultPool();
|
||||
if (pParams)
|
||||
{
|
||||
int toAdd = pParams->getMaxThreads() - pool.capacity();
|
||||
if (toAdd > 0) pool.addCapacity(toAdd);
|
||||
}
|
||||
_pDispatcher = new TCPServerDispatcher(pFactory, pool, pParams);
|
||||
|
||||
}
|
||||
|
||||
|
||||
TCPServer::TCPServer(TCPServerConnectionFactory::Ptr pFactory, const ServerSocket& socket, TCPServerParams::Ptr pParams):
|
||||
_socket(socket),
|
||||
_thread(threadName(socket)),
|
||||
_stopped(true)
|
||||
_socket(socket),
|
||||
_thread(threadName(socket)),
|
||||
_stopped(true)
|
||||
{
|
||||
Poco::ThreadPool& pool = Poco::ThreadPool::defaultPool();
|
||||
if (pParams)
|
||||
{
|
||||
int toAdd = pParams->getMaxThreads() - pool.capacity();
|
||||
if (toAdd > 0) pool.addCapacity(toAdd);
|
||||
}
|
||||
_pDispatcher = new TCPServerDispatcher(pFactory, pool, pParams);
|
||||
Poco::ThreadPool& pool = Poco::ThreadPool::defaultPool();
|
||||
if (pParams)
|
||||
{
|
||||
int toAdd = pParams->getMaxThreads() - pool.capacity();
|
||||
if (toAdd > 0) pool.addCapacity(toAdd);
|
||||
}
|
||||
_pDispatcher = new TCPServerDispatcher(pFactory, pool, pParams);
|
||||
}
|
||||
|
||||
|
||||
TCPServer::TCPServer(TCPServerConnectionFactory::Ptr pFactory, Poco::ThreadPool& threadPool, const ServerSocket& socket, TCPServerParams::Ptr pParams):
|
||||
_socket(socket),
|
||||
_pDispatcher(new TCPServerDispatcher(pFactory, threadPool, pParams)),
|
||||
_thread(threadName(socket)),
|
||||
_stopped(true)
|
||||
_socket(socket),
|
||||
_pDispatcher(new TCPServerDispatcher(pFactory, threadPool, pParams)),
|
||||
_thread(threadName(socket)),
|
||||
_stopped(true)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
TCPServer::~TCPServer()
|
||||
{
|
||||
try
|
||||
{
|
||||
stop();
|
||||
_pDispatcher->release();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
poco_unexpected();
|
||||
}
|
||||
try
|
||||
{
|
||||
stop();
|
||||
_pDispatcher->release();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
poco_unexpected();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
const TCPServerParams& TCPServer::params() const
|
||||
{
|
||||
return _pDispatcher->params();
|
||||
return _pDispatcher->params();
|
||||
}
|
||||
|
||||
|
||||
void TCPServer::start()
|
||||
{
|
||||
poco_assert (_stopped);
|
||||
poco_assert (_stopped);
|
||||
|
||||
_stopped = false;
|
||||
_thread.start(*this);
|
||||
_stopped = false;
|
||||
_thread.start(*this);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void TCPServer::stop()
|
||||
{
|
||||
if (!_stopped)
|
||||
{
|
||||
_stopped = true;
|
||||
_thread.join();
|
||||
_pDispatcher->stop();
|
||||
}
|
||||
if (!_stopped)
|
||||
{
|
||||
_stopped = true;
|
||||
_thread.join();
|
||||
_pDispatcher->stop();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void TCPServer::run()
|
||||
{
|
||||
while (!_stopped)
|
||||
{
|
||||
Poco::Timespan timeout(250000);
|
||||
try
|
||||
{
|
||||
if (_socket.poll(timeout, Socket::SELECT_READ))
|
||||
{
|
||||
try
|
||||
{
|
||||
StreamSocket ss = _socket.acceptConnection();
|
||||
|
||||
if (!_pConnectionFilter || _pConnectionFilter->accept(ss))
|
||||
{
|
||||
// enable nodelay per default: OSX really needs that
|
||||
while (!_stopped)
|
||||
{
|
||||
Poco::Timespan timeout(250000);
|
||||
try
|
||||
{
|
||||
if (_socket.poll(timeout, Socket::SELECT_READ))
|
||||
{
|
||||
try
|
||||
{
|
||||
StreamSocket ss = _socket.acceptConnection();
|
||||
|
||||
if (!_pConnectionFilter || _pConnectionFilter->accept(ss))
|
||||
{
|
||||
// enable nodelay per default: OSX really needs that
|
||||
#if defined(POCO_OS_FAMILY_UNIX)
|
||||
if (ss.address().family() != AddressFamily::UNIX_LOCAL)
|
||||
if (ss.address().family() != AddressFamily::UNIX_LOCAL)
|
||||
#endif
|
||||
{
|
||||
ss.setNoDelay(true);
|
||||
}
|
||||
_pDispatcher->enqueue(ss);
|
||||
}
|
||||
}
|
||||
catch (Poco::Exception& exc)
|
||||
{
|
||||
ErrorHandler::handle(exc);
|
||||
}
|
||||
catch (std::exception& exc)
|
||||
{
|
||||
ErrorHandler::handle(exc);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
ErrorHandler::handle();
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Poco::Exception& exc)
|
||||
{
|
||||
ErrorHandler::handle(exc);
|
||||
// possibly a resource issue since poll() failed;
|
||||
// give some time to recover before trying again
|
||||
Poco::Thread::sleep(50);
|
||||
}
|
||||
}
|
||||
{
|
||||
ss.setNoDelay(true);
|
||||
}
|
||||
_pDispatcher->enqueue(ss);
|
||||
}
|
||||
else
|
||||
{
|
||||
ErrorHandler::logMessage(Message::PRIO_WARNING, "Filtered out connection from " + ss.peerAddress().toString());
|
||||
}
|
||||
}
|
||||
catch (Poco::Exception& exc)
|
||||
{
|
||||
ErrorHandler::handle(exc);
|
||||
}
|
||||
catch (std::exception& exc)
|
||||
{
|
||||
ErrorHandler::handle(exc);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
ErrorHandler::handle();
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Poco::Exception& exc)
|
||||
{
|
||||
ErrorHandler::handle(exc);
|
||||
// possibly a resource issue since poll() failed;
|
||||
// give some time to recover before trying again
|
||||
Poco::Thread::sleep(50);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int TCPServer::currentThreads() const
|
||||
{
|
||||
return _pDispatcher->currentThreads();
|
||||
return _pDispatcher->currentThreads();
|
||||
}
|
||||
|
||||
|
||||
int TCPServer::maxThreads() const
|
||||
{
|
||||
return _pDispatcher->maxThreads();
|
||||
return _pDispatcher->maxThreads();
|
||||
}
|
||||
|
||||
|
||||
|
||||
int TCPServer::totalConnections() const
|
||||
{
|
||||
return _pDispatcher->totalConnections();
|
||||
return _pDispatcher->totalConnections();
|
||||
}
|
||||
|
||||
|
||||
int TCPServer::currentConnections() const
|
||||
{
|
||||
return _pDispatcher->currentConnections();
|
||||
return _pDispatcher->currentConnections();
|
||||
}
|
||||
|
||||
|
||||
int TCPServer::maxConcurrentConnections() const
|
||||
{
|
||||
return _pDispatcher->maxConcurrentConnections();
|
||||
return _pDispatcher->maxConcurrentConnections();
|
||||
}
|
||||
|
||||
|
||||
|
||||
int TCPServer::queuedConnections() const
|
||||
{
|
||||
return _pDispatcher->queuedConnections();
|
||||
return _pDispatcher->queuedConnections();
|
||||
}
|
||||
|
||||
|
||||
int TCPServer::refusedConnections() const
|
||||
{
|
||||
return _pDispatcher->refusedConnections();
|
||||
return _pDispatcher->refusedConnections();
|
||||
}
|
||||
|
||||
|
||||
void TCPServer::setConnectionFilter(const TCPServerConnectionFilter::Ptr& pConnectionFilter)
|
||||
{
|
||||
poco_assert (_stopped);
|
||||
poco_assert (_stopped);
|
||||
|
||||
_pConnectionFilter = pConnectionFilter;
|
||||
_pConnectionFilter = pConnectionFilter;
|
||||
}
|
||||
|
||||
|
||||
std::string TCPServer::threadName(const ServerSocket& socket)
|
||||
{
|
||||
std::string name("TCPServer: ");
|
||||
name.append(socket.address().toString());
|
||||
return name;
|
||||
std::string name("TCPServer: ");
|
||||
name.append(socket.address().toString());
|
||||
return name;
|
||||
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
// Copyright (c) 2005-2007, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
//
|
||||
|
||||
|
||||
@ -33,44 +33,44 @@ namespace Net {
|
||||
class TCPConnectionNotification: public Notification
|
||||
{
|
||||
public:
|
||||
TCPConnectionNotification(const StreamSocket& socket):
|
||||
_socket(socket)
|
||||
{
|
||||
}
|
||||
|
||||
~TCPConnectionNotification()
|
||||
{
|
||||
}
|
||||
|
||||
const StreamSocket& socket() const
|
||||
{
|
||||
return _socket;
|
||||
}
|
||||
TCPConnectionNotification(const StreamSocket& socket):
|
||||
_socket(socket)
|
||||
{
|
||||
}
|
||||
|
||||
~TCPConnectionNotification()
|
||||
{
|
||||
}
|
||||
|
||||
const StreamSocket& socket() const
|
||||
{
|
||||
return _socket;
|
||||
}
|
||||
|
||||
private:
|
||||
StreamSocket _socket;
|
||||
StreamSocket _socket;
|
||||
};
|
||||
|
||||
|
||||
TCPServerDispatcher::TCPServerDispatcher(TCPServerConnectionFactory::Ptr pFactory, Poco::ThreadPool& threadPool, TCPServerParams::Ptr pParams):
|
||||
_rc(1),
|
||||
_pParams(pParams),
|
||||
_currentThreads(0),
|
||||
_totalConnections(0),
|
||||
_currentConnections(0),
|
||||
_maxConcurrentConnections(0),
|
||||
_refusedConnections(0),
|
||||
_stopped(false),
|
||||
_pConnectionFactory(pFactory),
|
||||
_threadPool(threadPool)
|
||||
_rc(1),
|
||||
_pParams(pParams),
|
||||
_currentThreads(0),
|
||||
_totalConnections(0),
|
||||
_currentConnections(0),
|
||||
_maxConcurrentConnections(0),
|
||||
_refusedConnections(0),
|
||||
_stopped(false),
|
||||
_pConnectionFactory(pFactory),
|
||||
_threadPool(threadPool)
|
||||
{
|
||||
poco_check_ptr (pFactory);
|
||||
poco_check_ptr (pFactory);
|
||||
|
||||
if (!_pParams)
|
||||
_pParams = new TCPServerParams;
|
||||
|
||||
if (_pParams->getMaxThreads() == 0)
|
||||
_pParams->setMaxThreads(threadPool.capacity());
|
||||
if (!_pParams)
|
||||
_pParams = new TCPServerParams;
|
||||
|
||||
if (_pParams->getMaxThreads() == 0)
|
||||
_pParams->setMaxThreads(threadPool.capacity());
|
||||
}
|
||||
|
||||
|
||||
@ -81,161 +81,184 @@ TCPServerDispatcher::~TCPServerDispatcher()
|
||||
|
||||
void TCPServerDispatcher::duplicate()
|
||||
{
|
||||
++_rc;
|
||||
++_rc;
|
||||
}
|
||||
|
||||
|
||||
void TCPServerDispatcher::release()
|
||||
{
|
||||
if (--_rc == 0) delete this;
|
||||
if (--_rc == 0) delete this;
|
||||
}
|
||||
|
||||
|
||||
void TCPServerDispatcher::run()
|
||||
{
|
||||
AutoPtr<TCPServerDispatcher> guard(this); // ensure object stays alive
|
||||
AutoPtr<TCPServerDispatcher> guard(this); // ensure object stays alive
|
||||
|
||||
int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds();
|
||||
int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds();
|
||||
|
||||
for (;;)
|
||||
{
|
||||
try
|
||||
{
|
||||
AutoPtr<Notification> pNf = _queue.waitDequeueNotification(idleTime);
|
||||
if (pNf && !_stopped)
|
||||
{
|
||||
TCPConnectionNotification* pCNf = dynamic_cast<TCPConnectionNotification*>(pNf.get());
|
||||
if (pCNf)
|
||||
{
|
||||
beginConnection();
|
||||
if (!_stopped)
|
||||
{
|
||||
std::unique_ptr<TCPServerConnection> pConnection(_pConnectionFactory->createConnection(pCNf->socket()));
|
||||
poco_check_ptr(pConnection.get());
|
||||
pConnection->start();
|
||||
}
|
||||
/// endConnection() should be called after destroying TCPServerConnection,
|
||||
/// otherwise currentConnections() could become zero while some connections are yet still alive.
|
||||
endConnection();
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Poco::Exception &exc) { ErrorHandler::handle(exc); }
|
||||
catch (std::exception &exc) { ErrorHandler::handle(exc); }
|
||||
catch (...) { ErrorHandler::handle(); }
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
if (_stopped || (_currentThreads > 1 && _queue.empty()))
|
||||
{
|
||||
--_currentThreads;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (;;)
|
||||
{
|
||||
try
|
||||
{
|
||||
AutoPtr<Notification> pNf = _queue.waitDequeueNotification(idleTime);
|
||||
if (pNf && !_stopped)
|
||||
{
|
||||
TCPConnectionNotification* pCNf = dynamic_cast<TCPConnectionNotification*>(pNf.get());
|
||||
if (pCNf)
|
||||
{
|
||||
beginConnection();
|
||||
if (!_stopped)
|
||||
{
|
||||
std::unique_ptr<TCPServerConnection> pConnection(_pConnectionFactory->createConnection(pCNf->socket()));
|
||||
poco_check_ptr(pConnection.get());
|
||||
pConnection->start();
|
||||
}
|
||||
/// endConnection() should be called after destroying TCPServerConnection,
|
||||
/// otherwise currentConnections() could become zero while some connections are yet still alive.
|
||||
endConnection();
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Poco::Exception &exc) { ErrorHandler::handle(exc); }
|
||||
catch (std::exception &exc) { ErrorHandler::handle(exc); }
|
||||
catch (...) { ErrorHandler::handle(); }
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
if (_stopped || (_currentThreads > 1 && _queue.empty()))
|
||||
{
|
||||
--_currentThreads;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
static const std::string threadName("TCPServerConnection");
|
||||
static const std::string threadName("TCPServerConnection");
|
||||
}
|
||||
|
||||
|
||||
|
||||
void TCPServerDispatcher::enqueue(const StreamSocket& socket)
|
||||
{
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
|
||||
if (_queue.size() < _pParams->getMaxQueued())
|
||||
{
|
||||
if (!_queue.hasIdleThreads() && _currentThreads < _pParams->getMaxThreads())
|
||||
{
|
||||
try
|
||||
{
|
||||
ErrorHandler::logMessage(Message::PRIO_TEST, "Queue size: " + std::to_string(_queue.size()) +
|
||||
", current threads: " + std::to_string(_currentThreads) +
|
||||
", threads in pool: " + std::to_string(_threadPool.allocated()) +
|
||||
", current connections: " + std::to_string(_currentConnections));
|
||||
|
||||
|
||||
if (_queue.size() < _pParams->getMaxQueued())
|
||||
{
|
||||
/// NOTE: the condition below is wrong.
|
||||
/// Since the thread pool is shared between multiple servers/TCPServerDispatchers,
|
||||
/// _currentThreads < _pParams->getMaxThreads() will be true when the pool is actually saturated.
|
||||
/// As a result, queue is useless and connections never wait in queue.
|
||||
/// Instead, we (mistakenly) think that we can create a thread for this connection, but we fail to create it
|
||||
/// and the connection get rejected.
|
||||
/// We could check _currentThreads < _threadPool.allocated() to make it work,
|
||||
/// but it's not clear if we want to make it work
|
||||
/// because it may be better to reject connection immediately if we don't have resources to handle it.
|
||||
if (!_queue.hasIdleThreads() && _currentThreads < _pParams->getMaxThreads())
|
||||
{
|
||||
try
|
||||
{
|
||||
this->duplicate();
|
||||
_threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName);
|
||||
++_currentThreads;
|
||||
}
|
||||
catch (Poco::Exception& exc)
|
||||
{
|
||||
_threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName);
|
||||
++_currentThreads;
|
||||
}
|
||||
catch (Poco::Exception& exc)
|
||||
{
|
||||
ErrorHandler::logMessage(Message::PRIO_WARNING, "Got an exception while starting thread for connection from " +
|
||||
socket.peerAddress().toString());
|
||||
ErrorHandler::handle(exc);
|
||||
this->release();
|
||||
++_refusedConnections;
|
||||
std::cerr << "Got exception while starting thread for connection. Error code: "
|
||||
<< exc.code() << ", message: '" << exc.displayText() << "'" << std::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
_queue.enqueueNotification(new TCPConnectionNotification(socket));
|
||||
}
|
||||
else
|
||||
{
|
||||
++_refusedConnections;
|
||||
}
|
||||
++_refusedConnections;
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (!_queue.hasIdleThreads())
|
||||
{
|
||||
ErrorHandler::logMessage(Message::PRIO_TRACE, "Don't have idle threads, adding connection from " +
|
||||
socket.peerAddress().toString() + " to the queue, size: " + std::to_string(_queue.size()));
|
||||
}
|
||||
_queue.enqueueNotification(new TCPConnectionNotification(socket));
|
||||
}
|
||||
else
|
||||
{
|
||||
ErrorHandler::logMessage(Message::PRIO_WARNING, "Refusing connection from " + socket.peerAddress().toString() +
|
||||
", reached max queue size " + std::to_string(_pParams->getMaxQueued()));
|
||||
++_refusedConnections;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void TCPServerDispatcher::stop()
|
||||
{
|
||||
_stopped = true;
|
||||
_queue.clear();
|
||||
_queue.wakeUpAll();
|
||||
_stopped = true;
|
||||
_queue.clear();
|
||||
_queue.wakeUpAll();
|
||||
}
|
||||
|
||||
|
||||
int TCPServerDispatcher::currentThreads() const
|
||||
{
|
||||
return _currentThreads;
|
||||
return _currentThreads;
|
||||
}
|
||||
|
||||
int TCPServerDispatcher::maxThreads() const
|
||||
{
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
|
||||
return _threadPool.capacity();
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
|
||||
return _threadPool.capacity();
|
||||
}
|
||||
|
||||
|
||||
int TCPServerDispatcher::totalConnections() const
|
||||
{
|
||||
return _totalConnections;
|
||||
return _totalConnections;
|
||||
}
|
||||
|
||||
|
||||
int TCPServerDispatcher::currentConnections() const
|
||||
{
|
||||
return _currentConnections;
|
||||
return _currentConnections;
|
||||
}
|
||||
|
||||
|
||||
int TCPServerDispatcher::maxConcurrentConnections() const
|
||||
{
|
||||
return _maxConcurrentConnections;
|
||||
return _maxConcurrentConnections;
|
||||
}
|
||||
|
||||
|
||||
int TCPServerDispatcher::queuedConnections() const
|
||||
{
|
||||
return _queue.size();
|
||||
return _queue.size();
|
||||
}
|
||||
|
||||
|
||||
int TCPServerDispatcher::refusedConnections() const
|
||||
{
|
||||
return _refusedConnections;
|
||||
return _refusedConnections;
|
||||
}
|
||||
|
||||
|
||||
void TCPServerDispatcher::beginConnection()
|
||||
{
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
FastMutex::ScopedLock lock(_mutex);
|
||||
|
||||
++_totalConnections;
|
||||
++_currentConnections;
|
||||
if (_currentConnections > _maxConcurrentConnections)
|
||||
_maxConcurrentConnections.store(_currentConnections);
|
||||
++_totalConnections;
|
||||
++_currentConnections;
|
||||
if (_currentConnections > _maxConcurrentConnections)
|
||||
_maxConcurrentConnections.store(_currentConnections);
|
||||
}
|
||||
|
||||
|
||||
void TCPServerDispatcher::endConnection()
|
||||
{
|
||||
--_currentConnections;
|
||||
--_currentConnections;
|
||||
}
|
||||
|
||||
|
||||
|
@@ -2,11 +2,11 @@

# NOTE: VERSION_REVISION has nothing in common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54489)
SET(VERSION_REVISION 54490)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 8)
SET(VERSION_MINOR 9)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af)
SET(VERSION_DESCRIBE v24.8.1.1-testing)
SET(VERSION_STRING 24.8.1.1)
SET(VERSION_GITHASH e02b434d2fc0c4fbee29ca675deab7474d274608)
SET(VERSION_DESCRIBE v24.9.1.1-testing)
SET(VERSION_STRING 24.9.1.1)
# end of autochange
@@ -57,8 +57,8 @@ option(WITH_COVERAGE "Instrumentation for code coverage with default implementat

if (WITH_COVERAGE)
    message (STATUS "Enabled instrumentation for code coverage")
    set(COVERAGE_FLAGS "SHELL:-fprofile-instr-generate -fcoverage-mapping")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
    set (COVERAGE_FLAGS -fprofile-instr-generate -fcoverage-mapping)
    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
endif()

option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF)
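For context, this hunk only changes how COVERAGE_FLAGS is spelled (a plain flag list instead of a single SHELL:-prefixed string); the flags themselves enable clang's source-based coverage. The underlying workflow, shown on a standalone toy target (generic clang/LLVM usage, not the ClickHouse CI pipeline):

clang++ -fprofile-instr-generate -fcoverage-mapping -o demo demo.cpp
LLVM_PROFILE_FILE=demo.profraw ./demo          # writes raw counters on exit
llvm-profdata merge -sparse demo.profraw -o demo.profdata
llvm-cov report ./demo -instr-profile=demo.profdata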
10 contrib/CMakeLists.txt (vendored)
@@ -71,7 +71,6 @@ add_contrib (zlib-ng-cmake zlib-ng)
add_contrib (bzip2-cmake bzip2)
add_contrib (minizip-ng-cmake minizip-ng)
add_contrib (snappy-cmake snappy)
add_contrib (rocksdb-cmake rocksdb)
add_contrib (thrift-cmake thrift)
# parquet/arrow/orc
add_contrib (arrow-cmake arrow) # requires: snappy, thrift, double-conversion
@@ -148,6 +147,7 @@ add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift, avro, arro
add_contrib (cppkafka-cmake cppkafka)
add_contrib (libpqxx-cmake libpqxx)
add_contrib (libpq-cmake libpq)
add_contrib (rocksdb-cmake rocksdb) # requires: jemalloc, snappy, zlib, lz4, zstd, liburing
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (idna-cmake idna)
@@ -179,7 +179,7 @@ else()
    message(STATUS "Not using QPL")
endif ()

if (OS_LINUX AND ARCH_AMD64)
if (OS_LINUX AND ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER)
    option (ENABLE_QATLIB "Enable Intel® QuickAssist Technology Library (QATlib)" ${ENABLE_LIBRARIES})
elseif(ENABLE_QATLIB)
    message (${RECONFIGURE_MESSAGE_LEVEL} "QATLib is only supported on x86_64")
@@ -205,14 +205,12 @@ add_contrib (morton-nd-cmake morton-nd)
if (ARCH_S390X)
    add_contrib(crc32-s390x-cmake crc32-s390x)
endif()
add_contrib (annoy-cmake annoy)

option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES})
option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES})
if (ENABLE_USEARCH)
    add_contrib (FP16-cmake FP16)
    add_contrib (robin-map-cmake robin-map)
    add_contrib (SimSIMD-cmake SimSIMD)
    add_contrib (usearch-cmake usearch) # requires: FP16, robin-map, SimdSIMD
    add_contrib (usearch-cmake usearch) # requires: FP16, SimdSIMD
else ()
    message(STATUS "Not using USearch")
endif ()
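The ENABLE_USEARCH option above follows ENABLE_LIBRARIES by default and now pulls in only FP16 and SimSIMD (robin-map is no longer required). A hedged configure-time example of toggling it explicitly (build directory names are arbitrary):

cmake -S . -B build -DENABLE_USEARCH=1        # vector-similarity contribs: usearch, FP16, SimSIMD
cmake -S . -B build-small -DENABLE_USEARCH=0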
@@ -27,7 +27,7 @@ if (ENABLE_QAT_OUT_OF_TREE_BUILD)
        ${QAT_AL_INCLUDE_DIR}
        ${QAT_USDM_INCLUDE_DIR}
        ${ZSTD_LIBRARY_DIR})
    target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC)
    target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0)
    add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin)
else () # In-tree build
    message(STATUS "Intel QATZSTD in-tree build")
@@ -78,7 +78,7 @@ else () # In-tree build
        ${QAT_USDM_INCLUDE_DIR}
        ${ZSTD_LIBRARY_DIR}
        ${LIBQAT_HEADER_DIR})
    target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC -DINTREE)
    target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DINTREE)
    target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $<BUILD_INTERFACE:${QATZSTD_SRC_DIR}> $<INSTALL_INTERFACE:include>)
    add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin)
endif ()
2 contrib/SimSIMD (vendored)
@@ -1 +1 @@
Subproject commit de2cb75b9e9e3389d5e1e51fd9f8ed151f3c17cf
Subproject commit 91a76d1ac519b3b9dc8957734a3dabd985f00c26

1 contrib/annoy (vendored)
@@ -1 +0,0 @@
Subproject commit f2ac8e7b48f9a9cf676d3b58286e5455aba8e956
@@ -1,24 +0,0 @@
option(ENABLE_ANNOY "Enable Annoy index support" ${ENABLE_LIBRARIES})

# Annoy index should be disabled with undefined sanitizer. Because of memory storage optimizations
# (https://github.com/ClickHouse/annoy/blob/9d8a603a4cd252448589e84c9846f94368d5a289/src/annoylib.h#L442-L463)
# UBSan fails and leads to crash. Simmilar issue is already opened in Annoy repo
# https://github.com/spotify/annoy/issues/456
# Problem with aligment can lead to errors like
# (https://stackoverflow.com/questions/46790550/c-undefined-behavior-strict-aliasing-rule-or-incorrect-alignment)
# or will lead to crash on arm https://developer.arm.com/documentation/ka003038/latest
# This issues should be resolved before annoy became non-experimental (--> setting "allow_experimental_annoy_index")
if ((NOT ENABLE_ANNOY) OR (SANITIZE STREQUAL "undefined") OR (ARCH_AARCH64))
    message (STATUS "Not using annoy")
    return()
endif()

set(ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/annoy")
set(ANNOY_SOURCE_DIR "${ANNOY_PROJECT_DIR}/src")

add_library(_annoy INTERFACE)
target_include_directories(_annoy SYSTEM INTERFACE ${ANNOY_SOURCE_DIR})

add_library(ch_contrib::annoy ALIAS _annoy)
target_compile_definitions(_annoy INTERFACE ENABLE_ANNOY)
target_compile_definitions(_annoy INTERFACE ANNOYLIB_MULTITHREADED_BUILD)
@@ -1,20 +1,21 @@
if (NOT ENABLE_FIU)
    message (STATUS "Not using fiu")
if (NOT ENABLE_LIBFIU)
    message (STATUS "Not using libfiu")
    return ()
endif ()

set(FIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/")
set(LIBFIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/")

set(FIU_SOURCES
    ${FIU_DIR}/libfiu/fiu.c
    ${FIU_DIR}/libfiu/fiu-rc.c
    ${FIU_DIR}/libfiu/backtrace.c
    ${FIU_DIR}/libfiu/wtable.c
set(LIBFIU_SOURCES
    ${LIBFIU_DIR}/libfiu/fiu.c
    ${LIBFIU_DIR}/libfiu/fiu-rc.c
    ${LIBFIU_DIR}/libfiu/backtrace.c
    ${LIBFIU_DIR}/libfiu/wtable.c
)

set(FIU_HEADERS "${FIU_DIR}/libfiu")
set(LIBFIU_HEADERS "${LIBFIU_DIR}/libfiu")

add_library(_fiu ${FIU_SOURCES})
target_compile_definitions(_fiu PUBLIC DUMMY_BACKTRACE)
target_include_directories(_fiu PUBLIC ${FIU_HEADERS})
add_library(ch_contrib::fiu ALIAS _fiu)
add_library(_libfiu ${LIBFIU_SOURCES})
target_compile_definitions(_libfiu PUBLIC DUMMY_BACKTRACE)
target_compile_definitions(_libfiu PUBLIC FIU_ENABLE)
target_include_directories(_libfiu PUBLIC ${LIBFIU_HEADERS})
add_library(ch_contrib::libfiu ALIAS _libfiu)
2 contrib/libprotobuf-mutator (vendored)
@@ -1 +1 @@
Subproject commit 1f95f8083066f5b38fd2db172e7e7f9aa7c49d2d
Subproject commit b922c8ab9004ef9944982e4f165e2747b13223fa

2 contrib/librdkafka (vendored)
@@ -1 +1 @@
Subproject commit 2d2aab6f5b79db1cfca15d7bf0dee75d00d82082
Subproject commit 39d4ed49ccf3406e2bf825d5d7b0903b5a290782

2 contrib/libunwind (vendored)
@@ -1 +1 @@
Subproject commit a89d904befea07814628c6ce0b44083c4e149c62
Subproject commit 601db0b0e03018c01710470a37703b618f9cf08b

2 contrib/qpl (vendored)
@@ -1 +1 @@
Subproject commit d4715e0e79896b85612158e135ee1a85f3b3e04d
Subproject commit c2ced94c53c1ee22191201a59878e9280bc9b9b8
@ -4,7 +4,6 @@ set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl")
|
||||
set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources")
|
||||
set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl")
|
||||
set (EFFICIENT_WAIT OFF)
|
||||
set (BLOCK_ON_FAULT ON)
|
||||
set (LOG_HW_INIT OFF)
|
||||
set (SANITIZE_MEMORY OFF)
|
||||
set (SANITIZE_THREADS OFF)
|
||||
@ -16,16 +15,20 @@ function(GetLibraryVersion _content _outputVar)
|
||||
SET(${_outputVar} ${CMAKE_MATCH_1} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
set (QPL_VERSION 1.2.0)
|
||||
set (QPL_VERSION 1.6.0)
|
||||
|
||||
message(STATUS "Intel QPL version: ${QPL_VERSION}")
|
||||
|
||||
# There are 5 source subdirectories under $QPL_SRC_DIR: isal, c_api, core-sw, middle-layer, c_api.
|
||||
# Generate 8 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, core_iaa, middle_layer_lib.
|
||||
# There are 5 source subdirectories under $QPL_SRC_DIR: c_api, core-iaa, core-sw, middle-layer and isal.
|
||||
# Generate 8 library targets: qpl_c_api, core_iaa, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, middle_layer_lib, isal and isal_asm,
|
||||
# which are then combined into static or shared qpl.
|
||||
# Output ch_contrib::qpl by linking with 8 library targets.
|
||||
|
||||
# The qpl submodule comes with its own version of isal. It contains code which does not exist in upstream isal. It would be nice to link
|
||||
# only upstream isal (ch_contrib::isal) but at this point we can't.
|
||||
# Note, QPL has integrated a customized version of ISA-L to meet specific needs.
|
||||
# This version has been significantly modified and there are no plans to maintain compatibility with the upstream version
|
||||
# or upgrade the current copy.
|
||||
|
||||
## cmake/CompileOptions.cmake and automatic wrappers generation
|
||||
|
||||
# ==========================================================================
|
||||
# Copyright (C) 2022 Intel Corporation
|
||||
@ -442,6 +445,7 @@ function(generate_unpack_kernel_arrays current_directory PLATFORMS_LIST)
|
||||
endforeach()
|
||||
endfunction()
|
||||
|
||||
# [SUBDIR]isal
|
||||
|
||||
enable_language(ASM_NASM)
|
||||
|
||||
@ -479,7 +483,6 @@ set(ISAL_ASM_SRC ${QPL_SRC_DIR}/isal/igzip/igzip_body.asm
|
||||
${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_04.asm
|
||||
${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_06.asm
|
||||
${QPL_SRC_DIR}/isal/igzip/igzip_multibinary.asm
|
||||
${QPL_SRC_DIR}/isal/igzip/stdmac.asm
|
||||
${QPL_SRC_DIR}/isal/crc/crc_multibinary.asm
|
||||
${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8.asm
|
||||
${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8_02.asm
|
||||
@ -505,7 +508,6 @@ set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
|
||||
# Setting external and internal interfaces for ISA-L library
|
||||
target_include_directories(isal
|
||||
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/isal/include>
|
||||
PRIVATE ${QPL_SRC_DIR}/isal/include
|
||||
PUBLIC ${QPL_SRC_DIR}/isal/igzip)
|
||||
|
||||
set_target_properties(isal PROPERTIES
|
||||
@ -617,12 +619,9 @@ target_compile_options(qplcore_sw_dispatcher
|
||||
|
||||
# [SUBDIR]core-iaa
|
||||
file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c
|
||||
${QPL_SRC_DIR}/core-iaa/sources/aecs/*.cpp
|
||||
${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.c
|
||||
${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.cpp
|
||||
${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.c
|
||||
${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.cpp
|
||||
${QPL_SRC_DIR}/core-iaa/sources/bit_rev.c)
|
||||
${QPL_SRC_DIR}/core-iaa/sources/*.c)
|
||||
|
||||
# Create library
|
||||
add_library(core_iaa OBJECT ${HW_PATH_SRC})
|
||||
@ -634,31 +633,27 @@ target_include_directories(core_iaa
|
||||
PRIVATE ${UUID_DIR}
|
||||
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/include>
|
||||
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/sources/include>
|
||||
PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include> # status.h in own_checkers.h
|
||||
PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/sources/c_api> # own_checkers.h
|
||||
PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include> # status.h in own_checkers.h
|
||||
PRIVATE $<TARGET_PROPERTY:qpl_c_api,INTERFACE_INCLUDE_DIRECTORIES> # for own_checkers.h
|
||||
PRIVATE $<TARGET_PROPERTY:qplcore_sw_dispatcher,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
|
||||
target_compile_features(core_iaa PRIVATE c_std_11)
|
||||
|
||||
target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK
|
||||
PRIVATE $<$<BOOL:${BLOCK_ON_FAULT}>: BLOCK_ON_FAULT_ENABLED>
|
||||
PRIVATE $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>
|
||||
PRIVATE $<$<BOOL:${DYNAMIC_LOADING_LIBACCEL_CONFIG}>:DYNAMIC_LOADING_LIBACCEL_CONFIG>)
|
||||
|
||||
# [SUBDIR]middle-layer
|
||||
file(GLOB MIDDLE_LAYER_SRC
|
||||
${QPL_SRC_DIR}/middle-layer/analytics/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/c_wrapper/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/checksum/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/accelerator/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/analytics/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/common/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/compression/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/compression/*/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/compression/*/*/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/dispatcher/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/other/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/util/*.cpp
|
||||
${QPL_SRC_DIR}/middle-layer/inflate/*.cpp
|
||||
${QPL_SRC_DIR}/core-iaa/sources/accelerator/*.cpp) # todo
|
||||
${QPL_SRC_DIR}/middle-layer/util/*.cpp)
|
||||
|
||||
add_library(middle_layer_lib OBJECT
|
||||
${MIDDLE_LAYER_SRC})
|
||||
@ -667,6 +662,7 @@ set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
|
||||
$<TARGET_OBJECTS:middle_layer_lib>)
|
||||
|
||||
target_compile_options(middle_layer_lib
|
||||
PRIVATE $<$<C_COMPILER_ID:GNU,Clang>:$<$<CONFIG:Release>:-O3;-U_FORTIFY_SOURCE;-D_FORTIFY_SOURCE=2>>
|
||||
PRIVATE ${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS})
|
||||
|
||||
target_compile_definitions(middle_layer_lib
|
||||
@ -682,6 +678,7 @@ target_include_directories(middle_layer_lib
|
||||
PRIVATE ${UUID_DIR}
|
||||
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/middle-layer>
|
||||
PUBLIC $<TARGET_PROPERTY:_qpl,INTERFACE_INCLUDE_DIRECTORIES>
|
||||
PRIVATE $<TARGET_PROPERTY:qpl_c_api,INTERFACE_INCLUDE_DIRECTORIES>
|
||||
PUBLIC $<TARGET_PROPERTY:qplcore_sw_dispatcher,INTERFACE_INCLUDE_DIRECTORIES>
|
||||
PUBLIC $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>
|
||||
PUBLIC $<TARGET_PROPERTY:core_iaa,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
@ -689,31 +686,50 @@ target_include_directories(middle_layer_lib
|
||||
target_compile_definitions(middle_layer_lib PUBLIC -DQPL_LIB)
|
||||
|
||||
# [SUBDIR]c_api
|
||||
file(GLOB_RECURSE QPL_C_API_SRC
|
||||
${QPL_SRC_DIR}/c_api/*.c
|
||||
${QPL_SRC_DIR}/c_api/*.cpp)
|
||||
file(GLOB QPL_C_API_SRC
|
||||
${QPL_SRC_DIR}/c_api/compression_operations/*.c
|
||||
${QPL_SRC_DIR}/c_api/compression_operations/*.cpp
|
||||
${QPL_SRC_DIR}/c_api/filter_operations/*.cpp
|
||||
${QPL_SRC_DIR}/c_api/legacy_hw_path/*.c
|
||||
${QPL_SRC_DIR}/c_api/legacy_hw_path/*.cpp
|
||||
${QPL_SRC_DIR}/c_api/other_operations/*.cpp
|
||||
${QPL_SRC_DIR}/c_api/serialization/*.cpp
|
||||
${QPL_SRC_DIR}/c_api/*.cpp)
|
||||
|
||||
add_library(qpl_c_api OBJECT ${QPL_C_API_SRC})
|
||||
|
||||
target_include_directories(qpl_c_api
|
||||
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/c_api/>
|
||||
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/include/> $<INSTALL_INTERFACE:include>
|
||||
PRIVATE $<TARGET_PROPERTY:middle_layer_lib,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
|
||||
set_target_properties(qpl_c_api PROPERTIES
|
||||
$<$<C_COMPILER_ID:GNU,Clang>:C_STANDARD 17
|
||||
CXX_STANDARD 17)
|
||||
|
||||
target_compile_options(qpl_c_api
|
||||
PRIVATE $<$<C_COMPILER_ID:GNU,Clang>:$<$<CONFIG:Release>:-O3;-U_FORTIFY_SOURCE;-D_FORTIFY_SOURCE=2>>
|
||||
PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>)
|
||||
|
||||
target_compile_definitions(qpl_c_api
|
||||
PUBLIC -DQPL_BADARG_CHECK # own_checkers.h
|
||||
PUBLIC -DQPL_LIB # needed for middle_layer_lib
|
||||
PUBLIC $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>) # needed for middle_layer_lib
|
||||
|
||||
set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
|
||||
$<TARGET_OBJECTS:qpl_c_api>)
|
||||
|
||||
# Final _qpl target
|
||||
|
||||
get_property(LIB_DEPS GLOBAL PROPERTY QPL_LIB_DEPS)
|
||||
|
||||
add_library(_qpl STATIC ${QPL_C_API_SRC} ${LIB_DEPS})
|
||||
add_library(_qpl STATIC ${LIB_DEPS})
|
||||
|
||||
target_include_directories(_qpl
|
||||
PUBLIC $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include/> $<INSTALL_INTERFACE:include>
|
||||
PRIVATE $<TARGET_PROPERTY:middle_layer_lib,INTERFACE_INCLUDE_DIRECTORIES>
|
||||
PRIVATE $<BUILD_INTERFACE:${QPL_SRC_DIR}/c_api>)
|
||||
|
||||
target_compile_options(_qpl
|
||||
PRIVATE ${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS})
|
||||
|
||||
target_compile_definitions(_qpl
|
||||
PRIVATE -DQPL_LIB
|
||||
PRIVATE -DQPL_BADARG_CHECK
|
||||
PRIVATE $<$<BOOL:${DYNAMIC_LOADING_LIBACCEL_CONFIG}>:DYNAMIC_LOADING_LIBACCEL_CONFIG>
|
||||
PUBLIC -DENABLE_QPL_COMPRESSION)
|
||||
PUBLIC $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include/> $<INSTALL_INTERFACE:include>)
|
||||
|
||||
target_link_libraries(_qpl
|
||||
PRIVATE ch_contrib::accel-config
|
||||
PRIVATE ch_contrib::isal)
|
||||
PRIVATE ch_contrib::accel-config)
|
||||
|
||||
target_include_directories(_qpl SYSTEM BEFORE
|
||||
PUBLIC "${QPL_PROJECT_DIR}/include"
|
||||
|
contrib/robin-map (vendored submodule, 1 changed line)
@ -1 +0,0 @@
|
||||
Subproject commit 851a59e0e3063ee0e23089062090a73fd3de482d
|
@ -1 +0,0 @@
|
||||
# See contrib/usearch-cmake/CMakeLists.txt
|
contrib/rocksdb (vendored submodule, 2 changed lines)
@ -1 +1 @@
|
||||
Subproject commit 01e43568fa9f3f7bf107b2b66c00b286b456f33e
|
||||
Subproject commit 5f003e4a22d2e48e37c98d9620241237cd30dd24
|
@ -1,51 +1,46 @@
|
||||
option (ENABLE_ROCKSDB "Enable RocksDB" ${ENABLE_LIBRARIES})
|
||||
|
||||
if (NOT ENABLE_ROCKSDB)
|
||||
if (NOT ENABLE_ROCKSDB OR NO_SSE3_OR_HIGHER) # assumes SSE4.2 and PCLMUL
|
||||
message (STATUS "Not using RocksDB")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Not in the original build system; otherwise xxHash.cc fails to compile with ClickHouse's C++23 default
|
||||
set (CMAKE_CXX_STANDARD 20)
|
||||
|
||||
# Always disable jemalloc for rocksdb by default because it introduces non-standard jemalloc APIs
|
||||
option(WITH_JEMALLOC "build with JeMalloc" OFF)
|
||||
|
||||
option(WITH_LIBURING "build with liburing" OFF) # TODO could try to enable this conditionally, depending on ClickHouse's ENABLE_LIBURING
|
||||
|
||||
# ClickHouse cannot be compiled without snappy, lz4, zlib, zstd
|
||||
option(WITH_SNAPPY "build with SNAPPY" ON)
|
||||
option(WITH_LZ4 "build with lz4" ON)
|
||||
option(WITH_ZLIB "build with zlib" ON)
|
||||
option(WITH_ZSTD "build with zstd" ON)
|
||||
|
||||
if(WITH_SNAPPY)
|
||||
if (ENABLE_JEMALLOC AND OS_LINUX) # gives compile errors with jemalloc enabled for rocksdb on non-Linux
|
||||
add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE)
|
||||
list (APPEND THIRDPARTY_LIBS ch_contrib::jemalloc)
|
||||
endif ()
|
||||
|
||||
if (ENABLE_LIBURING)
|
||||
add_definitions(-DROCKSDB_IOURING_PRESENT)
|
||||
list (APPEND THIRDPARTY_LIBS ch_contrib::liburing)
|
||||
endif ()
|
||||
|
||||
if (WITH_SNAPPY)
|
||||
add_definitions(-DSNAPPY)
|
||||
list(APPEND THIRDPARTY_LIBS ch_contrib::snappy)
|
||||
endif()
|
||||
|
||||
if(WITH_ZLIB)
|
||||
if (WITH_ZLIB)
|
||||
add_definitions(-DZLIB)
|
||||
list(APPEND THIRDPARTY_LIBS ch_contrib::zlib)
|
||||
endif()
|
||||
|
||||
if(WITH_LZ4)
|
||||
if (WITH_LZ4)
|
||||
add_definitions(-DLZ4)
|
||||
list(APPEND THIRDPARTY_LIBS ch_contrib::lz4)
|
||||
endif()
|
||||
|
||||
if(WITH_ZSTD)
|
||||
if (WITH_ZSTD)
|
||||
add_definitions(-DZSTD)
|
||||
list(APPEND THIRDPARTY_LIBS ch_contrib::zstd)
|
||||
endif()
|
||||
|
||||
add_definitions(-DROCKSDB_PORTABLE)
|
||||
|
||||
if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ)
|
||||
add_definitions(-DHAVE_SSE42)
|
||||
add_definitions(-DHAVE_PCLMUL)
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64")
|
||||
set (HAS_ARMV8_CRC 1)
|
||||
# the original build descriptions set specific flags for ARM. These flags are already subsumed by ClickHouse's general
|
||||
@ -91,8 +86,11 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/cache/compressed_secondary_cache.cc
|
||||
${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc
|
||||
${ROCKSDB_SOURCE_DIR}/cache/secondary_cache.cc
|
||||
${ROCKSDB_SOURCE_DIR}/cache/secondary_cache_adapter.cc
|
||||
${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc
|
||||
${ROCKSDB_SOURCE_DIR}/cache/tiered_secondary_cache.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/arena_wrapped_db_iter.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/attribute_group_iterator_impl.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_contents.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_fetcher.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_addition.cc
|
||||
@ -109,6 +107,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/prefetch_buffer_collection.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/builder.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/c.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/coalescing_iterator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/column_family.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/compaction/compaction.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_iterator.cc
|
||||
@ -129,6 +128,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_write.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_compaction_flush.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_files.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_follower.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_open.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_debug.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_experimental.cc
|
||||
@ -174,9 +174,11 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/db/wal_manager.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/wide/wide_column_serialization.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/wide/wide_columns.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/wide/wide_columns_helper.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/write_batch.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/write_batch_base.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/write_controller.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/write_stall_stats.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/write_thread.cc
|
||||
${ROCKSDB_SOURCE_DIR}/env/composite_env.cc
|
||||
${ROCKSDB_SOURCE_DIR}/env/env.cc
|
||||
@ -184,6 +186,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/env/env_encryption.cc
|
||||
${ROCKSDB_SOURCE_DIR}/env/file_system.cc
|
||||
${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc
|
||||
${ROCKSDB_SOURCE_DIR}/env/fs_on_demand.cc
|
||||
${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc
|
||||
${ROCKSDB_SOURCE_DIR}/env/mock_env.cc
|
||||
${ROCKSDB_SOURCE_DIR}/env/unique_id_gen.cc
|
||||
@ -229,6 +232,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/options/configurable.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/customizable.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/db_options.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/offpeak_time_info.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/options.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/options_helper.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/options_parser.cc
|
||||
@ -268,6 +272,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/table/get_context.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/iterator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/merging_iterator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/compaction_merging_iterator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/meta_blocks.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/persistent_cache_helper.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_bloom.cc
|
||||
@ -309,6 +314,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/util/compression_context_cache.cc
|
||||
${ROCKSDB_SOURCE_DIR}/util/concurrent_task_limiter_impl.cc
|
||||
${ROCKSDB_SOURCE_DIR}/util/crc32c.cc
|
||||
${ROCKSDB_SOURCE_DIR}/util/data_structure.cc
|
||||
${ROCKSDB_SOURCE_DIR}/util/dynamic_bloom.cc
|
||||
${ROCKSDB_SOURCE_DIR}/util/hash.cc
|
||||
${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc
|
||||
@ -322,6 +328,8 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/util/string_util.cc
|
||||
${ROCKSDB_SOURCE_DIR}/util/thread_local.cc
|
||||
${ROCKSDB_SOURCE_DIR}/util/threadpool_imp.cc
|
||||
${ROCKSDB_SOURCE_DIR}/util/udt_util.cc
|
||||
${ROCKSDB_SOURCE_DIR}/util/write_batch_util.cc
|
||||
${ROCKSDB_SOURCE_DIR}/util/xxhash.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/agg_merge/agg_merge.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/backup/backup_engine.cc
|
||||
@ -366,6 +374,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/volatile_tier_impl.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/cache_simulator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_for_tiering_collector.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/trace/replayer_impl.cc
|
||||
@ -386,6 +395,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn_db.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/types_util.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/wal_filter.cc
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc
|
||||
@ -404,12 +414,6 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc
|
||||
build_version.cc) # generated by hand
|
||||
|
||||
if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ)
|
||||
set_source_files_properties(
|
||||
"${ROCKSDB_SOURCE_DIR}/util/crc32c.cc"
|
||||
PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
|
||||
list(APPEND SOURCES
|
||||
"${ROCKSDB_SOURCE_DIR}/util/crc32c_ppc.c"
|
||||
@ -422,14 +426,18 @@ if(HAS_ARMV8_CRC)
|
||||
endif(HAS_ARMV8_CRC)
|
||||
|
||||
list(APPEND SOURCES
|
||||
"${ROCKSDB_SOURCE_DIR}/port/port_posix.cc"
|
||||
"${ROCKSDB_SOURCE_DIR}/env/env_posix.cc"
|
||||
"${ROCKSDB_SOURCE_DIR}/env/fs_posix.cc"
|
||||
"${ROCKSDB_SOURCE_DIR}/env/io_posix.cc")
|
||||
${ROCKSDB_SOURCE_DIR}/port/port_posix.cc
|
||||
${ROCKSDB_SOURCE_DIR}/env/env_posix.cc
|
||||
${ROCKSDB_SOURCE_DIR}/env/fs_posix.cc
|
||||
${ROCKSDB_SOURCE_DIR}/env/io_posix.cc)
|
||||
|
||||
add_library(_rocksdb ${SOURCES})
|
||||
add_library(ch_contrib::rocksdb ALIAS _rocksdb)
|
||||
target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
|
||||
|
||||
# Not in the native build system but useful anyways:
|
||||
# Make all functions in xxHash.h inline. Beneficial for performance: https://github.com/Cyan4973/xxHash/tree/v0.8.2#build-modifiers
|
||||
target_compile_definitions (_rocksdb PRIVATE XXH_INLINE_ALL)
|
||||
|
||||
# SYSTEM is required to overcome some issues
|
||||
target_include_directories(_rocksdb SYSTEM BEFORE INTERFACE "${ROCKSDB_SOURCE_DIR}/include")
|
||||
|
contrib/usearch (vendored submodule, 2 changed lines)
@ -1 +1 @@
|
||||
Subproject commit 955c6f9c11adfd89c912e0d1643d160b4e9e543f
|
||||
Subproject commit e21a5778a0d4469ddaf38c94b7be0196bb701ee4
|
@ -1,17 +1,12 @@
|
||||
set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
|
||||
set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include")
|
||||
|
||||
set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16")
|
||||
set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map")
|
||||
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD-map")
|
||||
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD")
|
||||
set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
|
||||
|
||||
add_library(_usearch INTERFACE)
|
||||
|
||||
target_include_directories(_usearch SYSTEM INTERFACE
|
||||
${FP16_PROJECT_DIR}/include
|
||||
${ROBIN_MAP_PROJECT_DIR}/include
|
||||
${SIMSIMD_PROJECT_DIR}/include
|
||||
${USEARCH_SOURCE_DIR})
|
||||
${USEARCH_PROJECT_DIR}/include)
|
||||
|
||||
add_library(ch_contrib::usearch ALIAS _usearch)
|
||||
target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH)
|
||||
|
contrib/zlib-ng (vendored submodule, 2 changed lines)
@ -1 +1 @@
|
||||
Subproject commit 50f0eae1a411764cd6d1e85b3ce471438acd3c1c
|
||||
Subproject commit a2fbeffdc30a8b0ce6d54ee31208e2688eac4c9f
|
@ -14,6 +14,8 @@ add_definitions(-DHAVE_VISIBILITY_HIDDEN)
|
||||
add_definitions(-DHAVE_VISIBILITY_INTERNAL)
|
||||
add_definitions(-DHAVE_BUILTIN_CTZ)
|
||||
add_definitions(-DHAVE_BUILTIN_CTZLL)
|
||||
add_definitions(-DHAVE_ATTRIBUTE_ALIGNED)
|
||||
add_definitions(-DHAVE_POSIX_MEMALIGN)
|
||||
|
||||
set(ZLIB_ARCH_SRCS)
|
||||
set(ZLIB_ARCH_HDRS)
|
||||
@ -24,67 +26,74 @@ if(ARCH_AARCH64)
|
||||
set(ARCHDIR "${SOURCE_DIR}/arch/arm")
|
||||
|
||||
add_definitions(-DARM_FEATURES)
|
||||
add_definitions(-DHAVE_SYS_AUXV_H)
|
||||
add_definitions(-DARM_AUXV_HAS_CRC32 -DARM_ASM_HWCAP)
|
||||
add_definitions(-DARM_AUXV_HAS_NEON)
|
||||
add_definitions(-DARM_ACLE_CRC_HASH)
|
||||
add_definitions(-DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH)
|
||||
add_definitions(-DARM_ACLE)
|
||||
add_definitions(-DHAVE_ARM_ACLE_H)
|
||||
add_definitions(-DARM_NEON)
|
||||
add_definitions(-DARM_NEON_HASLD4)
|
||||
|
||||
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm.h)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/armfeature.c)
|
||||
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c)
|
||||
set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS})
|
||||
set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c ${ARCHDIR}/slide_neon.c)
|
||||
set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c
|
||||
${ARCHDIR}/compare256_neon.c ${ARCHDIR}/slide_hash_neon.c)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS})
|
||||
|
||||
elseif(ARCH_PPC64LE)
|
||||
set(ARCHDIR "${SOURCE_DIR}/arch/power")
|
||||
|
||||
add_definitions(-DPOWER8)
|
||||
add_definitions(-DPOWER_FEATURES)
|
||||
add_definitions(-DPOWER8_VSX_ADLER32)
|
||||
add_definitions(-DPOWER8_VSX_SLIDEHASH)
|
||||
add_definitions(-DHAVE_SYS_AUXV_H)
|
||||
|
||||
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power.h)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power.c)
|
||||
set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/slide_hash_power8.c)
|
||||
if(POWER9)
|
||||
add_definitions(-DPOWER9)
|
||||
else()
|
||||
add_definitions(-DPOWER8)
|
||||
add_definitions(-DPOWER8_VSX)
|
||||
add_definitions(-DPOWER8_VSX_CRC32)
|
||||
endif()
|
||||
|
||||
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c)
|
||||
set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/chunkset_power8.c ${ARCHDIR}/slide_hash_power8.c)
|
||||
list(APPEND POWER8_SRCS ${ARCHDIR}/crc32_power8.c)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${POWER8_SRCS})
|
||||
|
||||
elseif(ARCH_AMD64)
|
||||
set(ARCHDIR "${SOURCE_DIR}/arch/x86")
|
||||
|
||||
add_definitions(-DX86_FEATURES)
|
||||
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86.h)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86.c)
|
||||
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c)
|
||||
if(ENABLE_AVX2)
|
||||
add_definitions(-DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET)
|
||||
set(AVX2_SRCS ${ARCHDIR}/slide_avx.c)
|
||||
list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx.c)
|
||||
list(APPEND AVX2_SRCS ${ARCHDIR}/compare258_avx.c)
|
||||
list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx.c)
|
||||
add_definitions(-DX86_AVX2)
|
||||
set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c)
|
||||
list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx2.c)
|
||||
list(APPEND AVX2_SRCS ${ARCHDIR}/compare256_avx2.c)
|
||||
list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx2.c)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS})
|
||||
endif()
|
||||
if(ENABLE_SSE42)
|
||||
add_definitions(-DX86_SSE42_CRC_HASH)
|
||||
set(SSE42_SRCS ${ARCHDIR}/insert_string_sse.c)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
|
||||
add_definitions(-DX86_SSE42_CRC_INTRIN)
|
||||
add_definitions(-DX86_SSE42_CMP_STR)
|
||||
set(SSE42_SRCS ${ARCHDIR}/compare258_sse.c)
|
||||
add_definitions(-DX86_SSE42)
|
||||
set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
|
||||
endif()
|
||||
if(ENABLE_SSSE3)
|
||||
add_definitions(-DX86_SSSE3 -DX86_SSSE3_ADLER32)
|
||||
set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c)
|
||||
add_definitions(-DX86_SSSE3)
|
||||
set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
|
||||
endif()
|
||||
if(ENABLE_PCLMULQDQ)
|
||||
add_definitions(-DX86_PCLMULQDQ_CRC)
|
||||
set(PCLMULQDQ_SRCS ${ARCHDIR}/crc_folding.c)
|
||||
set(PCLMULQDQ_SRCS ${ARCHDIR}/crc32_pclmulqdq.c)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS})
|
||||
endif()
|
||||
|
||||
add_definitions(-DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE2_SLIDEHASH)
|
||||
set(SSE2_SRCS ${ARCHDIR}/chunkset_sse.c ${ARCHDIR}/slide_sse.c)
|
||||
add_definitions(-DX86_SSE2)
|
||||
set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c)
|
||||
list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
|
||||
add_definitions(-DX86_NOCHECK_SSE2)
|
||||
endif ()
|
||||
@ -106,39 +115,45 @@ generate_cmakein(${SOURCE_DIR}/zconf.h.in ${CMAKE_CURRENT_BINARY_DIR}/zconf.h.cm
|
||||
|
||||
set(ZLIB_SRCS
|
||||
${SOURCE_DIR}/adler32.c
|
||||
${SOURCE_DIR}/adler32_fold.c
|
||||
${SOURCE_DIR}/chunkset.c
|
||||
${SOURCE_DIR}/compare258.c
|
||||
${SOURCE_DIR}/compare256.c
|
||||
${SOURCE_DIR}/compress.c
|
||||
${SOURCE_DIR}/crc32.c
|
||||
${SOURCE_DIR}/crc32_comb.c
|
||||
${SOURCE_DIR}/cpu_features.c
|
||||
${SOURCE_DIR}/crc32_braid.c
|
||||
${SOURCE_DIR}/crc32_braid_comb.c
|
||||
${SOURCE_DIR}/crc32_fold.c
|
||||
${SOURCE_DIR}/deflate.c
|
||||
${SOURCE_DIR}/deflate_fast.c
|
||||
${SOURCE_DIR}/deflate_huff.c
|
||||
${SOURCE_DIR}/deflate_medium.c
|
||||
${SOURCE_DIR}/deflate_quick.c
|
||||
${SOURCE_DIR}/deflate_rle.c
|
||||
${SOURCE_DIR}/deflate_slow.c
|
||||
${SOURCE_DIR}/deflate_stored.c
|
||||
${SOURCE_DIR}/functable.c
|
||||
${SOURCE_DIR}/infback.c
|
||||
${SOURCE_DIR}/inffast.c
|
||||
${SOURCE_DIR}/inflate.c
|
||||
${SOURCE_DIR}/inftrees.c
|
||||
${SOURCE_DIR}/insert_string.c
|
||||
${SOURCE_DIR}/insert_string_roll.c
|
||||
${SOURCE_DIR}/slide_hash.c
|
||||
${SOURCE_DIR}/trees.c
|
||||
${SOURCE_DIR}/uncompr.c
|
||||
${SOURCE_DIR}/zutil.c
|
||||
)
|
||||
|
||||
set(ZLIB_GZFILE_SRCS
|
||||
${SOURCE_DIR}/gzlib.c
|
||||
${SOURCE_DIR}/gzread.c
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gzread.c
|
||||
${SOURCE_DIR}/gzwrite.c
|
||||
)
|
||||
|
||||
set(ZLIB_ALL_SRCS ${ZLIB_SRCS} ${ZLIB_ARCH_SRCS})
|
||||
set(ZLIB_ALL_SRCS ${ZLIB_SRCS} ${ZLIB_ARCH_SRCS} ${ZLIB_GZFILE_SRCS})
|
||||
|
||||
add_library(_zlib ${ZLIB_ALL_SRCS})
|
||||
add_library(ch_contrib::zlib ALIAS _zlib)
|
||||
|
||||
# https://github.com/zlib-ng/zlib-ng/pull/733
|
||||
# This is disabed by default
|
||||
add_compile_definitions(Z_TLS=__thread)
|
||||
|
||||
if(HAVE_UNISTD_H)
|
||||
SET(ZCONF_UNISTD_LINE "#if 1 /* was set to #if 1 by configure/cmake/etc */")
|
||||
else()
|
||||
@ -153,6 +168,9 @@ endif()
|
||||
set(ZLIB_PC ${CMAKE_CURRENT_BINARY_DIR}/zlib.pc)
|
||||
configure_file(${SOURCE_DIR}/zlib.pc.cmakein ${ZLIB_PC} @ONLY)
|
||||
configure_file(${CMAKE_CURRENT_BINARY_DIR}/zconf.h.cmakein ${CMAKE_CURRENT_BINARY_DIR}/zconf.h @ONLY)
|
||||
configure_file(${SOURCE_DIR}/zlib.h.in ${CMAKE_CURRENT_BINARY_DIR}/zlib.h @ONLY)
|
||||
configure_file(${SOURCE_DIR}/zlib_name_mangling.h.in ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling.h @ONLY)
|
||||
configure_file(${SOURCE_DIR}/gzread.c.in ${CMAKE_CURRENT_BINARY_DIR}/gzread.c @ONLY)
|
||||
|
||||
# We should use same defines when including zlib.h as used when zlib compiled
|
||||
target_compile_definitions (_zlib PUBLIC ZLIB_COMPAT WITH_GZFILEOP)
|
||||
|
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.7.2.13"
|
||||
ARG VERSION="24.7.3.42"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -40,8 +40,6 @@ fi
|
||||
|
||||
DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}"
|
||||
LOG_DIR="${LOG_DIR:-/var/log/clickhouse-keeper}"
|
||||
LOG_PATH="${LOG_DIR}/clickhouse-keeper.log"
|
||||
ERROR_LOG_PATH="${LOG_DIR}/clickhouse-keeper.err.log"
|
||||
COORDINATION_DIR="${DATA_DIR}/coordination"
|
||||
COORDINATION_LOG_DIR="${DATA_DIR}/coordination/log"
|
||||
COORDINATION_SNAPSHOT_DIR="${DATA_DIR}/coordination/snapshots"
|
||||
@ -84,7 +82,7 @@ if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
|
||||
|
||||
# There is a config file. It has already been tested with gosu (if it is readable by the keeper user)
|
||||
if [ -f "$KEEPER_CONFIG" ]; then
|
||||
exec $gosu /usr/bin/clickhouse-keeper --config-file="$KEEPER_CONFIG" --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@"
|
||||
exec $gosu /usr/bin/clickhouse-keeper --config-file="$KEEPER_CONFIG" "$@"
|
||||
fi
|
||||
|
||||
# There is no config file. The embedded one will be used
|
||||
|
@ -108,7 +108,8 @@ if [ -n "$MAKE_DEB" ]; then
|
||||
bash -x /build/packages/build
|
||||
fi
|
||||
|
||||
mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output
|
||||
mv ./programs/clickhouse* /output ||:
|
||||
mv ./programs/*_fuzzer /output ||:
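A note on the change above (plain shell semantics, not text from the commit): with the old single line, the fuzzer binaries were moved only when moving the main clickhouse binaries failed, whereas the two commands ending in `||:` attempt both moves and ignore whichever set of files is absent. A tiny illustration:

set -e
false ||:                                    # ':' is a no-op, so the failure is ignored
false || echo "runs only if the first command fails"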
|
||||
[ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
|
||||
[ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output
|
||||
[ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output
|
||||
|
@ -1,3 +1,5 @@
|
||||
# docker build -t clickhouse/cctools .
|
||||
|
||||
# This is a hack to significantly reduce the build time of the clickhouse/binary-builder
|
||||
# It's based on the assumption that we don't care about the cctools version that much.
# It does not even depend on the clickhouse/fasttest image in `docker/images.json`
|
||||
@ -30,5 +32,29 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
|
||||
&& cd ../.. \
|
||||
&& rm -rf cctools-port
|
||||
|
||||
#
|
||||
# GDB
|
||||
#
|
||||
# ld from binutils is 2.38, which has the following error:
|
||||
#
|
||||
# DWARF error: invalid or unhandled FORM value: 0x23
|
||||
#
|
||||
ENV LD=ld.lld-${LLVM_VERSION}
|
||||
ARG GDB_VERSION=15.1
|
||||
RUN apt-get update \
|
||||
&& apt-get install --yes \
|
||||
libgmp-dev \
|
||||
libmpfr-dev \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
RUN wget https://sourceware.org/pub/gdb/releases/gdb-$GDB_VERSION.tar.gz \
|
||||
&& tar -xvf gdb-$GDB_VERSION.tar.gz \
|
||||
&& cd gdb-$GDB_VERSION \
|
||||
&& ./configure --prefix=/opt/gdb \
|
||||
&& make -j $(nproc) \
|
||||
&& make install \
|
||||
&& rm -fr gdb-$GDB_VERSION gdb-$GDB_VERSION.tar.gz
|
||||
|
||||
FROM scratch
|
||||
COPY --from=builder /cctools /cctools
|
||||
COPY --from=builder /opt/gdb /opt/gdb
|
||||
|
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.7.2.13"
|
||||
ARG VERSION="24.7.3.42"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="24.7.2.13"
|
||||
ARG VERSION="24.7.3.42"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
#docker-official-library:off
|
||||
|
@ -28,12 +28,14 @@ RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_
|
||||
RUN echo "UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'" >> /etc/environment
|
||||
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'" >> /etc/environment
|
||||
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt max_allocation_size_mb=32768'" >> /etc/environment
|
||||
RUN echo "ASAN_OPTIONS='halt_on_error=1 abort_on_error=1'" >> /etc/environment
|
||||
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
|
||||
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
|
||||
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1 max_allocation_size_mb=32768'
|
||||
ENV UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'
|
||||
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'
|
||||
ENV LSAN_OPTIONS='max_allocation_size_mb=32768'
|
||||
ENV ASAN_OPTIONS='halt_on_error=1 abort_on_error=1'
|
||||
|
||||
# for external_symbolizer_path, and also ensure that llvm-symbolizer really
|
||||
# exists (since you don't want to fallback to addr2line, it is very slow)
|
||||
|
@ -218,6 +218,6 @@ function stop_logs_replication
|
||||
clickhouse-client --query "select database||'.'||table from system.tables where database = 'system' and (table like '%_sender' or table like '%_watcher')" | {
|
||||
tee /dev/stderr
|
||||
} | {
|
||||
xargs -n1 -r -i clickhouse-client --query "drop table {}"
|
||||
timeout --preserve-status --signal TERM --kill-after 5m 15m xargs -n1 -r -i clickhouse-client --query "drop table {}"
|
||||
}
|
||||
}
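The `timeout` wrapper added above bounds how long the table-drop loop may run. As a reminder of the flag semantics (a generic illustration, not part of the script): SIGTERM is sent after 15 minutes, SIGKILL follows 5 minutes later if the command ignores it, and --preserve-status reports the wrapped command's own exit status rather than timeout's 124.

timeout --preserve-status --signal TERM --kill-after 5m 15m sleep 3600
echo "exit status: $?"   # 143 (128 + SIGTERM) once the 15-minute limit is hit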
|
||||
|
@ -83,7 +83,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
|
||||
# Give suid to gdb to grant it attach permissions
|
||||
# chmod 777 to make the container user independent
|
||||
RUN chmod u+s /usr/bin/gdb \
|
||||
RUN chmod u+s /opt/gdb/bin/gdb \
|
||||
&& mkdir -p /var/lib/clickhouse \
|
||||
&& chmod 777 /var/lib/clickhouse
|
||||
|
||||
|
@ -41,7 +41,7 @@ export FASTTEST_WORKSPACE
|
||||
export FASTTEST_SOURCE
|
||||
export FASTTEST_BUILD
|
||||
export FASTTEST_DATA
|
||||
export FASTTEST_OUT
|
||||
export FASTTEST_OUTPUT
|
||||
export PATH
|
||||
|
||||
function ccache_status
|
||||
|
@ -28,9 +28,9 @@
|
||||
</table_function_remote_max_addresses>
|
||||
|
||||
<!-- Don't waste cycles testing the old interpreter. Spend time in the new analyzer instead -->
|
||||
<allow_experimental_analyzer>
|
||||
<enable_analyzer>
|
||||
<readonly/>
|
||||
</allow_experimental_analyzer>
|
||||
</enable_analyzer>
|
||||
|
||||
<!-- This feature is broken, deprecated and will be removed. We don't want more reports about it -->
|
||||
<allow_experimental_object_type>
|
||||
|
@ -193,53 +193,60 @@ function fuzz
|
||||
|
||||
kill -0 $server_pid
|
||||
|
||||
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
|
||||
# and clickhouse-server can do fork-exec, for example, to run some bridge.
|
||||
# Do not set nostop noprint for all signals, because for some of them it may cause gdb to hang;
# explicitly ignore non-fatal signals that are used by the server.
# The number of SIGRTMIN can be determined only at runtime.
|
||||
RTMIN=$(kill -l SIGRTMIN)
|
||||
echo "
|
||||
set follow-fork-mode parent
|
||||
handle SIGHUP nostop noprint pass
|
||||
handle SIGINT nostop noprint pass
|
||||
handle SIGQUIT nostop noprint pass
|
||||
handle SIGPIPE nostop noprint pass
|
||||
handle SIGTERM nostop noprint pass
|
||||
handle SIGUSR1 nostop noprint pass
|
||||
handle SIGUSR2 nostop noprint pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
backtrace full
|
||||
thread apply all backtrace full
|
||||
info registers
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
p \"done\"
|
||||
detach
|
||||
quit
|
||||
" > script.gdb
|
||||
IS_ASAN=$(clickhouse-client --query "SELECT count() FROM system.build_options WHERE name = 'CXX_FLAGS' AND position('sanitize=address' IN value)")
|
||||
if [[ "$IS_ASAN" = "1" ]];
|
||||
then
|
||||
echo "ASAN build detected. Not using gdb since it disables LeakSanitizer detections"
|
||||
else
|
||||
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
|
||||
# and clickhouse-server can do fork-exec, for example, to run some bridge.
|
||||
# Do not set nostop noprint for all signals, because for some of them it may cause gdb to hang;
# explicitly ignore non-fatal signals that are used by the server.
# The number of SIGRTMIN can be determined only at runtime.
|
||||
RTMIN=$(kill -l SIGRTMIN)
|
||||
echo "
|
||||
set follow-fork-mode parent
|
||||
handle SIGHUP nostop noprint pass
|
||||
handle SIGINT nostop noprint pass
|
||||
handle SIGQUIT nostop noprint pass
|
||||
handle SIGPIPE nostop noprint pass
|
||||
handle SIGTERM nostop noprint pass
|
||||
handle SIGUSR1 nostop noprint pass
|
||||
handle SIGUSR2 nostop noprint pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
backtrace full
|
||||
thread apply all backtrace full
|
||||
info registers
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
p \"done\"
|
||||
detach
|
||||
quit
|
||||
" > script.gdb
|
||||
|
||||
gdb -batch -command script.gdb -p $server_pid &
|
||||
sleep 5
|
||||
# gdb will send SIGSTOP, spend some time loading debug info, and then send SIGCONT, wait for it (up to send_timeout, 300s)
|
||||
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
|
||||
gdb -batch -command script.gdb -p $server_pid &
|
||||
sleep 5
|
||||
# gdb will send SIGSTOP, spend some time loading debug info, and then send SIGCONT, wait for it (up to send_timeout, 300s)
|
||||
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
|
||||
|
||||
# Check connectivity after we attach gdb, because it might cause the server
|
||||
# to freeze, and the fuzzer will fail. In debug build, it can take a lot of time.
|
||||
for _ in {1..180}
|
||||
do
|
||||
if clickhouse-client --query "select 1"
|
||||
then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
kill -0 $server_pid # This checks that it is our server that is started and not some other one
|
||||
fi
|
||||
|
||||
# Check connectivity after we attach gdb, because it might cause the server
|
||||
# to freeze, and the fuzzer will fail. In debug build, it can take a lot of time.
|
||||
for _ in {1..180}
|
||||
do
|
||||
if clickhouse-client --query "select 1"
|
||||
then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
kill -0 $server_pid # This checks that it is our server that is started and not some other one
|
||||
echo 'Server started and responded.'
|
||||
|
||||
setup_logs_replication
|
||||
@ -264,8 +271,13 @@ quit
|
||||
# The fuzzer_pid belongs to the timeout process.
|
||||
actual_fuzzer_pid=$(ps -o pid= --ppid "$fuzzer_pid")
|
||||
|
||||
echo "Attaching gdb to the fuzzer itself"
|
||||
gdb -batch -command script.gdb -p $actual_fuzzer_pid &
|
||||
if [[ "$IS_ASAN" = "1" ]];
|
||||
then
|
||||
echo "ASAN build detected. Not using gdb since it disables LeakSanitizer detections"
|
||||
else
|
||||
echo "Attaching gdb to the fuzzer itself"
|
||||
gdb -batch -command script.gdb -p $actual_fuzzer_pid &
|
||||
fi
|
||||
|
||||
# Wait for the fuzzer to complete.
|
||||
# Note that the 'wait || ...' thing is required so that the script doesn't
|
||||
|
@ -11,7 +11,6 @@ RUN apt-get update \
|
||||
curl \
|
||||
default-jre \
|
||||
g++ \
|
||||
gdb \
|
||||
iproute2 \
|
||||
krb5-user \
|
||||
libicu-dev \
|
||||
@ -73,3 +72,6 @@ maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg && \
|
||||
|
||||
ENV TZ=Etc/UTC
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
|
||||
ENV PATH="/opt/gdb/bin:${PATH}"
|
||||
|
@ -30,7 +30,6 @@ RUN apt-get update \
|
||||
luajit \
|
||||
libssl-dev \
|
||||
libcurl4-openssl-dev \
|
||||
gdb \
|
||||
default-jdk \
|
||||
software-properties-common \
|
||||
libkrb5-dev \
|
||||
@ -87,6 +86,8 @@ COPY modprobe.sh /usr/local/bin/modprobe
|
||||
COPY dockerd-entrypoint.sh /usr/local/bin/
|
||||
COPY misc/ /misc/
|
||||
|
||||
COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
|
||||
ENV PATH="/opt/gdb/bin:${PATH}"
|
||||
|
||||
# Same options as in test/base/Dockerfile
|
||||
# (in case you need to override them in tests)
|
||||
|
@ -74,6 +74,7 @@ protobuf==4.25.2
|
||||
psycopg2-binary==2.9.6
|
||||
py4j==0.10.9.5
|
||||
py==1.11.0
|
||||
pyarrow==17.0.0
|
||||
pycparser==2.22
|
||||
pycryptodome==3.20.0
|
||||
pymongo==3.11.0
|
||||
|
@ -9,7 +9,6 @@ RUN apt-get update \
|
||||
curl \
|
||||
dmidecode \
|
||||
g++ \
|
||||
gdb \
|
||||
git \
|
||||
gnuplot \
|
||||
imagemagick \
|
||||
@ -42,6 +41,9 @@ RUN pip3 --no-cache-dir install -r requirements.txt
|
||||
|
||||
COPY run.sh /
|
||||
|
||||
COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
|
||||
ENV PATH="/opt/gdb/bin:${PATH}"
|
||||
|
||||
CMD ["bash", "/run.sh"]
|
||||
|
||||
# docker run --network=host --volume <workspace>:/workspace --volume=<output>:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/performance-comparison
|
||||
|
@ -232,15 +232,26 @@ function run_tests()
|
||||
|
||||
set +e
|
||||
|
||||
TEST_ARGS=(
|
||||
-j 2
|
||||
--testname
|
||||
--shard
|
||||
--zookeeper
|
||||
--check-zookeeper-session
|
||||
--no-stateless
|
||||
--hung-check
|
||||
--print-time
|
||||
--capture-client-stacktrace
|
||||
"${ADDITIONAL_OPTIONS[@]}"
|
||||
"$SKIP_TESTS_OPTION"
|
||||
)
|
||||
if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then
|
||||
clickhouse-test --client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \
|
||||
--max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \
|
||||
-j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
|
||||
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
else
|
||||
clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
|
||||
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
TEST_ARGS+=(
|
||||
--client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'"
|
||||
--no-parallel-replicas
|
||||
)
|
||||
fi
|
||||
clickhouse-test "${TEST_ARGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
set -e
|
||||
}
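A brief note on the refactoring above (an observation, not text from the commit): expanding the options through a bash array as "${TEST_ARGS[@]}" preserves arguments that contain spaces, such as the long --client=... string, as single words, which is easy to break with backslash-continued command lines. A minimal illustration with hypothetical values:

ARGS=( --print-time --client="clickhouse-client --max_threads=8" )
printf '<%s>\n' "${ARGS[@]}"
# <--print-time>
# <--client=clickhouse-client --max_threads=8>   <- the value stays a single argument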
|
||||
|
||||
|
@ -69,8 +69,8 @@ ENV MAX_RUN_TIME=0
|
||||
|
||||
# Unrelated to vars in setup_minio.sh, but should be the same there
|
||||
# to have the same binaries for local running scenario
|
||||
ARG MINIO_SERVER_VERSION=2022-01-03T18-22-58Z
|
||||
ARG MINIO_CLIENT_VERSION=2022-01-05T23-52-51Z
|
||||
ARG MINIO_SERVER_VERSION=2024-08-03T04-33-23Z
|
||||
ARG MINIO_CLIENT_VERSION=2024-07-31T15-58-33Z
|
||||
ARG TARGETARCH
|
||||
|
||||
# Download Minio-related binaries
|
||||
|
@ -5,43 +5,53 @@ source /utils.lib
|
||||
|
||||
function attach_gdb_to_clickhouse()
|
||||
{
|
||||
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
|
||||
# and clickhouse-server can do fork-exec, for example, to run some bridge.
|
||||
# Do not set nostop noprint for all signals, because for some of them it may cause gdb to hang;
# explicitly ignore non-fatal signals that are used by the server.
# The number of SIGRTMIN can be determined only at runtime.
|
||||
RTMIN=$(kill -l SIGRTMIN)
|
||||
echo "
|
||||
set follow-fork-mode parent
|
||||
handle SIGHUP nostop noprint pass
|
||||
handle SIGINT nostop noprint pass
|
||||
handle SIGQUIT nostop noprint pass
|
||||
handle SIGPIPE nostop noprint pass
|
||||
handle SIGTERM nostop noprint pass
|
||||
handle SIGUSR1 nostop noprint pass
|
||||
handle SIGUSR2 nostop noprint pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
backtrace full
|
||||
thread apply all backtrace full
|
||||
info registers
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
p \"done\"
|
||||
detach
|
||||
quit
|
||||
" > script.gdb
|
||||
IS_ASAN=$(clickhouse-client --query "SELECT count() FROM system.build_options WHERE name = 'CXX_FLAGS' AND position('sanitize=address' IN value)")
|
||||
if [[ "$IS_ASAN" = "1" ]];
|
||||
then
|
||||
echo "ASAN build detected. Not using gdb since it disables LeakSanitizer detections"
|
||||
else
|
||||
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
|
||||
# and clickhouse-server can do fork-exec, for example, to run some bridge.
|
||||
# Do not set nostop noprint for all signals, because for some of them it may cause gdb to hang;
# explicitly ignore non-fatal signals that are used by the server.
# The number of SIGRTMIN can be determined only at runtime.
|
||||
RTMIN=$(kill -l SIGRTMIN)
|
||||
# shellcheck disable=SC2016
|
||||
echo "
|
||||
set follow-fork-mode parent
|
||||
handle SIGHUP nostop noprint pass
|
||||
handle SIGINT nostop noprint pass
|
||||
handle SIGQUIT nostop noprint pass
|
||||
handle SIGPIPE nostop noprint pass
|
||||
handle SIGTERM nostop noprint pass
|
||||
handle SIGUSR1 nostop noprint pass
|
||||
handle SIGUSR2 nostop noprint pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
backtrace full
|
||||
info registers
|
||||
p "top 1 KiB of the stack:"
|
||||
p/x *(uint64_t[128]*)"'$sp'"
|
||||
maintenance info sections
|
||||
thread apply all backtrace full
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
p \"done\"
|
||||
detach
|
||||
quit
|
||||
" > script.gdb
|
||||
|
||||
# FIXME Hung check may work incorrectly because of attached gdb
|
||||
# We cannot attach another gdb to get stacktraces if some queries hung
|
||||
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
|
||||
sleep 5
|
||||
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
|
||||
run_with_retry 60 clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'"
|
||||
# FIXME Hung check may work incorrectly because of attached gdb
|
||||
# We cannot attach another gdb to get stacktraces if some queries hung
|
||||
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
|
||||
sleep 5
|
||||
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
|
||||
run_with_retry 60 clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'"
|
||||
fi
|
||||
}
|
||||
|
||||
# vi: ft=bash
|
||||
|
@ -54,8 +54,6 @@ source /utils.lib
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
./setup_minio.sh stateless
|
||||
./mc admin trace clickminio > /test_output/minio.log &
|
||||
MC_ADMIN_PID=$!
|
||||
|
||||
./setup_hdfs_minicluster.sh
|
||||
|
||||
@ -174,7 +172,56 @@ do
|
||||
done
|
||||
|
||||
setup_logs_replication
|
||||
attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01
|
||||
attach_gdb_to_clickhouse
|
||||
|
||||
# create tables for minio log webhooks
|
||||
clickhouse-client --query "CREATE TABLE minio_audit_logs
|
||||
(
|
||||
log String,
|
||||
event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC')
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY tuple()"
|
||||
|
||||
clickhouse-client --query "CREATE TABLE minio_server_logs
|
||||
(
|
||||
log String,
|
||||
event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC')
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY tuple()"
|
||||
|
||||
# create minio log webhooks for both audit and server logs
|
||||
# use async inserts to avoid creating too many parts
|
||||
./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500
|
||||
./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500
|
||||
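For readability, this is the decoded form of the URL-encoded webhook endpoint configured above (a sketch of an equivalent manual POST, not part of the CI script): MinIO sends each log line as the request body, and ClickHouse buffers the rows with async inserts before flushing them into parts. The audit webhook differs only in the target table.

echo '{"msg":"example log line"}' | curl -sS --data-binary @- \
  'http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString'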
|
||||
max_retries=100
|
||||
retry=1
|
||||
while [ $retry -le $max_retries ]; do
|
||||
echo "clickminio restart attempt $retry:"
|
||||
|
||||
output=$(./mc admin service restart clickminio --wait --json 2>&1 | jq -r .status)
|
||||
echo "Output of restart status: $output"
|
||||
|
||||
expected_output="success
|
||||
success"
|
||||
if [ "$output" = "$expected_output" ]; then
|
||||
echo "Restarted clickminio successfully."
|
||||
break
|
||||
fi
|
||||
|
||||
sleep 1
|
||||
|
||||
retry=$((retry + 1))
|
||||
done
|
||||
|
||||
if [ $retry -gt $max_retries ]; then
|
||||
echo "Failed to restart clickminio after $max_retries attempts."
|
||||
fi
|
||||
|
||||
./mc admin trace clickminio > /test_output/minio.log &
|
||||
MC_ADMIN_PID=$!
|
||||
|
||||
function fn_exists() {
|
||||
declare -F "$1" > /dev/null;
|
||||
@ -264,11 +311,22 @@ function run_tests()
|
||||
TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800))
|
||||
START_TIME=${SECONDS}
|
||||
set +e
|
||||
timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s \
|
||||
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
|
||||
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
|
||||
| ts '%Y-%m-%d %H:%M:%S' \
|
||||
| tee -a test_output/test_result.txt
|
||||
|
||||
TEST_ARGS=(
|
||||
--testname
|
||||
--shard
|
||||
--zookeeper
|
||||
--check-zookeeper-session
|
||||
--hung-check
|
||||
--print-time
|
||||
--no-drop-if-fail
|
||||
--capture-client-stacktrace
|
||||
--test-runs "$NUM_TRIES"
|
||||
"${ADDITIONAL_OPTIONS[@]}"
|
||||
)
|
||||
timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s clickhouse-test "${TEST_ARGS[@]}" 2>&1 \
|
||||
| ts '%Y-%m-%d %H:%M:%S' \
|
||||
| tee -a test_output/test_result.txt
|
||||
set -e
|
||||
DURATION=$((SECONDS - START_TIME))
|
||||
|
||||
@ -328,6 +386,14 @@ do
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
# collect minio audit and server logs
|
||||
# wait for minio to flush its batch if it has any
|
||||
sleep 1
|
||||
clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE"
|
||||
clickhouse-client -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow"
|
||||
clickhouse-client -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow"
|
||||
|
||||
# Stop server so we can safely read data with clickhouse-local.
|
||||
# Why do we read data with clickhouse-local?
|
||||
# Because it's the simplest way to read it when server has crashed.
|
||||
|
@ -59,8 +59,8 @@ find_os() {
|
||||
download_minio() {
|
||||
local os
|
||||
local arch
|
||||
local minio_server_version=${MINIO_SERVER_VERSION:-2022-09-07T22-25-02Z}
|
||||
local minio_client_version=${MINIO_CLIENT_VERSION:-2022-08-28T20-08-11Z}
|
||||
local minio_server_version=${MINIO_SERVER_VERSION:-2024-08-03T04-33-23Z}
|
||||
local minio_client_version=${MINIO_CLIENT_VERSION:-2024-07-31T15-58-33Z}
|
||||
|
||||
os=$(find_os)
|
||||
arch=$(find_arch)
|
||||
@ -82,10 +82,10 @@ setup_minio() {
|
||||
local test_type=$1
|
||||
./mc alias set clickminio http://localhost:11111 clickhouse clickhouse
|
||||
./mc admin user add clickminio test testtest
|
||||
./mc admin policy set clickminio readwrite user=test
|
||||
./mc admin policy attach clickminio readwrite --user=test
|
||||
./mc mb --ignore-existing clickminio/test
|
||||
if [ "$test_type" = "stateless" ]; then
|
||||
./mc policy set public clickminio/test
|
||||
./mc anonymous set public clickminio/test
|
||||
fi
|
||||
}
|
||||
|
||||
@ -99,10 +99,9 @@ upload_data() {
|
||||
# iterating over globs will cause redundant file variable to be
|
||||
# a path to a file, not a filename
|
||||
# shellcheck disable=SC2045
|
||||
for file in $(ls "${data_path}"); do
|
||||
echo "${file}";
|
||||
./mc cp "${data_path}"/"${file}" clickminio/test/"${file}";
|
||||
done
|
||||
if [ -d "${data_path}" ]; then
|
||||
./mc cp --recursive "${data_path}"/ clickminio/test/
|
||||
fi
|
||||
}
|
||||
|
||||
setup_aws_credentials() {
|
||||
@ -148,4 +147,4 @@ main() {
|
||||
setup_aws_credentials
|
||||
}
|
||||
|
||||
main "$@"
|
||||
main "$@"
|
||||
|
@ -139,9 +139,9 @@ EOL
|
||||
</table_function_remote_max_addresses>
|
||||
|
||||
<!-- Don't waste cycles testing the old interpreter. Spend time in the new analyzer instead -->
|
||||
<allow_experimental_analyzer>
|
||||
<enable_analyzer>
|
||||
<readonly/>
|
||||
</allow_experimental_analyzer>
|
||||
</enable_analyzer>
|
||||
|
||||
<!-- This feature is broken, deprecated and will be removed. We don't want more reports about it -->
|
||||
<allow_experimental_object_type>
|
||||
@ -308,7 +308,8 @@ function collect_query_and_trace_logs()
|
||||
{
|
||||
for table in query_log trace_log metric_log
|
||||
do
|
||||
clickhouse-local --config-file=/etc/clickhouse-server/config.xml --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
|
||||
# Don't ignore errors here: doing so would hide sanitizer reports produced while running clickhouse-local
|
||||
clickhouse-local --config-file=/etc/clickhouse-server/config.xml --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst
|
||||
done
|
||||
}
|
||||
|
||||
|
@ -4,4 +4,5 @@ ARG FROM_TAG=latest
|
||||
FROM clickhouse/test-base:$FROM_TAG
|
||||
|
||||
COPY run.sh /
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
RUN chmod +x run.sh
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
|
@ -1,5 +1,27 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -x
|
||||
# Need to keep the error status from the tests after `tee`. Otherwise we don't alert on ASan errors
|
||||
set -o pipefail
|
||||
set -e
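The `set -o pipefail` above is what lets the `tee` pipelines below fail the job when the test binary fails; a minimal illustration (not part of the script):

bash -c 'false | tee /dev/null; echo "without pipefail: $?"'               # prints 0
bash -c 'set -o pipefail; false | tee /dev/null; echo "with pipefail: $?"' # prints 1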
|
||||
|
||||
timeout 40m gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms --gtest_output='json:test_output/test_result.json' | tee test_output/test_result.txt
|
||||
if [ "$#" -ne 1 ]; then
|
||||
echo "Expected exactly one argument"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$1" = "GDB" ];
|
||||
then
|
||||
timeout 40m \
|
||||
gdb -q -ex "set print inferior-events off" -ex "set confirm off" -ex "set print thread-events off" -ex run -ex bt -ex quit --args \
|
||||
./unit_tests_dbms --gtest_output='json:test_output/test_result.json' \
|
||||
| tee test_output/test_result.txt
|
||||
elif [ "$1" = "NO_GDB" ];
|
||||
then
|
||||
timeout 40m \
|
||||
./unit_tests_dbms --gtest_output='json:test_output/test_result.json' \
|
||||
| tee test_output/test_result.txt
|
||||
else
|
||||
echo "Unknown argument: $1"
|
||||
exit 1
|
||||
fi
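Usage of the reworked script, following the two modes handled above (the exact invocation is an assumption; the CI job may pass the argument differently):

/run.sh GDB      # run unit_tests_dbms under gdb and print a backtrace on failure
/run.sh NO_GDB   # run unit_tests_dbms directly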
|
||||
|
@ -129,6 +129,7 @@ configure
|
||||
|
||||
# Check that all new/changed setting were added in settings changes history.
|
||||
# Some settings can be different for builds with sanitizers, so we check
|
||||
# Also the automatic value of 'max_threads' and similar was displayed as "'auto(...)'" in previous versions instead of "auto(...)".
|
||||
# settings changes only for non-sanitizer builds.
|
||||
IS_SANITIZED=$(clickhouse-local --query "SELECT value LIKE '%-fsanitize=%' FROM system.build_options WHERE name = 'CXX_FLAGS'")
|
||||
if [ "${IS_SANITIZED}" -eq "0" ]
|
||||
@ -145,7 +146,9 @@ then
|
||||
old_settings.value AS old_value
|
||||
FROM new_settings
|
||||
LEFT JOIN old_settings ON new_settings.name = old_settings.name
|
||||
WHERE (new_settings.value != old_settings.value) AND (name NOT IN (
|
||||
WHERE (new_value != old_value)
|
||||
AND NOT (startsWith(new_value, 'auto(') AND old_value LIKE '%auto(%')
|
||||
AND (name NOT IN (
|
||||
SELECT arrayJoin(tupleElement(changes, 'name'))
|
||||
FROM
|
||||
(
|
||||
@ -177,7 +180,7 @@ then
|
||||
if [ -s changed_settings.txt ]
|
||||
then
|
||||
mv changed_settings.txt /test_output/
|
||||
echo -e "Changed settings are not reflected in settings changes history (see changed_settings.txt)$FAIL$(head_escaped /test_output/changed_settings.txt)" >> /test_output/test_results.tsv
|
||||
echo -e "Changed settings are not reflected in the settings changes history (see changed_settings.txt)$FAIL$(head_escaped /test_output/changed_settings.txt)" >> /test_output/test_results.tsv
|
||||
else
|
||||
echo -e "There are no changed settings or they are reflected in settings changes history$OK" >> /test_output/test_results.tsv
|
||||
fi
|
||||
|
@ -44,7 +44,6 @@ RUN apt-get update \
|
||||
bash \
|
||||
bsdmainutils \
|
||||
build-essential \
|
||||
gdb \
|
||||
git \
|
||||
gperf \
|
||||
moreutils \
|
||||
@ -58,3 +57,6 @@ RUN apt-get update \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
COPY process_functional_tests_result.py /
|
||||
|
||||
COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
|
||||
ENV PATH="/opt/gdb/bin:${PATH}"
|
||||
|
29
docs/changelogs/v24.3.7.30-lts.md
Normal file
29
docs/changelogs/v24.3.7.30-lts.md
Normal file
@ -0,0 +1,29 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.3.7.30-lts (c8a28cf4331) FIXME as compared to v24.3.6.48-lts (b2d33c3c45d)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#68103](https://github.com/ClickHouse/ClickHouse/issues/68103): Distinguish booleans and integers while parsing values for custom settings: ``` SET custom_a = true; SET custom_b = 1; ```. [#62206](https://github.com/ClickHouse/ClickHouse/pull/62206) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
* Backported in [#67931](https://github.com/ClickHouse/ClickHouse/issues/67931): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#68062](https://github.com/ClickHouse/ClickHouse/issues/68062): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
|
||||
* Backported in [#67812](https://github.com/ClickHouse/ClickHouse/issues/67812): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Backported in [#67848](https://github.com/ClickHouse/ClickHouse/issues/67848): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Backported in [#68271](https://github.com/ClickHouse/ClickHouse/issues/68271): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* Backported in [#67806](https://github.com/ClickHouse/ClickHouse/issues/67806): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#67834](https://github.com/ClickHouse/ClickHouse/issues/67834): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#68206](https://github.com/ClickHouse/ClickHouse/issues/68206): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* Backported in [#68089](https://github.com/ClickHouse/ClickHouse/issues/68089): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Backported in [#68120](https://github.com/ClickHouse/ClickHouse/issues/68120): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Update version after release. [#67676](https://github.com/ClickHouse/ClickHouse/pull/67676) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Backported in [#68074](https://github.com/ClickHouse/ClickHouse/issues/68074): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
|
||||
|
67
docs/changelogs/v24.6.3.95-stable.md
Normal file
67
docs/changelogs/v24.6.3.95-stable.md
Normal file
@ -0,0 +1,67 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.6.3.95-stable (8325c920d11) FIXME as compared to v24.6.2.17-stable (5710a8b5c0c)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#66770](https://github.com/ClickHouse/ClickHouse/issues/66770): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
|
||||
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
|
||||
* Backported in [#66885](https://github.com/ClickHouse/ClickHouse/issues/66885): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#66693](https://github.com/ClickHouse/ClickHouse/issues/66693): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Backported in [#67816](https://github.com/ClickHouse/ClickHouse/issues/67816): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Backported in [#67503](https://github.com/ClickHouse/ClickHouse/issues/67503): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
|
||||
* Backported in [#67852](https://github.com/ClickHouse/ClickHouse/issues/67852): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Backported in [#67838](https://github.com/ClickHouse/ClickHouse/issues/67838): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
* Backported in [#66303](https://github.com/ClickHouse/ClickHouse/issues/66303): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
|
||||
* Backported in [#66330](https://github.com/ClickHouse/ClickHouse/issues/66330): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Backported in [#66157](https://github.com/ClickHouse/ClickHouse/issues/66157): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Backported in [#66210](https://github.com/ClickHouse/ClickHouse/issues/66210): Disable the `merge-filters` optimization introduced in [#64760](https://github.com/ClickHouse/ClickHouse/issues/64760). It may cause an exception if optimization merges two filter expressions and does not apply a short-circuit evaluation. [#66126](https://github.com/ClickHouse/ClickHouse/pull/66126) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#66456](https://github.com/ClickHouse/ClickHouse/issues/66456): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#66228](https://github.com/ClickHouse/ClickHouse/issues/66228): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#66183](https://github.com/ClickHouse/ClickHouse/issues/66183): Fix rare case with missing data in the result of distributed query, close [#61432](https://github.com/ClickHouse/ClickHouse/issues/61432). [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)).
|
||||
* Backported in [#66271](https://github.com/ClickHouse/ClickHouse/issues/66271): Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#66682](https://github.com/ClickHouse/ClickHouse/issues/66682): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* Backported in [#66587](https://github.com/ClickHouse/ClickHouse/issues/66587): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
|
||||
* Backported in [#66362](https://github.com/ClickHouse/ClickHouse/issues/66362): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#66613](https://github.com/ClickHouse/ClickHouse/issues/66613): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#66577](https://github.com/ClickHouse/ClickHouse/issues/66577): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#66721](https://github.com/ClickHouse/ClickHouse/issues/66721): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#66670](https://github.com/ClickHouse/ClickHouse/issues/66670): Fix reading of uninitialized memory when hashing empty tuples. This closes [#66559](https://github.com/ClickHouse/ClickHouse/issues/66559). [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#66952](https://github.com/ClickHouse/ClickHouse/issues/66952): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#66956](https://github.com/ClickHouse/ClickHouse/issues/66956): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* Backported in [#66716](https://github.com/ClickHouse/ClickHouse/issues/66716): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* Backported in [#66759](https://github.com/ClickHouse/ClickHouse/issues/66759): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#66751](https://github.com/ClickHouse/ClickHouse/issues/66751): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#67635](https://github.com/ClickHouse/ClickHouse/issues/67635): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Backported in [#67482](https://github.com/ClickHouse/ClickHouse/issues/67482): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#67199](https://github.com/ClickHouse/ClickHouse/issues/67199): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#67381](https://github.com/ClickHouse/ClickHouse/issues/67381): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#67244](https://github.com/ClickHouse/ClickHouse/issues/67244): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Backported in [#67578](https://github.com/ClickHouse/ClickHouse/issues/67578): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Backported in [#67808](https://github.com/ClickHouse/ClickHouse/issues/67808): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### NO CL ENTRY
|
||||
|
||||
* NO CL ENTRY: 'Revert "Backport [#66599](https://github.com/ClickHouse/ClickHouse/issues/66599) to 24.6: Fix dropping named collection in local storage"'. [#66922](https://github.com/ClickHouse/ClickHouse/pull/66922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Backported in [#66332](https://github.com/ClickHouse/ClickHouse/issues/66332): Do not raise a NOT_IMPLEMENTED error when getting s3 metrics with a multiple disk configuration. [#65403](https://github.com/ClickHouse/ClickHouse/pull/65403) ([Elena Torró](https://github.com/elenatorro)).
|
||||
* Backported in [#66142](https://github.com/ClickHouse/ClickHouse/issues/66142): Fix flaky test_storage_s3_queue tests. [#66009](https://github.com/ClickHouse/ClickHouse/pull/66009) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#66389](https://github.com/ClickHouse/ClickHouse/issues/66389): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)).
|
||||
* Backported in [#66428](https://github.com/ClickHouse/ClickHouse/issues/66428): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#66546](https://github.com/ClickHouse/ClickHouse/issues/66546): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#66861](https://github.com/ClickHouse/ClickHouse/issues/66861): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)).
|
||||
* Backported in [#66877](https://github.com/ClickHouse/ClickHouse/issues/66877): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)).
|
||||
* Backported in [#67061](https://github.com/ClickHouse/ClickHouse/issues/67061): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#66940](https://github.com/ClickHouse/ClickHouse/issues/66940): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#67254](https://github.com/ClickHouse/ClickHouse/issues/67254): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)).
|
||||
* Backported in [#67414](https://github.com/ClickHouse/ClickHouse/issues/67414): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)).
|
||||
|
37
docs/changelogs/v24.7.3.42-stable.md
Normal file
37
docs/changelogs/v24.7.3.42-stable.md
Normal file
@ -0,0 +1,37 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.7.3.42-stable (63730bc4293) FIXME as compared to v24.7.2.13-stable (6e41f601b2f)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
* Backported in [#67969](https://github.com/ClickHouse/ClickHouse/issues/67969): Fixed reading of subcolumns after `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#67637](https://github.com/ClickHouse/ClickHouse/issues/67637): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Backported in [#67820](https://github.com/ClickHouse/ClickHouse/issues/67820): Fix possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Backported in [#67881](https://github.com/ClickHouse/ClickHouse/issues/67881): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#67713](https://github.com/ClickHouse/ClickHouse/issues/67713): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#67995](https://github.com/ClickHouse/ClickHouse/issues/67995): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
|
||||
|
||||
* Backported in [#67818](https://github.com/ClickHouse/ClickHouse/issues/67818): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Backported in [#67766](https://github.com/ClickHouse/ClickHouse/issues/67766): Fix crash of `uniq` and `uniqTheta` with `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)).
|
||||
* Backported in [#67854](https://github.com/ClickHouse/ClickHouse/issues/67854): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Backported in [#67840](https://github.com/ClickHouse/ClickHouse/issues/67840): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Backported in [#67518](https://github.com/ClickHouse/ClickHouse/issues/67518): Split slow test 03036_dynamic_read_subcolumns. [#66954](https://github.com/ClickHouse/ClickHouse/pull/66954) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Backported in [#67516](https://github.com/ClickHouse/ClickHouse/issues/67516): Split 01508_partition_pruning_long. [#66983](https://github.com/ClickHouse/ClickHouse/pull/66983) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Backported in [#67529](https://github.com/ClickHouse/ClickHouse/issues/67529): Reduce max time of 00763_long_lock_buffer_alter_destination_table. [#67185](https://github.com/ClickHouse/ClickHouse/pull/67185) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#67643](https://github.com/ClickHouse/ClickHouse/issues/67643): [Green CI] Fix potentially flaky test_mask_sensitive_info integration test. [#67506](https://github.com/ClickHouse/ClickHouse/pull/67506) ([Alexey Katsman](https://github.com/alexkats)).
|
||||
* Backported in [#67609](https://github.com/ClickHouse/ClickHouse/issues/67609): Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance. [#67590](https://github.com/ClickHouse/ClickHouse/pull/67590) ([Pablo Marcos](https://github.com/pamarcos)).
|
||||
* Backported in [#67871](https://github.com/ClickHouse/ClickHouse/issues/67871): Fix 02434_cancel_insert_when_client_dies. [#67600](https://github.com/ClickHouse/ClickHouse/pull/67600) ([vdimir](https://github.com/vdimir)).
|
||||
* Backported in [#67704](https://github.com/ClickHouse/ClickHouse/issues/67704): Fix 02910_bad_logs_level_in_local in fast tests. [#67603](https://github.com/ClickHouse/ClickHouse/pull/67603) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#67689](https://github.com/ClickHouse/ClickHouse/issues/67689): Fix 01605_adaptive_granularity_block_borders. [#67605](https://github.com/ClickHouse/ClickHouse/pull/67605) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Backported in [#67827](https://github.com/ClickHouse/ClickHouse/issues/67827): Try fix 03143_asof_join_ddb_long. [#67620](https://github.com/ClickHouse/ClickHouse/pull/67620) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Backported in [#67892](https://github.com/ClickHouse/ClickHouse/issues/67892): Revert "Merge pull request [#66510](https://github.com/ClickHouse/ClickHouse/issues/66510) from canhld94/fix_trivial_count_non_deterministic_func". [#67800](https://github.com/ClickHouse/ClickHouse/pull/67800) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
|
@ -118,7 +118,7 @@ And the result of interpreting the `INSERT SELECT` query is a "completed" `Query
|
||||
|
||||
`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are performed. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted into separate classes to allow for modular transformations of the query.
|
||||
|
||||
To address current problems that exist in interpreters, a new `InterpreterSelectQueryAnalyzer` is being developed. It is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional abstraction level between `AST` and `QueryPipeline` called `QueryTree`. It is not production-ready yet, but it can be tested with the `allow_experimental_analyzer` flag.
|
||||
To address problems that exist in interpreters, a new `InterpreterSelectQueryAnalyzer` has been developed. It is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional abstraction layer between `AST` and `QueryPipeline`, called `QueryTree`. It is fully ready for use in production, but, just in case, it can be turned off by setting the `enable_analyzer` setting to `false`.
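
As a rough sketch (assuming only the `enable_analyzer` setting mentioned above), the analyzer can be toggled per session or per query:

``` sql
-- Fall back to the old ExpressionAnalyzer-based interpreter for the current session.
SET enable_analyzer = 0;

-- Or override the setting for a single query.
SELECT 1 SETTINGS enable_analyzer = 1;
```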
|
||||
|
||||
## Functions {#functions}
|
||||
|
||||
|
@ -27,23 +27,23 @@ Avoid dumping copies of external code into the library directory.
|
||||
Instead create a Git submodule to pull third-party code from an external upstream repository.
|
||||
|
||||
All submodules used by ClickHouse are listed in the `.gitmodules` file.
|
||||
If the library can be used as-is (the default case), you can reference the upstream repository directly.
|
||||
If the library needs patching, create a fork of the upstream repository in the [ClickHouse organization on GitHub](https://github.com/ClickHouse).
|
||||
- If the library can be used as-is (the default case), you can reference the upstream repository directly.
|
||||
- If the library needs patching, create a fork of the upstream repository in the [ClickHouse organization on GitHub](https://github.com/ClickHouse).
|
||||
|
||||
In the latter case, we aim to isolate custom patches as much as possible from upstream commits.
|
||||
To that end, create a branch with prefix `clickhouse/` from the branch or tag you want to integrate, e.g. `clickhouse/master` (for branch `master`) or `clickhouse/release/vX.Y.Z` (for tag `release/vX.Y.Z`).
|
||||
This ensures that pulls from the upstream repository into the fork will leave custom `clickhouse/` branches unaffected.
|
||||
Submodules in `contrib/` must only track `clickhouse/` branches of forked third-party repositories.
|
||||
To that end, create a branch with prefix `ClickHouse/` from the branch or tag you want to integrate, e.g. `ClickHouse/2024_2` (for branch `2024_2`) or `ClickHouse/release/vX.Y.Z` (for tag `release/vX.Y.Z`).
|
||||
Avoid following upstream development branches `master`/ `main` / `dev` (i.e., prefix branches `ClickHouse/master` / `ClickHouse/main` / `ClickHouse/dev` in the fork repository).
|
||||
Such branches are moving targets which make proper versioning harder.
|
||||
"Prefix branches" ensure that pulls from the upstream repository into the fork will leave custom `ClickHouse/` branches unaffected.
|
||||
Submodules in `contrib/` must only track `ClickHouse/` branches of forked third-party repositories.
|
||||
|
||||
Patches are only applied against `clickhouse/` branches of external libraries.
|
||||
For that, push the patch as a branch with `clickhouse/`, e.g. `clickhouse/fix-some-desaster`.
|
||||
Then create a PR from the new branch against the custom tracking branch with `clickhouse/` prefix, (e.g. `clickhouse/master` or `clickhouse/release/vX.Y.Z`) and merge the patch.
|
||||
Patches are only applied against `ClickHouse/` branches of external libraries.
|
||||
|
||||
There are two ways to do that:
|
||||
- You'd like to make a new fix against a `ClickHouse/`-prefix branch in the forked repository, e.g. a sanitizer fix. In that case, push the fix as a branch with the `ClickHouse/` prefix, e.g. `ClickHouse/fix-sanitizer-disaster`. Then create a PR from the new branch against the custom tracking branch, e.g. `ClickHouse/2024_2 <-- ClickHouse/fix-sanitizer-disaster`, and merge the PR.
|
||||
- You update the submodule and need to re-apply earlier patches. In this case, re-creating old PRs is overkill. Instead, simply cherry-pick the older commits into the new `ClickHouse/` branch (corresponding to the new version). Feel free to squash commits of PRs that had multiple commits. In the best case, we contributed the custom patches back to upstream and can omit them in the new version.
|
||||
|
||||
Once the submodule has been updated, bump the submodule in ClickHouse to point to the new hash in the fork.
|
||||
|
||||
Create patches of third-party libraries with the official repository in mind and consider contributing the patch back to the upstream repository.
|
||||
This makes sure that others will also benefit from the patch and it will not be a maintenance burden for the ClickHouse team.
|
||||
|
||||
To pull upstream changes into the submodule, you can use two methods:
|
||||
- (less work but less clean): merge upstream `master` into the corresponding `clickhouse/` tracking branch in the forked repository. You will need to resolve merge conflicts with previous custom patches. This method can be used when the `clickhouse/` branch tracks an upstream development branch like `master`, `main`, `dev`, etc.
|
||||
- (more work but cleaner): create a new branch with `clickhouse/` prefix from the upstream commit or tag you like to integrate. Then re-apply all existing patches using new PRs (or squash them into a single PR). This method can be used when the `clickhouse/` branch tracks a specific upstream version branch or tag. It is cleaner in the sense that custom patches and upstream changes are better isolated from each other.
|
||||
|
||||
Once the submodule has been updated, bump the submodule in ClickHouse to point to the new hash in the fork.
|
||||
|
@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser
|
||||
|
||||
Tests are located in the `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse, and it is available to the general public.
|
||||
|
||||
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
|
||||
Each test can be one of two types: `.sql` and `.sh`. A `.sql` test is a simple SQL script that is piped to `clickhouse-client`. A `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
|
||||
|
||||
:::note
|
||||
A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC"). This is not the case: time zones in CI test runs
|
||||
@ -38,7 +38,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes
|
||||
|
||||
### Adding a New Test
|
||||
|
||||
To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
|
||||
To add a new test, create a `.sql` or `.sh` file in the `queries/0_stateless` directory, check it manually, and then generate the `.reference` file in the following way: `clickhouse-client < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
|
||||
|
||||
Tests should use (create, drop, etc.) only tables in the `test` database, which is assumed to be created beforehand; tests can also use temporary tables.
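
As an illustrative sketch (the file name and its contents are hypothetical, not an existing test), a minimal stateless `.sql` test that follows these rules could look like this:

``` sql
-- queries/0_stateless/00000_example_test.sql (hypothetical)
-- Use a temporary table so no cleanup is needed.
CREATE TEMPORARY TABLE example_numbers (n UInt64);
INSERT INTO example_numbers SELECT number FROM numbers(10);
SELECT sum(n) FROM example_numbers;
```

The corresponding `.reference` file would then be generated with `clickhouse-client < 00000_example_test.sql > 00000_example_test.reference`, as described above.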
|
||||
|
||||
|
@ -61,6 +61,7 @@ Engines in the family:
|
||||
- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md)
|
||||
- [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)
|
||||
- [S3Queue](../../engines/table-engines/integrations/s3queue.md)
|
||||
- [TimeSeries](../../engines/table-engines/integrations/time-series.md)
|
||||
|
||||
### Special Engines {#special-engines}
|
||||
|
||||
|
@ -251,6 +251,44 @@ The number of rows in one Kafka message depends on whether the format is row-bas
|
||||
- For row-based formats the number of rows in one Kafka message can be controlled by setting `kafka_max_rows_per_message` (see the sketch after this list).
|
||||
- For block-based formats we cannot divide a block into smaller parts, but the number of rows in one block can be controlled by the general setting [max_block_size](../../../operations/settings/settings.md#setting-max_block_size).
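
For example, a hedged sketch (the broker address, topic, and table name are illustrative) that caps each produced message of a row-based format at 100 rows:

``` sql
-- Illustrative only: limit each produced Kafka message to 100 rows for a row-based format.
CREATE TABLE kafka_out (key UInt64, value String)
    ENGINE = Kafka('localhost:19092', 'my-topic', 'my-group', 'JSONEachRow')
    SETTINGS kafka_max_rows_per_message = 100;
```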
|
||||
|
||||
## Experimental engine to store committed offsets in ClickHouse Keeper
|
||||
|
||||
If `allow_experimental_kafka_offsets_storage_in_keeper` is enabled, then two more settings can be specified to the Kafka table engine:
|
||||
- `kafka_keeper_path` specifies the path to the table in ClickHouse Keeper
|
||||
- `kafka_replica_name` specifies the replica name in ClickHouse Keeper
|
||||
|
||||
Either both settings must be specified or neither of them. When both are specified, a new, experimental Kafka engine is used. The new engine doesn't depend on storing the committed offsets in Kafka, but stores them in ClickHouse Keeper. It still tries to commit the offsets to Kafka, but it only depends on those offsets when the table is created. In any other circumstance (the table is restarted, or recovered after some error), the offsets stored in ClickHouse Keeper are used to continue consuming messages from. Apart from the committed offset, it also stores how many messages were consumed in the last batch, so if the insert fails, the same number of messages will be consumed, thus enabling deduplication if necessary.
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE experimental_kafka (key UInt64, value UInt64)
|
||||
ENGINE = Kafka('localhost:19092', 'my-topic', 'my-consumer', 'JSONEachRow')
|
||||
SETTINGS
|
||||
kafka_keeper_path = '/clickhouse/{database}/experimental_kafka',
|
||||
kafka_replica_name = 'r1'
|
||||
SETTINGS allow_experimental_kafka_offsets_storage_in_keeper=1;
|
||||
```
|
||||
|
||||
Or to utilize the `uuid` and `replica` macros similarly to ReplicatedMergeTree:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE experimental_kafka (key UInt64, value UInt64)
|
||||
ENGINE = Kafka('localhost:19092', 'my-topic', 'my-consumer', 'JSONEachRow')
|
||||
SETTINGS
|
||||
kafka_keeper_path = '/clickhouse/{database}/{uuid}',
|
||||
kafka_replica_name = '{replica}'
|
||||
SETTINGS allow_experimental_kafka_offsets_storage_in_keeper=1;
|
||||
```
|
||||
|
||||
### Known limitations
|
||||
|
||||
As the new engine is experimental, it is not production ready yet. There are few known limitations of the implementation:
|
||||
- The biggest limitation is that the engine doesn't support direct reading. Reading from the engine using materialized views and writing to the engine work, but direct reading doesn't. As a result, all direct `SELECT` queries will fail (see the sketch after this list).
|
||||
- Rapidly dropping and recreating the table or specifying the same ClickHouse Keeper path to different engines might cause issues. As a best practice, you can use `{uuid}` in `kafka_keeper_path` to avoid clashing paths.
|
||||
- To make reads repeatable, messages cannot be consumed from multiple partitions on a single thread. On the other hand, the Kafka consumers have to be polled regularly to keep them alive. As a result of these two objectives, we decided to allow creating multiple consumers only if `kafka_thread_per_consumer` is enabled; otherwise it is too complicated to avoid issues with polling consumers regularly.
|
||||
- Consumers created by the new storage engine do not show up in [`system.kafka_consumers`](../../../operations/system-tables/kafka_consumers.md) table.
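
Because direct reads are not supported, the usual consumption pattern goes through a materialized view. A hedged sketch, reusing the `experimental_kafka` table from the example above (the destination table and view names are illustrative):

``` sql
-- Illustrative only: consume from the experimental Kafka table through a materialized view,
-- since direct SELECT queries against the engine itself fail.
CREATE TABLE kafka_data (key UInt64, value UInt64)
    ENGINE = MergeTree
    ORDER BY key;

CREATE MATERIALIZED VIEW kafka_to_data TO kafka_data AS
SELECT key, value
FROM experimental_kafka;
```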
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
|
@ -146,6 +146,7 @@ Code: 48. DB::Exception: Received from localhost:9000. DB::Exception: Reading fr
|
||||
- `_file` — Name of the file. Type: `LowCardinalty(String)`.
|
||||
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
- `_etag` — ETag of the file. Type: `LowCardinalty(String)`. If the etag is unknown, the value is `NULL`.
|
||||
|
||||
For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).
|
||||
|
||||
|
295
docs/en/engines/table-engines/integrations/time-series.md
Normal file
295
docs/en/engines/table-engines/integrations/time-series.md
Normal file
@ -0,0 +1,295 @@
|
||||
---
|
||||
slug: /en/engines/table-engines/special/time_series
|
||||
sidebar_position: 60
|
||||
sidebar_label: TimeSeries
|
||||
---
|
||||
|
||||
# TimeSeries Engine [Experimental]
|
||||
|
||||
A table engine storing time series, i.e. a set of values associated with timestamps and tags (or labels):
|
||||
|
||||
```
|
||||
metric_name1[tag1=value1, tag2=value2, ...] = {timestamp1: value1, timestamp2: value2, ...}
|
||||
metric_name2[...] = ...
|
||||
```
|
||||
|
||||
:::info
|
||||
This is an experimental feature that may change in backward-incompatible ways in future releases.
|
||||
Enable usage of the TimeSeries table engine
|
||||
with [allow_experimental_time_series_table](../../../operations/settings/settings.md#allow-experimental-time-series-table) setting.
|
||||
For example, run the command `SET allow_experimental_time_series_table = 1`.
|
||||
:::
|
||||
|
||||
## Syntax {#syntax}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE name [(columns)] ENGINE=TimeSeries
|
||||
[SETTINGS var1=value1, ...]
|
||||
[DATA db.data_table_name | DATA ENGINE data_table_engine(arguments)]
|
||||
[TAGS db.tags_table_name | TAGS ENGINE tags_table_engine(arguments)]
|
||||
[METRICS db.metrics_table_name | METRICS ENGINE metrics_table_engine(arguments)]
|
||||
```
|
||||
|
||||
## Usage {#usage}
|
||||
|
||||
It's easier to start with everything set by default (it's allowed to create a `TimeSeries` table without specifying a list of columns):
|
||||
|
||||
``` sql
|
||||
CREATE TABLE my_table ENGINE=TimeSeries
|
||||
```
|
||||
|
||||
Then this table can be used with the following protocols (a port must be assigned in the server configuration):
|
||||
- [prometheus remote-write](../../../interfaces/prometheus.md#remote-write)
|
||||
- [prometheus remote-read](../../../interfaces/prometheus.md#remote-read)
|
||||
|
||||
## Target tables {#target-tables}
|
||||
|
||||
A `TimeSeries` table doesn't have its own data storage; everything is stored in its target tables.
|
||||
This is similar to how a [materialized view](../../../sql-reference/statements/create/view#materialized-view) works,
|
||||
with the difference that a materialized view has one target table
|
||||
whereas a `TimeSeries` table has three target tables named [data]{#data-table}, [tags]{#tags-table}, and [metrics]{#metrics-table}.
|
||||
|
||||
The target tables can be either specified explicitly in the `CREATE TABLE` query
|
||||
or the `TimeSeries` table engine can generate inner target tables automatically.
|
||||
|
||||
The target tables are the following:
|
||||
1. The _data_ table {#data-table} contains time series associated with some identifier.
|
||||
The _data_ table must have columns:
|
||||
|
||||
| Name | Mandatory? | Default type | Possible types | Description |
|
||||
|---|---|---|---|---|
|
||||
| `id` | [x] | `UUID` | any | Identifies a combination of a metric name and tags |
|
||||
| `timestamp` | [x] | `DateTime64(3)` | `DateTime64(X)` | A time point |
|
||||
| `value` | [x] | `Float64` | `Float32` or `Float64` | A value associated with the `timestamp` |
|
||||
|
||||
2. The _tags_ table {#tags-table} contains identifiers calculated for each combination of a metric name and tags.
|
||||
The _tags_ table must have columns:
|
||||
|
||||
| Name | Mandatory? | Default type | Possible types | Description |
|
||||
|---|---|---|---|---|
|
||||
| `id` | [x] | `UUID` | any (must match the type of `id` in the [data]{#data-table} table) | An `id` identifies a combination of a metric name and tags. The DEFAULT expression specifies how to calculate such an identifier |
|
||||
| `metric_name` | [x] | `LowCardinality(String)` | `String` or `LowCardinality(String)` | The name of a metric |
|
||||
| `<tag_value_column>` | [ ] | `String` | `String` or `LowCardinality(String)` or `LowCardinality(Nullable(String))` | The value of a specific tag, the tag's name and the name of a corresponding column are specified in the [tags_to_columns](#settings) setting |
|
||||
| `tags` | [x] | `Map(LowCardinality(String), String)` | `Map(String, String)` or `Map(LowCardinality(String), String)` or `Map(LowCardinality(String), LowCardinality(String))` | Map of tags excluding the tag `__name__` containing the name of a metric and excluding tags with names enumerated in the [tags_to_columns](#settings) setting |
|
||||
| `all_tags` | [ ] | `Map(String, String)` | `Map(String, String)` or `Map(LowCardinality(String), String)` or `Map(LowCardinality(String), LowCardinality(String))` | Ephemeral column, each row is a map of all the tags excluding only the tag `__name__` containing the name of a metric. The only purpose of that column is to be used while calculating `id` |
|
||||
| `min_time` | [ ] | `Nullable(DateTime64(3))` | `DateTime64(X)` or `Nullable(DateTime64(X))` | Minimum timestamp of time series with that `id`. The column is created if [store_min_time_and_max_time](#settings) is `true` |
|
||||
| `max_time` | [ ] | `Nullable(DateTime64(3))` | `DateTime64(X)` or `Nullable(DateTime64(X))` | Maximum timestamp of time series with that `id`. The column is created if [store_min_time_and_max_time](#settings) is `true` |
|
||||
|
||||
3. The _metrics_ table {#metrics-table} contains some information about the metrics being collected, the types of those metrics, and their descriptions.
|
||||
The _metrics_ table must have columns:
|
||||
|
||||
| Name | Mandatory? | Default type | Possible types | Description |
|
||||
|---|---|---|---|---|
|
||||
| `metric_family_name` | [x] | `String` | `String` or `LowCardinality(String)` | The name of a metric family |
|
||||
| `type` | [x] | `String` | `String` or `LowCardinality(String)` | The type of a metric family, one of "counter", "gauge", "summary", "stateset", "histogram", "gaugehistogram" |
|
||||
| `unit` | [x] | `String` | `String` or `LowCardinality(String)` | The unit used in a metric |
|
||||
| `help` | [x] | `String` | `String` or `LowCardinality(String)` | The description of a metric |
|
||||
|
||||
Any row inserted into a `TimeSeries` table will in fact be stored in those three target tables.
|
||||
A `TimeSeries` table contains all those columns from the [data]{#data-table}, [tags]{#tags-table}, [metrics]{#metrics-table} tables.
|
||||
|
||||
## Creation {#creation}
|
||||
|
||||
There are multiple ways to create a table with the `TimeSeries` table engine.
|
||||
The simplest statement
|
||||
|
||||
``` sql
|
||||
CREATE TABLE my_table ENGINE=TimeSeries
|
||||
```
|
||||
|
||||
will actually create the following table (you can see that by executing `SHOW CREATE TABLE my_table`):
|
||||
|
||||
``` sql
|
||||
CREATE TABLE my_table
|
||||
(
|
||||
`id` UUID DEFAULT reinterpretAsUUID(sipHash128(metric_name, all_tags)),
|
||||
`timestamp` DateTime64(3),
|
||||
`value` Float64,
|
||||
`metric_name` LowCardinality(String),
|
||||
`tags` Map(LowCardinality(String), String),
|
||||
`all_tags` Map(String, String),
|
||||
`min_time` Nullable(DateTime64(3)),
|
||||
`max_time` Nullable(DateTime64(3)),
|
||||
`metric_family_name` String,
|
||||
`type` String,
|
||||
`unit` String,
|
||||
`help` String
|
||||
)
|
||||
ENGINE = TimeSeries
|
||||
DATA ENGINE = MergeTree ORDER BY (id, timestamp)
|
||||
DATA INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
|
||||
TAGS ENGINE = AggregatingMergeTree PRIMARY KEY metric_name ORDER BY (metric_name, id)
|
||||
TAGS INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
|
||||
METRICS ENGINE = ReplacingMergeTree ORDER BY metric_family_name
|
||||
METRICS INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
|
||||
```
|
||||
|
||||
So the columns were generated automatically, and there are also three inner UUIDs in this statement -
|
||||
one for each inner target table that was created.
|
||||
(Inner UUIDs are normally not shown until the setting
|
||||
[show_table_uuid_in_table_create_query_if_not_nil](../../../operations/settings/settings#show_table_uuid_in_table_create_query_if_not_nil)
|
||||
is set.)
|
||||
|
||||
Inner target tables have names like `.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`,
|
||||
`.inner_id.tags.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, `.inner_id.metrics.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
|
||||
and each target table has columns that are a subset of the columns of the main `TimeSeries` table:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE default.`.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
|
||||
(
|
||||
`id` UUID,
|
||||
`timestamp` DateTime64(3),
|
||||
`value` Float64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (id, timestamp)
|
||||
```
|
||||
|
||||
``` sql
|
||||
CREATE TABLE default.`.inner_id.tags.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
|
||||
(
|
||||
`id` UUID DEFAULT reinterpretAsUUID(sipHash128(metric_name, all_tags)),
|
||||
`metric_name` LowCardinality(String),
|
||||
`tags` Map(LowCardinality(String), String),
|
||||
`all_tags` Map(String, String) EPHEMERAL,
|
||||
`min_time` SimpleAggregateFunction(min, Nullable(DateTime64(3))),
|
||||
`max_time` SimpleAggregateFunction(max, Nullable(DateTime64(3)))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree
|
||||
PRIMARY KEY metric_name
|
||||
ORDER BY (metric_name, id)
|
||||
```
|
||||
|
||||
``` sql
|
||||
CREATE TABLE default.`.inner_id.metrics.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
|
||||
(
|
||||
`metric_family_name` String,
|
||||
`type` String,
|
||||
`unit` String,
|
||||
`help` String
|
||||
)
|
||||
ENGINE = ReplacingMergeTree
|
||||
ORDER BY metric_family_name
|
||||
```
|
||||
|
||||
## Adjusting types of columns {#adjusting-column-types}
|
||||
|
||||
You can adjust the types of almost any column of the inner target tables by specifying them explicitly
|
||||
while defining the main table. For example,
|
||||
|
||||
``` sql
|
||||
CREATE TABLE my_table
|
||||
(
|
||||
timestamp DateTime64(6)
|
||||
) ENGINE=TimeSeries
|
||||
```
|
||||
|
||||
will make the inner [data]{#data-table} table store timestamp in microseconds instead of milliseconds:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE default.`.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
|
||||
(
|
||||
`id` UUID,
|
||||
`timestamp` DateTime64(6),
|
||||
`value` Float64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (id, timestamp)
|
||||
```
|
||||
|
||||
## The `id` column {#id-column}
|
||||
|
||||
The `id` column contains identifiers, every identifier is calculated for a combination of a metric name and tags.
|
||||
The DEFAULT expression for the `id` column is an expression which will be used to calculate such identifiers.
|
||||
Both the type of the `id` column and that expression can be adjusted by specifying them explicitly:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE my_table
|
||||
(
|
||||
id UInt64 DEFAULT sipHash64(metric_name, all_tags)
|
||||
) ENGINE=TimeSeries
|
||||
```
|
||||
|
||||
## The `tags` and `all_tags` columns {#tags-and-all-tags}
|
||||
|
||||
There are two columns containing maps of tags - `tags` and `all_tags`. In this example they contain the same data, however they can be different
|
||||
if the setting `tags_to_columns` is used. This setting allows specifying that a specific tag should be stored in a separate column instead of being stored
|
||||
in a map inside the `tags` column:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE my_table ENGINE=TimeSeries SETTINGS tags_to_columns = {'instance': 'instance', 'job': 'job'}
|
||||
```
|
||||
|
||||
This statement will add columns
|
||||
```
|
||||
`instance` String,
|
||||
`job` String
|
||||
```
|
||||
to the definition of both `my_table` and its inner [tags]{#tags-table} target table. In this case the `tags` column will not contain tags `instance` and `job`,
|
||||
but the `all_tags` column will contain them. The `all_tags` column is ephemeral, and its only purpose is to be used in the DEFAULT expression
|
||||
for the `id` column.
|
||||
|
||||
The types of columns can be adjusted by specifying them explicitly:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE my_table (instance LowCardinality(String), job LowCardinality(Nullable(String)))
|
||||
ENGINE=TimeSeries SETTINGS tags_to_columns = {'instance': 'instance', 'job': 'job'}
|
||||
```
|
||||
|
||||
## Table engines of inner target tables {#inner-table-engines}
|
||||
|
||||
By default inner target tables use the following table engines:
|
||||
- the [data]{#data-table} table uses [MergeTree](../mergetree-family/mergetree);
|
||||
- the [tags]{#tags-table} table uses [AggregatingMergeTree](../mergetree-family/aggregatingmergetree) because the same data is often inserted multiple times to this table so we need a way
|
||||
to remove duplicates, and also because it's required to do aggregation for columns `min_time` and `max_time`;
|
||||
- the [metrics]{#metrics-table} table uses [ReplacingMergeTree](../mergetree-family/replacingmergetree) because the same data is often inserted multiple times to this table so we need a way
|
||||
to remove duplicates.
|
||||
|
||||
Other table engines can also be used for the inner target tables if specified explicitly:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE my_table ENGINE=TimeSeries
|
||||
DATA ENGINE=ReplicatedMergeTree
|
||||
TAGS ENGINE=ReplicatedAggregatingMergeTree
|
||||
METRICS ENGINE=ReplicatedReplacingMergeTree
|
||||
```
|
||||
|
||||
## External target tables {#external-target-tables}
|
||||
|
||||
It's possible to make a `TimeSeries` table use manually created target tables:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE data_for_my_table
|
||||
(
|
||||
`id` UUID,
|
||||
`timestamp` DateTime64(3),
|
||||
`value` Float64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (id, timestamp);
|
||||
|
||||
CREATE TABLE tags_for_my_table ...
|
||||
|
||||
CREATE TABLE metrics_for_my_table ...
|
||||
|
||||
CREATE TABLE my_table ENGINE=TimeSeries DATA data_for_my_table TAGS tags_for_my_table METRICS metrics_for_my_table;
|
||||
```
|
||||
|
||||
## Settings {#settings}
|
||||
|
||||
Here is a list of settings which can be specified while defining a `TimeSeries` table (a combined usage sketch follows the table):
|
||||
|
||||
| Name | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `tags_to_columns` | Map | {} | Map specifying which tags should be put into separate columns in the [tags]{#tags-table} table. Syntax: `{'tag1': 'column1', 'tag2': 'column2', ...}` |
|
||||
| `use_all_tags_column_to_generate_id` | Bool | true | When generating an expression to calculate an identifier of a time series, this flag enables using the `all_tags` column in that calculation |
|
||||
| `store_min_time_and_max_time` | Bool | true | If set to true then the table will store `min_time` and `max_time` for each time series |
|
||||
| `aggregate_min_time_and_max_time` | Bool | true | When creating an inner target `tags` table, this flag enables using `SimpleAggregateFunction(min, Nullable(DateTime64(3)))` instead of just `Nullable(DateTime64(3))` as the type of the `min_time` column, and the same for the `max_time` column |
|
||||
| `filter_by_min_time_and_max_time` | Bool | true | If set to true then the table will use the `min_time` and `max_time` columns for filtering time series |
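
As a rough sketch of combining several of these settings (the tag and column names are illustrative, and the exact literal syntax is assumed from the examples above):

``` sql
CREATE TABLE my_table ENGINE=TimeSeries
SETTINGS tags_to_columns = {'instance': 'instance', 'job': 'job'},
         store_min_time_and_max_time = 1,
         filter_by_min_time_and_max_time = 1
```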
|
||||
|
||||
# Functions {#functions}
|
||||
|
||||
Here is a list of functions supporting a `TimeSeries` table as an argument:
|
||||
- [timeSeriesData](../../../sql-reference/table-functions/timeSeriesData.md)
|
||||
- [timeSeriesTags](../../../sql-reference/table-functions/timeSeriesTags.md)
|
||||
- [timeSeriesMetrics](../../../sql-reference/table-functions/timeSeriesMetrics.md)
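
For illustration, a hedged sketch of calling them (assuming the `TimeSeries` table is passed by name, as in the linked pages; `my_table` refers to the table from the examples above):

``` sql
-- Read the inner data, tags, and metrics tables of a TimeSeries table.
SELECT * FROM timeSeriesData(my_table) LIMIT 10;
SELECT * FROM timeSeriesTags(my_table) LIMIT 10;
SELECT * FROM timeSeriesMetrics(my_table) LIMIT 10;
```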
|
@ -17,7 +17,7 @@ In terms of SQL, the nearest neighborhood problem can be expressed as follows:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table_with_ann_index
|
||||
FROM table
|
||||
ORDER BY Distance(vectors, Point)
|
||||
LIMIT N
|
||||
```
|
||||
@ -27,75 +27,111 @@ Function `Distance` computes the distance between two vectors. Often, the Euclid
|
||||
distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, e.g. `(0.17,
|
||||
0.33, ...)`, and `N` limits the number of search results.
|
||||
|
||||
An alternative formulation of the nearest neighborhood search problem looks as follows:
|
||||
This query returns the top-`N` closest points to the reference point. Parameter `N` limits the number of returned values which is useful for
|
||||
situations where `MaxDistance` is difficult to determine in advance.
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table_with_ann_index
|
||||
WHERE Distance(vectors, Point) < MaxDistance
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
While the first query returns the top-`N` closest points to the reference point, the second query returns all points closer to the reference
|
||||
point than a maximally allowed radius `MaxDistance`. Parameter `N` limits the number of returned values which is useful for situations where
|
||||
`MaxDistance` is difficult to determine in advance.
|
||||
|
||||
With brute force search, both queries are expensive (linear in the number of points) because the distance between all points in `vectors` and
|
||||
With brute force search, the query is expensive (linear in the number of points) because the distance between all points in `vectors` and
|
||||
`Point` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation
|
||||
of the search space (using clustering, search trees, etc.) which allows computing an approximate answer much more quickly (in sub-linear time).
|
||||
|
||||
# Creating and Using ANN Indexes {#creating_using_ann_indexes}
|
||||
# Creating and Using Vector Similarity Indexes
|
||||
|
||||
Syntax to create an ANN index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
|
||||
Syntax to create a vector similarity index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_ann_index
|
||||
CREATE TABLE table
|
||||
(
|
||||
`id` Int64,
|
||||
`vectors` Array(Float32),
|
||||
INDEX [ann_index_name vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]]
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX index_name vectors TYPE vector_similarity(method, distance_function[, quantization, connectivity, expansion_add, expansion_search]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
Parameters:
|
||||
- `method`: currently only `hnsw` is supported.
|
||||
- `distance_function`: either `L2Distance` (the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) - the length of a
|
||||
line between two points in Euclidean space), or `cosineDistance` (the [cosine
|
||||
distance](https://en.wikipedia.org/wiki/Cosine_similarity#Cosine_distance) - the angle between two non-zero vectors).
|
||||
- `quantization`: either `f32`, `f16`, or `i8` for storing the vector with reduced precision (optional, default: `f32`)
|
||||
- `m`: the number of neighbors per graph node (optional, default: 16)
|
||||
- `ef_construction`: the size of the dynamic candidate list used during index construction; larger values improve recall at the cost of slower index creation (optional, default: 128)
|
||||
- `ef_search`: the size of the dynamic candidate list used during search; larger values improve recall at the cost of slower queries (optional, default: 64)
|
||||
|
||||
Value 0 for parameters `m`, `ef_construction`, and `ef_search` refers to the default value.
|
||||
|
||||
Example:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table
|
||||
(
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX idx vectors TYPE vector_similarity('hnsw', 'L2Distance') -- Alternative syntax: TYPE vector_similarity(hnsw, L2Distance)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
Vector similarity indexes are based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
|
||||
algorithm](https://arxiv.org/abs/1603.09320), i.e., a hierarchical graph where each point represents a vector and the edges represent
|
||||
similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
|
||||
overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors, which are expensive
|
||||
to load and compare. The library also has several hardware-specific SIMD optimizations to further accelerate distance computations on modern
|
||||
Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient navigation around immutable persistent
|
||||
files, without loading them into RAM.
|
||||
|
||||
Vector similarity indexes are currently experimental; to use them, you first need to `SET allow_experimental_vector_similarity_index = 1`.
|
||||
|
||||
Vector similarity indexes currently support two distance functions:
|
||||
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
|
||||
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
|
||||
|
||||
Vector similarity indexes allow storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`.
|
||||
If no scalar kind was specified during index creation, `f16` is used as default.
|
||||
|
||||
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
|
||||
distance function was specified during index creation, `L2Distance` is used as default.
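
As a hedged sketch, an index definition that spells out all parameters in the positional order given by the syntax above (the table name and the parameter values are illustrative, not recommendations):

```sql
CREATE TABLE table_with_tuned_index
(
    id Int64,
    vectors Array(Float32),
    -- hnsw method, cosine distance, f16 quantization, 32 neighbors per node,
    -- ef_construction = 256, ef_search = 128
    INDEX idx vectors TYPE vector_similarity('hnsw', 'cosineDistance', 'f16', 32, 256, 128)
)
ENGINE = MergeTree
ORDER BY id;
```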
|
||||
|
||||
:::note
|
||||
All arrays must have the same length. To avoid errors, you can use a
|
||||
[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK
|
||||
length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported.
|
||||
:::
|
||||
|
||||
:::note
|
||||
The vector similarity index currently does not work with per-table, non-default `index_granularity` settings (see
|
||||
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
|
||||
:::
|
||||
|
||||
ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
|
||||
tables. ANN indexes are ideally used only with immutable or rarely changed data, or when there are far more read requests than write
|
||||
requests.
|
||||
|
||||
ANN indexes support two types of queries:
|
||||
|
||||
- ORDER BY queries:
|
||||
ANN indexes support these queries:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table_with_ann_index
|
||||
FROM table
|
||||
[WHERE ...]
|
||||
ORDER BY Distance(vectors, Point)
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
- WHERE queries:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table_with_ann_index
|
||||
WHERE Distance(vectors, Point) < MaxDistance
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
:::tip
|
||||
To avoid writing out large vectors, you can use [query
|
||||
parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g.
|
||||
|
||||
```bash
|
||||
clickhouse-client --param_vec='hello' --query="SELECT * FROM table_with_ann_index WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0"
|
||||
clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0"
|
||||
```
|
||||
:::
|
||||
|
||||
**Restrictions**: Queries that contain both a `WHERE Distance(vectors, Point) < MaxDistance` and an `ORDER BY Distance(vectors, Point)`
|
||||
clause cannot use ANN indexes. Also, the approximate algorithms used to determine the nearest neighbors require a limit, hence queries
|
||||
without `LIMIT` clause cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting
|
||||
**Restrictions**: Approximate algorithms used to determine the nearest neighbors require a limit, hence queries without `LIMIT` clause
|
||||
cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting
|
||||
`max_limit_for_ann_queries` (default: 1 million rows). This is a safeguard to prevent large memory allocations by external libraries for
|
||||
approximate neighbor search.
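
For example, a query shaped like the following can use the index because it has a `LIMIT` below `max_limit_for_ann_queries`; the same query without a `LIMIT` cannot (a sketch; the table name and reference vector are placeholders):

``` sql
SELECT id
FROM table
ORDER BY L2Distance(vectors, [0.17, 0.33, 0.25])
LIMIT 10;
```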
|
||||
|
||||
@ -122,128 +158,3 @@ brute-force distance calculation over all rows of the granules. With a small `GR
|
||||
equally good, only the processing performance differs. It is generally recommended to use a large `GRANULARITY` for ANN indexes and fall
|
||||
back to smaller `GRANULARITY` values only in case of problems like excessive memory consumption of the ANN structures. If no `GRANULARITY`
|
||||
was specified for ANN indexes, the default value is 100 million.
|
||||
|
||||
|
||||
# Available ANN Indexes {#available_ann_indexes}
|
||||
|
||||
- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
|
||||
|
||||
- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch)
|
||||
|
||||
## Annoy {#annoy}
|
||||
|
||||
Annoy indexes are currently experimental, to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently
|
||||
disabled on ARM due to memory safety problems with the algorithm.
|
||||
|
||||
This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random
|
||||
linear surfaces (lines in 2D, planes in 3D etc.).
|
||||
|
||||
<div class='vimeo-container'>
|
||||
<iframe src="//www.youtube.com/embed/QkCCyLW0ehU"
|
||||
width="640"
|
||||
height="360"
|
||||
frameborder="0"
|
||||
allow="autoplay;
|
||||
fullscreen;
|
||||
picture-in-picture"
|
||||
allowfullscreen>
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
Syntax to create an Annoy index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_annoy_index
|
||||
(
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
Annoy currently supports two distance functions:
|
||||
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
|
||||
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
|
||||
|
||||
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
|
||||
distance function was specified during index creation, `L2Distance` is used as default.
|
||||
|
||||
Parameter `NumTrees` is the number of trees which the algorithm creates (default if not specified: 100). Higher values of `NumTree` mean
|
||||
more accurate search results but slower index creation / query times (approximately linearly) as well as larger index sizes.
|
||||
|
||||
:::note
|
||||
All arrays must have same length. To avoid errors, you can use a
|
||||
[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK
|
||||
length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported.
|
||||
:::
|
||||
|
||||
The creation of Annoy indexes (whenever a new part is build, e.g. at the end of a merge) is a relatively slow process. You can increase
|
||||
setting `max_threads_for_annoy_index_creation` (default: 4) which controls how many threads are used to create an Annoy index. Please be
|
||||
careful with this setting, it is possible that multiple indexes are created in parallel in which case there can be overparallelization.
|
||||
|
||||
Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger
|
||||
values mean more accurate results at the cost of longer query runtime:
|
||||
|
||||
```sql
|
||||
SELECT *
|
||||
FROM table_name
|
||||
ORDER BY L2Distance(vectors, Point)
|
||||
LIMIT N
|
||||
SETTINGS annoy_index_search_k_nodes=100;
|
||||
```
|
||||
|
||||
:::note
|
||||
The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see
|
||||
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
|
||||
:::
|
||||
|
||||
## USearch {#usearch}
|
||||
|
||||
This type of ANN index is based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
|
||||
algorithm](https://arxiv.org/abs/1603.09320), i.e., builds a hierarchical graph where each point represents a vector and the edges represent
|
||||
similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
|
||||
overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors,
|
||||
that are expensive to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further
|
||||
distance computations on modern Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient
|
||||
navigation around immutable persistent files, without loading them into RAM.
|
||||
|
||||
<div class='vimeo-container'>
|
||||
<iframe src="//www.youtube.com/embed/UMrhB3icP9w"
|
||||
width="640"
|
||||
height="360"
|
||||
frameborder="0"
|
||||
allow="autoplay;
|
||||
fullscreen;
|
||||
picture-in-picture"
|
||||
allowfullscreen>
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
Syntax to create an USearch index over an [Array](../../../sql-reference/data-types/array.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_usearch_index
|
||||
(
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
USearch currently supports two distance functions:
|
||||
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
|
||||
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
|
||||
|
||||
USearch allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. If no scalar kind
|
||||
was specified during index creation, `f16` is used as default.
|
||||
|
||||
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
|
||||
distance function was specified during index creation, `L2Distance` is used as default.
|
||||
|
@ -1005,7 +1005,7 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
|
||||
|
||||
## Column-level Settings {#column-level-settings}
|
||||
|
||||
Certain MergeTree settings can be override at column level:
|
||||
Certain MergeTree settings can be overridden at column level:
|
||||
|
||||
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table.
|
||||
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark.
|
||||
|
@ -379,7 +379,7 @@ You can mitigate this problem by enabling `wait_end_of_query=1` ([Response Buffe
|
||||
However, this does not completely solve the problem because the result must still fit within the `http_response_buffer_size`, and other settings like `send_progress_in_http_headers` can interfere with the delay of the header.
|
||||
The only way to catch all errors is to analyze the HTTP body before parsing it using the required format.
|
||||
|
||||
### Queries with Parameters {#cli-queries-with-parameters}
|
||||
## Queries with Parameters {#cli-queries-with-parameters}
|
||||
|
||||
You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](../interfaces/cli.md#cli-queries-with-parameters).
|
||||
|
||||
|
160
docs/en/interfaces/prometheus.md
Normal file
160
docs/en/interfaces/prometheus.md
Normal file
@ -0,0 +1,160 @@
|
||||
---
|
||||
slug: /en/interfaces/prometheus
|
||||
sidebar_position: 19
|
||||
sidebar_label: Prometheus protocols
|
||||
---
|
||||
|
||||
# Prometheus protocols
|
||||
|
||||
## Exposing metrics {#expose}
|
||||
|
||||
:::note
|
||||
ClickHouse Cloud does not currently support connecting to Prometheus. To be notified when this feature is supported, please contact support@clickhouse.com.
|
||||
:::
|
||||
|
||||
ClickHouse can expose its own metrics for scraping from Prometheus:
|
||||
|
||||
```xml
|
||||
<prometheus>
|
||||
<port>9363</port>
|
||||
<endpoint>/metrics</endpoint>
|
||||
<metrics>true</metrics>
|
||||
<asynchronous_metrics>true</asynchronous_metrics>
|
||||
<events>true</events>
|
||||
<errors>true</errors>
|
||||
</prometheus>
```
|
||||
|
||||
Section `<prometheus.handlers>` can be used to configure more specialized handlers.
|
||||
This section is similar to [<http_handlers>](/en/interfaces/http) but works for the Prometheus protocols:
|
||||
|
||||
```xml
|
||||
<prometheus>
|
||||
<port>9363</port>
|
||||
<handlers>
|
||||
<my_rule_1>
|
||||
<url>/metrics</url>
|
||||
<handler>
|
||||
<type>expose_metrics</type>
|
||||
<metrics>true</metrics>
|
||||
<asynchronous_metrics>true</asynchronous_metrics>
|
||||
<events>true</events>
|
||||
<errors>true</errors>
|
||||
</handler>
|
||||
</my_rule_1>
|
||||
</handlers>
|
||||
</prometheus>
|
||||
```
|
||||
|
||||
Settings:
|
||||
|
||||
| Name | Default | Description |
|
||||
|---|---|---|
|
||||
| `port` | none | Port for serving the exposing metrics protocol. |
|
||||
| `endpoint` | `/metrics` | HTTP endpoint for scraping metrics by prometheus server. Starts with `/`. Should not be used with the `<handlers>` section. |
|
||||
| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [<http_handlers>](/en/interfaces/http) section. |
|
||||
| `metrics` | true | Expose metrics from the [system.metrics](/en/operations/system-tables/metrics) table. |
|
||||
| `asynchronous_metrics` | true | Expose current metrics values from the [system.asynchronous_metrics](/en/operations/system-tables/asynchronous_metrics) table. |
|
||||
| `events` | true | Expose metrics from the [system.events](/en/operations/system-tables/events) table. |
|
||||
| `errors` | true | Expose the number of errors by error code that have occurred since the last server restart. This information can also be obtained from the [system.errors](/en/operations/system-tables/errors) table. |
|
||||
|
||||
Check (replace `127.0.0.1` with the IP address or hostname of your ClickHouse server):
|
||||
```bash
|
||||
curl 127.0.0.1:9363/metrics
|
||||
```
|
||||
|
||||
## Remote-write protocol {#remote-write}
|
||||
|
||||
ClickHouse supports the [remote-write](https://prometheus.io/docs/specs/remote_write_spec/) protocol.
|
||||
Data received via this protocol is written to a [TimeSeries](/en/engines/table-engines/special/time_series) table
|
||||
(which should be created beforehand).
|
||||
|
||||
```xml
|
||||
<prometheus>
|
||||
<port>9363</port>
|
||||
<handlers>
|
||||
<my_rule_1>
|
||||
<url>/write</url>
|
||||
<handler>
|
||||
<type>remote_write</type>
|
||||
<database>db_name</database>
|
||||
<table>time_series_table</table>
|
||||
</handler>
|
||||
</my_rule_1>
|
||||
</handlers>
|
||||
</prometheus>
|
||||
```
|
||||
|
||||
Settings:
|
||||
|
||||
| Name | Default | Description |
|
||||
|---|---|---|
|
||||
| `port` | none | Port for serving the `remote-write` protocol. |
|
||||
| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [<http_handlers>](/en/interfaces/http) section. |
|
||||
| `table` | none | The name of a [TimeSeries](/en/engines/table-engines/special/time_series) table to write data received by the `remote-write` protocol. This name can optionally contain the name of a database too. |
|
||||
| `database` | none | The name of the database containing the table specified in the `table` setting, if the database is not specified in the `table` setting itself. |
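
The target table referenced above must exist before data arrives. A minimal sketch, reusing the `db_name` / `time_series_table` names from the configuration example (the engine currently requires the experimental flag):

```sql
SET allow_experimental_time_series_table = 1;

CREATE DATABASE IF NOT EXISTS db_name;
CREATE TABLE db_name.time_series_table ENGINE = TimeSeries;
```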
|
||||
|
||||
## Remote-read protocol {#remote-read}
|
||||
|
||||
ClickHouse supports the [remote-read](https://prometheus.io/docs/prometheus/latest/querying/remote_read_api/) protocol.
|
||||
Data are read from a [TimeSeries](/en/engines/table-engines/special/time_series) table and sent via this protocol.
|
||||
|
||||
```xml
|
||||
<prometheus>
|
||||
<port>9363</port>
|
||||
<handlers>
|
||||
<my_rule_1>
|
||||
<url>/read</url>
|
||||
<handler>
|
||||
<type>remote_read</type>
|
||||
<database>db_name</database>
|
||||
<table>time_series_table</table>
|
||||
</handler>
|
||||
</my_rule_1>
|
||||
</handlers>
|
||||
</prometheus>
|
||||
```
|
||||
|
||||
Settings:
|
||||
|
||||
| Name | Default | Description |
|
||||
|---|---|---|
|
||||
| `port` | none | Port for serving the `remote-read` protocol. |
|
||||
| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [<http_handlers>](/en/interfaces/http) section. |
|
||||
| `table` | none | The name of a [TimeSeries](/en/engines/table-engines/special/time_series) table from which data is read and sent via the `remote-read` protocol. This name can optionally contain the name of a database too. |
|
||||
| `database` | none | The name of the database containing the table specified in the `table` setting, if the database is not specified in the `table` setting itself. |
|
||||
|
||||
## Configuration for multiple protocols {#multiple-protocols}
|
||||
|
||||
Multiple protocols can be specified together in one place:
|
||||
|
||||
```xml
|
||||
<prometheus>
|
||||
<port>9363</port>
|
||||
<handlers>
|
||||
<my_rule_1>
|
||||
<url>/metrics</url>
|
||||
<handler>
|
||||
<type>expose_metrics</type>
|
||||
<metrics>true</metrics>
|
||||
<asynchronous_metrics>true</asynchronous_metrics>
|
||||
<events>true</events>
|
||||
<errors>true</errors>
|
||||
</handler>
|
||||
</my_rule_1>
|
||||
<my_rule_2>
|
||||
<url>/write</url>
|
||||
<handler>
|
||||
<type>remote_write</type>
|
||||
<table>db_name.time_series_table</table>
|
||||
</handler>
|
||||
</my_rule_2>
|
||||
<my_rule_3>
|
||||
<url>/read</url>
|
||||
<handler>
|
||||
<type>remote_read</type>
|
||||
<table>db_name.time_series_table</table>
|
||||
</handler>
|
||||
</my_rule_3>
|
||||
</handlers>
|
||||
</prometheus>
|
||||
```
|
@ -359,13 +359,14 @@ DESC format(JSONEachRow, '{"int" : 42, "float" : 42.42, "string" : "Hello, World
|
||||
Dates, DateTimes:
|
||||
|
||||
```sql
|
||||
DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00"}')
|
||||
DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}')
|
||||
```
|
||||
```response
|
||||
┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ date │ Nullable(Date) │ │ │ │ │ │
|
||||
│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ date │ Nullable(Date) │ │ │ │ │ │
|
||||
│ datetime │ Nullable(DateTime) │ │ │ │ │ │
|
||||
│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
Arrays:
|
||||
@ -759,12 +760,13 @@ DESC format(CSV, 'Hello world!,World hello!')
|
||||
Dates, DateTimes:
|
||||
|
||||
```sql
|
||||
DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00"')
|
||||
DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00","2022-01-01 00:00:00.000"')
|
||||
```
|
||||
```response
|
||||
┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ c1 │ Nullable(Date) │ │ │ │ │ │
|
||||
│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
│ c2 │ Nullable(DateTime) │ │ │ │ │ │
|
||||
│ c3 │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
@ -956,12 +958,13 @@ DESC format(TSKV, 'int=42 float=42.42 bool=true string=Hello,World!\n')
|
||||
Dates, DateTimes:
|
||||
|
||||
```sql
|
||||
DESC format(TSV, '2020-01-01 2020-01-01 00:00:00')
|
||||
DESC format(TSV, '2020-01-01 2020-01-01 00:00:00 2022-01-01 00:00:00.000')
|
||||
```
|
||||
```response
|
||||
┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ c1 │ Nullable(Date) │ │ │ │ │ │
|
||||
│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
│ c2 │ Nullable(DateTime) │ │ │ │ │ │
|
||||
│ c3 │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
@ -1126,12 +1129,13 @@ DESC format(Values, $$(42, 42.42, true, 'Hello,World!')$$)
|
||||
Dates, DateTimes:
|
||||
|
||||
```sql
|
||||
DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00')$$)
|
||||
```
|
||||
DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00', '2022-01-01 00:00:00.000')$$)
|
||||
```
|
||||
```response
|
||||
┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ c1 │ Nullable(Date) │ │ │ │ │ │
|
||||
│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
│ c2 │ Nullable(DateTime) │ │ │ │ │ │
|
||||
│ c3 │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
@ -1504,8 +1508,8 @@ DESC format(JSONEachRow, $$
|
||||
|
||||
#### input_format_try_infer_datetimes
|
||||
|
||||
If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats.
|
||||
If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`,
|
||||
If enabled, ClickHouse will try to infer type `DateTime` or `DateTime64` from string fields in schema inference for text formats.
|
||||
If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime` or `DateTime64(9)` (if any datetime had fractional part),
|
||||
if at least one field was not parsed as datetime, the result type will be `String`.
|
||||
|
||||
Enabled by default.
|
||||
@ -1513,39 +1517,66 @@ Enabled by default.
|
||||
**Examples**
|
||||
|
||||
```sql
|
||||
SET input_format_try_infer_datetimes = 0
|
||||
SET input_format_try_infer_datetimes = 0;
|
||||
DESC format(JSONEachRow, $$
|
||||
{"datetime" : "2021-01-01 00:00:00.000"}
|
||||
{"datetime" : "2022-01-01 00:00:00.000"}
|
||||
{"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
|
||||
{"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}
|
||||
$$)
|
||||
```
|
||||
```response
|
||||
┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ datetime │ Nullable(String) │ │ │ │ │ │
|
||||
└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ datetime │ Nullable(String) │ │ │ │ │ │
|
||||
│ datetime64 │ Nullable(String) │ │ │ │ │ │
|
||||
└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
```sql
|
||||
SET input_format_try_infer_datetimes = 1
|
||||
SET input_format_try_infer_datetimes = 1;
|
||||
DESC format(JSONEachRow, $$
|
||||
{"datetime" : "2021-01-01 00:00:00.000"}
|
||||
{"datetime" : "2022-01-01 00:00:00.000"}
|
||||
{"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
|
||||
{"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}
|
||||
$$)
|
||||
```
|
||||
```response
|
||||
┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ datetime │ Nullable(DateTime) │ │ │ │ │ │
|
||||
│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
```sql
|
||||
DESC format(JSONEachRow, $$
|
||||
{"datetime" : "2021-01-01 00:00:00.000"}
|
||||
{"datetime" : "unknown"}
|
||||
{"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
|
||||
{"datetime" : "unknown", "datetime64" : "unknown"}
|
||||
$$)
|
||||
```
|
||||
```response
|
||||
┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ datetime │ Nullable(String) │ │ │ │ │ │
|
||||
└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ datetime │ Nullable(String) │ │ │ │ │ │
|
||||
│ datetime64 │ Nullable(String) │ │ │ │ │ │
|
||||
└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
#### input_format_try_infer_datetimes_only_datetime64
|
||||
|
||||
If enabled, ClickHouse will always infer `DateTime64(9)` when `input_format_try_infer_datetimes` is enabled, even if datetime values don't contain a fractional part.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
**Examples**
|
||||
|
||||
```sql
|
||||
SET input_format_try_infer_datetimes = 1;
|
||||
SET input_format_try_infer_datetimes_only_datetime64 = 1;
|
||||
DESC format(JSONEachRow, $$
|
||||
{"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
|
||||
{"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}
|
||||
$$)
|
||||
```
|
||||
|
||||
```text
|
||||
┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
Note: Parsing datetimes during schema inference respects the setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format)
|
||||
|
@ -123,7 +123,7 @@ To ensure consistent and expected results, especially when migrating old queries
|
||||
In the new version of the analyzer, the rules for determining the common supertype for columns specified in the `USING` clause have been standardized to produce more predictable outcomes, especially when dealing with type modifiers like `LowCardinality` and `Nullable`.
|
||||
|
||||
- `LowCardinality(T)` and `T`: When a column of type `LowCardinality(T)` is joined with a column of type `T`, the resulting common supertype will be `T`, effectively discarding the `LowCardinality` modifier.
|
||||
|
||||
|
||||
- `Nullable(T)` and `T`: When a column of type `Nullable(T)` is joined with a column of type `T`, the resulting common supertype will be `Nullable(T)`, ensuring that the nullable property is preserved.
|
||||
|
||||
**Example:**
|
||||
@ -144,7 +144,7 @@ During projection names computation, aliases are not substituted.
|
||||
SELECT
|
||||
1 + 1 AS x,
|
||||
x + 1
|
||||
SETTINGS allow_experimental_analyzer = 0
|
||||
SETTINGS enable_analyzer = 0
|
||||
FORMAT PrettyCompact
|
||||
|
||||
┌─x─┬─plus(plus(1, 1), 1)─┐
|
||||
@ -154,7 +154,7 @@ FORMAT PrettyCompact
|
||||
SELECT
|
||||
1 + 1 AS x,
|
||||
x + 1
|
||||
SETTINGS allow_experimental_analyzer = 1
|
||||
SETTINGS enable_analyzer = 1
|
||||
FORMAT PrettyCompact
|
||||
|
||||
┌─x─┬─plus(x, 1)─┐
|
||||
@ -177,7 +177,7 @@ SELECT toTypeName(if(0, [2, 3, 4], 'String'))
|
||||
|
||||
### Heterogeneous clusters
|
||||
|
||||
The new analyzer significantly changed the communication protocol between servers in the cluster. Thus, it's impossible to run distributed queries on servers with different `allow_experimental_analyzer` setting values.
|
||||
The new analyzer significantly changed the communication protocol between servers in the cluster. Thus, it's impossible to run distributed queries on servers with different `enable_analyzer` setting values.
|
||||
|
||||
### Mutations are interpreted by previous analyzer
|
||||
|
||||
|
@ -143,6 +143,18 @@ value can be specified at session, profile or query level using setting [query_c
|
||||
Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads
|
||||
from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries).
|
||||
|
||||
Sometimes it is useful to keep multiple results for the same query cached. This can be achieved using setting
|
||||
[query_cache_tag](settings/settings.md#query-cache-tag) that acts as a label (or namespace) for query cache entries. The query cache
|
||||
considers results of the same query with different tags to be different.
|
||||
|
||||
Example for creating three different query cache entries for the same query:
|
||||
|
||||
```sql
|
||||
SELECT 1 SETTINGS use_query_cache = true; -- query_cache_tag is implicitly '' (empty string)
|
||||
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 1';
|
||||
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 2';
|
||||
```
|
||||
|
||||
ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation,
|
||||
etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting
|
||||
[query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks
|
||||
|
@ -1400,6 +1400,16 @@ The number of seconds that ClickHouse waits for incoming requests before closing
|
||||
<keep_alive_timeout>10</keep_alive_timeout>
|
||||
```
|
||||
|
||||
## max_keep_alive_requests {#max-keep-alive-requests}
|
||||
|
||||
The maximum number of requests served over a single keep-alive connection before it is closed by the ClickHouse server. Defaults to 10000.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<max_keep_alive_requests>10</max_keep_alive_requests>
|
||||
```
|
||||
|
||||
## listen_host {#listen_host}
|
||||
|
||||
Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.
|
||||
@ -2112,48 +2122,6 @@ The trailing slash is mandatory.
|
||||
<path>/var/lib/clickhouse/</path>
|
||||
```
|
||||
|
||||
## Prometheus {#prometheus}
|
||||
|
||||
:::note
|
||||
ClickHouse Cloud does not currently support connecting to Prometheus. To be notified when this feature is supported, please contact support@clickhouse.com.
|
||||
:::
|
||||
|
||||
Exposing metrics data for scraping from [Prometheus](https://prometheus.io).
|
||||
|
||||
Settings:
|
||||
|
||||
- `endpoint` – HTTP endpoint for scraping metrics by prometheus server. Start from ‘/’.
|
||||
- `port` – Port for `endpoint`.
|
||||
- `metrics` – Expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
|
||||
- `events` – Expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
|
||||
- `asynchronous_metrics` – Expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
|
||||
- `errors` - Expose the number of errors by error codes occurred since the last server restart. This information could be obtained from the [system.errors](../../operations/system-tables/asynchronous_metrics.md#system_tables-errors) as well.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<listen_host>0.0.0.0</listen_host>
|
||||
<http_port>8123</http_port>
|
||||
<tcp_port>9000</tcp_port>
|
||||
<!-- highlight-start -->
|
||||
<prometheus>
|
||||
<endpoint>/metrics</endpoint>
|
||||
<port>9363</port>
|
||||
<metrics>true</metrics>
|
||||
<events>true</events>
|
||||
<asynchronous_metrics>true</asynchronous_metrics>
|
||||
<errors>true</errors>
|
||||
</prometheus>
|
||||
<!-- highlight-end -->
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Check (replace `127.0.0.1` with the IP addr or hostname of your ClickHouse server):
|
||||
```bash
|
||||
curl 127.0.0.1:9363/metrics
|
||||
```
|
||||
|
||||
## query_log {#query-log}
|
||||
|
||||
Setting for logging queries received with the [log_queries=1](../../operations/settings/settings.md) setting.
|
||||
|
@ -1041,3 +1041,27 @@ Compression rates of LZ4 or ZSTD improve on average by 20-40%.
|
||||
|
||||
This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only few distinct primary key values.
|
||||
High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting.
|
||||
|
||||
## lightweight_mutation_projection_mode
|
||||
|
||||
By default, lightweight `DELETE` does not work for tables with projections, because rows in a projection may be affected by a `DELETE` operation, so the default value is `throw`.
|
||||
However, this option can change the behavior. With the value `drop` or `rebuild`, deletes will work with projections. `drop` deletes the projection, so the current query may be fast because the projection is simply removed, but future queries may be slow because no projection is attached.
|
||||
`rebuild` rebuilds the projection, which may affect the performance of the current query but may speed up future queries. Note that these options only work at the part level,
|
||||
which means projections in parts that are not touched stay intact instead of being dropped or rebuilt.
|
||||
|
||||
Possible values:
|
||||
|
||||
- throw, drop, rebuild
|
||||
|
||||
Default value: throw
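
A hedged sketch of using it (assuming the setting can be supplied at query level; the table name is a placeholder):

```sql
DELETE FROM table_with_projection WHERE id = 42
SETTINGS lightweight_mutation_projection_mode = 'rebuild';
```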
|
||||
|
||||
## deduplicate_merge_projection_mode
|
||||
|
||||
Whether to allow creating projections for tables with non-classic MergeTree engines, that is, not (Replicated, Shared) MergeTree, and, if allowed, what action to take when merging projections: either drop or rebuild. Classic MergeTree ignores this setting.
|
||||
It also controls `OPTIMIZE DEDUPLICATE`, but in that case it affects all MergeTree family members. Like the `lightweight_mutation_projection_mode` option, it works at the part level.
|
||||
|
||||
Possible values:
|
||||
|
||||
- throw, drop, rebuild
|
||||
|
||||
Default value: throw
|
@ -1800,6 +1800,17 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## query_cache_tag {#query-cache-tag}
|
||||
|
||||
A string which acts as a label for [query cache](../query-cache.md) entries.
|
||||
The same queries with different tags are considered different by the query cache.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Any string
|
||||
|
||||
Default value: `''`
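
For example (mirroring the example in the query cache documentation), the same query can be cached under two different tags:

```sql
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 1';
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 2';
```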
|
||||
|
||||
## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}
|
||||
|
||||
The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.
|
||||
@ -4051,7 +4062,7 @@ Rewrite aggregate functions with if expression as argument when logically equiva
|
||||
For example, `avg(if(cond, col, null))` can be rewritten to `avgOrNullIf(cond, col)`. It may improve performance.
|
||||
|
||||
:::note
|
||||
Supported only with experimental analyzer (`allow_experimental_analyzer = 1`).
|
||||
Supported only with experimental analyzer (`enable_analyzer = 1`).
|
||||
:::
|
||||
|
||||
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
|
||||
@ -4629,8 +4640,8 @@ Default Value: 5.
|
||||
|
||||
## memory_overcommit_ratio_denominator {#memory_overcommit_ratio_denominator}
|
||||
|
||||
It represents soft memory limit in case when hard limit is reached on user level.
|
||||
This value is used to compute overcommit ratio for the query.
|
||||
It represents the soft memory limit when the hard limit is reached on the global level.
|
||||
This value is used to compute the overcommit ratio for the query.
|
||||
Zero means skip the query.
|
||||
Read more about [memory overcommit](memory-overcommit.md).
|
||||
|
||||
@ -4646,8 +4657,8 @@ Default value: `5000000`.
|
||||
|
||||
## memory_overcommit_ratio_denominator_for_user {#memory_overcommit_ratio_denominator_for_user}
|
||||
|
||||
It represents soft memory limit in case when hard limit is reached on global level.
|
||||
This value is used to compute overcommit ratio for the query.
|
||||
It represents the soft memory limit when the hard limit is reached on the user level.
|
||||
This value is used to compute the overcommit ratio for the query.
|
||||
Zero means skip the query.
|
||||
Read more about [memory overcommit](memory-overcommit.md).
|
||||
|
||||
@ -5609,8 +5620,43 @@ Minimal size of block to compress in CROSS JOIN. Zero value means - disable this
|
||||
|
||||
Default value: `1GiB`.
|
||||
|
||||
## restore_replace_external_engines_to_null
|
||||
|
||||
For testing purposes. Replaces all external table engines with Null so that no external connections are initiated.
|
||||
|
||||
Default value: `False`
|
||||
|
||||
## restore_replace_external_table_functions_to_null
|
||||
|
||||
For testing purposes. Replaces all external table functions with Null so that no external connections are initiated.
|
||||
|
||||
Default value: `False`
|
||||
|
||||
## disable_insertion_and_mutation
|
||||
|
||||
Disable all inserts and mutations (ALTER TABLE UPDATE / ALTER TABLE DELETE / ALTER TABLE DROP PARTITION). When set to true, this lets the node focus on read queries.
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## use_hive_partitioning
|
||||
|
||||
When enabled, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) in file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and will allow using partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
Default value: `false`.
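
A hedged sketch (the file path and partition key are illustrative; per the description above, the partition column is exposed as a virtual column with a leading underscore):

```sql
SET use_hive_partitioning = 1;

SELECT _date, count()
FROM file('data/date=*/*.parquet')
GROUP BY _date;
```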
|
||||
|
||||
## allow_experimental_time_series_table {#allow-experimental-time-series-table}
|
||||
|
||||
Allows creation of tables with the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is disabled.
|
||||
- 1 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is enabled.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## create_if_not_exists
|
||||
|
||||
Enable `IF NOT EXISTS` for `CREATE` statements by default. If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown.
|
||||
|
||||
Default value: `false`.
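
A hedged sketch of the effect:

```sql
SET create_if_not_exists = 1;

CREATE TABLE t (x UInt8) ENGINE = Memory;
CREATE TABLE t (x UInt8) ENGINE = Memory; -- no exception: behaves like CREATE TABLE IF NOT EXISTS
```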
|
||||
|
@ -24,6 +24,7 @@ Columns:
|
||||
- `num_rebalance_revocations`, (UInt64) - number of times the consumer was revoked its partitions
|
||||
- `num_rebalance_assignments`, (UInt64) - number of times the consumer was assigned to Kafka cluster
|
||||
- `is_currently_used`, (UInt8) - consumer is in use
|
||||
- `last_used`, (UInt64) - last time this consumer was in use, unix time in microseconds
|
||||
- `rdkafka_stat` (String) - library internal statistics. See https://github.com/ClickHouse/librdkafka/blob/master/STATISTICS.md. Set `statistics_interval_ms` to 0 to disable, default is 3000 (once every three seconds).
|
||||
|
||||
Example:
|
||||
|
@ -9,6 +9,7 @@ Columns:
|
||||
|
||||
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
|
||||
- `result_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Size of the query cache entry.
|
||||
- `tag` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Tag of the query cache entry.
|
||||
- `stale` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is stale.
|
||||
- `shared` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is shared between multiple users.
|
||||
- `compressed` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is compressed.
|
||||
@ -26,6 +27,7 @@ Row 1:
|
||||
──────
|
||||
query: SELECT 1 SETTINGS use_query_cache = 1
|
||||
result_size: 128
|
||||
tag:
|
||||
stale: 0
|
||||
shared: 0
|
||||
compressed: 1
|
||||
|
@ -3,7 +3,7 @@ slug: /en/operations/system-tables/trace_log
|
||||
---
|
||||
# trace_log
|
||||
|
||||
Contains stack traces collected by the sampling query profiler.
|
||||
Contains stack traces collected by the [sampling query profiler](../../operations/optimizing-performance/sampling-query-profiler.md).
|
||||
|
||||
ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also see settings: [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns), [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns), [memory_profiler_step](../../operations/settings/settings.md#memory_profiler_step),
|
||||
[memory_profiler_sample_probability](../../operations/settings/settings.md#memory_profiler_sample_probability), [trace_profile_events](../../operations/settings/settings.md#trace_profile_events).
|
||||
|
@ -28,39 +28,39 @@ A client application to interact with clickhouse-keeper by its native protocol.
|
||||
Connected to ZooKeeper at [::1]:9181 with session_id 137
|
||||
/ :) ls
|
||||
keeper foo bar
|
||||
/ :) cd keeper
|
||||
/ :) cd 'keeper'
|
||||
/keeper :) ls
|
||||
api_version
|
||||
/keeper :) cd api_version
|
||||
/keeper :) cd 'api_version'
|
||||
/keeper/api_version :) ls
|
||||
|
||||
/keeper/api_version :) cd xyz
|
||||
/keeper/api_version :) cd 'xyz'
|
||||
Path /keeper/api_version/xyz does not exist
|
||||
/keeper/api_version :) cd ../../
|
||||
/ :) ls
|
||||
keeper foo bar
|
||||
/ :) get keeper/api_version
|
||||
/ :) get 'keeper/api_version'
|
||||
2
|
||||
```
|
||||
|
||||
## Commands {#clickhouse-keeper-client-commands}
|
||||
|
||||
- `ls [path]` -- Lists the nodes for the given path (default: cwd)
|
||||
- `cd [path]` -- Changes the working path (default `.`)
|
||||
- `exists <path>` -- Returns `1` if node exists, `0` otherwise
|
||||
- `set <path> <value> [version]` -- Updates the node's value. Only updates if version matches (default: -1)
|
||||
- `create <path> <value> [mode]` -- Creates new node with the set value
|
||||
- `touch <path>` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
|
||||
- `get <path>` -- Returns the node's value
|
||||
- `rm <path> [version]` -- Removes the node only if version matches (default: -1)
|
||||
- `rmr <path>` -- Recursively deletes path. Confirmation required
|
||||
- `ls '[path]'` -- Lists the nodes for the given path (default: cwd)
|
||||
- `cd '[path]'` -- Changes the working path (default `.`)
|
||||
- `exists '<path>'` -- Returns `1` if node exists, `0` otherwise
|
||||
- `set '<path>' <value> [version]` -- Updates the node's value. Only updates if version matches (default: -1)
|
||||
- `create '<path>' <value> [mode]` -- Creates new node with the set value
|
||||
- `touch '<path>'` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
|
||||
- `get '<path>'` -- Returns the node's value
|
||||
- `rm '<path>' [version]` -- Removes the node only if version matches (default: -1)
|
||||
- `rmr '<path>'` -- Recursively deletes path. Confirmation required
|
||||
- `flwc <command>` -- Executes four-letter-word command
|
||||
- `help` -- Prints this message
|
||||
- `get_direct_children_number [path]` -- Get numbers of direct children nodes under a specific path
|
||||
- `get_all_children_number [path]` -- Get all numbers of children nodes under a specific path
|
||||
- `get_stat [path]` -- Returns the node's stat (default `.`)
|
||||
- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
|
||||
- `get_direct_children_number '[path]'` -- Get numbers of direct children nodes under a specific path
|
||||
- `get_all_children_number '[path]'` -- Get all numbers of children nodes under a specific path
|
||||
- `get_stat '[path]'` -- Returns the node's stat (default `.`)
|
||||
- `find_super_nodes <threshold> '[path]'` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
|
||||
- `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
|
||||
- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)
|
||||
- `sync <path>` -- Synchronizes node between processes and leader
|
||||
- `sync '<path>'` -- Synchronizes node between processes and leader
|
||||
- `reconfig <add|remove|set> "<arg>" [version]` -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration
|
||||
|
@ -10,7 +10,7 @@ Calculates a concatenated string from a group of strings, optionally separated b
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
groupConcat(expression [, delimiter] [, limit]);
|
||||
groupConcat[(delimiter [, limit])](expression);
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
@ -20,7 +20,7 @@ groupConcat(expression [, delimiter] [, limit]);
|
||||
- `limit` — A positive [integer](../../../sql-reference/data-types/int-uint.md) specifying the maximum number of elements to concatenate. If more elements are present, excess elements are ignored. This parameter is optional.
|
||||
|
||||
:::note
|
||||
If delimiter is specified without limit, it must be the first parameter following the expression. If both delimiter and limit are specified, delimiter must precede limit.
|
||||
If delimiter is specified without limit, it must be the first parameter. If both delimiter and limit are specified, delimiter must precede limit.
|
||||
:::
|
||||
|
||||
**Returned value**
|
||||
@ -61,7 +61,7 @@ This concatenates all names into one continuous string without any separator.
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT groupConcat(Name, ', ', 2) FROM Employees;
|
||||
SELECT groupConcat(', ')(Name) FROM Employees;
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -78,7 +78,7 @@ This output shows the names separated by a comma followed by a space.
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT groupConcat(Name, ', ', 2) FROM Employees;
|
||||
SELECT groupConcat(', ', 2)(Name) FROM Employees;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
@ -14,7 +14,7 @@ To declare a column of `Dynamic` type, use the following syntax:
|
||||
<column_name> Dynamic(max_types=N)
|
||||
```
|
||||
|
||||
Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_types` is `32`.
|
||||
Where `N` is an optional parameter between `0` and `254` indicating how many different data types can be stored as separate subcolumns inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all values with new types will be stored together in a special shared data structure in binary form. Default value of `max_types` is `32`.
|
||||
|
||||
:::note
|
||||
The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`.
|
||||
@ -224,41 +224,43 @@ SELECT d::Dynamic(max_types=5) as d2, dynamicType(d2) FROM test;
|
||||
└───────┴────────────────┘
|
||||
```
|
||||
|
||||
If `K < N`, then the values with the rarest types are converted to `String`:
|
||||
If `K < N`, then the values with the rarest types will be inserted into a single special subcolumn, but still will be accessible:
|
||||
```text
|
||||
CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory;
|
||||
INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]);
|
||||
SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2) FROM test;
|
||||
SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2), isDynamicElementInSharedData(d2) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐
|
||||
│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │
|
||||
│ 42 │ Int64 │ 42 │ Int64 │
|
||||
│ 43 │ Int64 │ 43 │ Int64 │
|
||||
│ 42.42 │ String │ 42.42 │ String │
|
||||
│ true │ Bool │ true │ String │
|
||||
│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │
|
||||
└─────────┴────────────────┴─────────┴─────────────────┘
|
||||
┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┬─isDynamicElementInSharedData(d2)─┐
|
||||
│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ false │
|
||||
│ 42 │ Int64 │ 42 │ Int64 │ false │
|
||||
│ 43 │ Int64 │ 43 │ Int64 │ false │
|
||||
│ 42.42 │ String │ 42.42 │ String │ false │
|
||||
│ true │ Bool │ true │ Bool │ true │
|
||||
│ [1,2,3] │ Array(Int64) │ [1,2,3] │ Array(Int64) │ true │
|
||||
└─────────┴────────────────┴─────────┴─────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
If `K=1`, all types are converted to `String`:
|
||||
The function `isDynamicElementInSharedData` returns `true` for rows that are stored in the special shared data structure inside `Dynamic`, and as we can see, the resulting column contains only two types that are not stored in the shared data structure.
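The function can also be used as a filter, for example to list only the values that were moved into the shared data structure by the cast (a small sketch against the same `test` table):

```sql
SELECT d2, dynamicType(d2)
FROM (SELECT d::Dynamic(max_types=2) AS d2 FROM test)
WHERE isDynamicElementInSharedData(d2);
```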
|
||||
|
||||
If `K=0`, all types will be inserted into a single special subcolumn:
|
||||
|
||||
```text
|
||||
CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory;
|
||||
INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]);
|
||||
SELECT d, dynamicType(d), d::Dynamic(max_types=1) as d2, dynamicType(d2) FROM test;
|
||||
SELECT d, dynamicType(d), d::Dynamic(max_types=0) as d2, dynamicType(d2), isDynamicElementInSharedData(d2) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐
|
||||
│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │
|
||||
│ 42 │ Int64 │ 42 │ String │
|
||||
│ 43 │ Int64 │ 43 │ String │
|
||||
│ 42.42 │ String │ 42.42 │ String │
|
||||
│ true │ Bool │ true │ String │
|
||||
│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │
|
||||
└─────────┴────────────────┴─────────┴─────────────────┘
|
||||
┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┬─isDynamicElementInSharedData(d2)─┐
|
||||
│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ false │
|
||||
│ 42 │ Int64 │ 42 │ Int64 │ true │
|
||||
│ 43 │ Int64 │ 43 │ Int64 │ true │
|
||||
│ 42.42 │ String │ 42.42 │ String │ true │
|
||||
│ true │ Bool │ true │ Bool │ true │
|
||||
│ [1,2,3] │ Array(Int64) │ [1,2,3] │ Array(Int64) │ true │
|
||||
└─────────┴────────────────┴─────────┴─────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Reading Dynamic type from the data
|
||||
@ -411,17 +413,17 @@ SELECT d, dynamicType(d) FROM test ORDER by d;
|
||||
|
||||
## Reaching the limit in number of different data types stored inside Dynamic
|
||||
|
||||
`Dynamic` data type can store only limited number of different data types inside. By default, this limit is 32, but you can change it in type declaration using syntax `Dynamic(max_types=N)` where N is between 1 and 255 (due to implementation details, it's impossible to have more than 255 different data types inside Dynamic).
|
||||
When the limit is reached, all new data types inserted to `Dynamic` column will be casted to `String` and stored as `String` values.
|
||||
The `Dynamic` data type can store only a limited number of different data types as separate subcolumns. By default, this limit is 32, but you can change it in the type declaration using the syntax `Dynamic(max_types=N)`, where `N` is between 0 and 254 (due to implementation details, it is impossible to store more than 254 different data types as separate subcolumns inside `Dynamic`).
|
||||
When the limit is reached, all new data types inserted into a `Dynamic` column are stored in a single shared data structure that keeps values of different data types in binary form.
|
||||
|
||||
Let's see what happens when the limit is reached in different scenarios.
|
||||
|
||||
### Reaching the limit during data parsing
|
||||
|
||||
During parsing of `Dynamic` values from the data, when the limit is reached for current block of data, all new values will be inserted as `String` values:
|
||||
During parsing of `Dynamic` values from the data, when the limit is reached for the current block of data, all new values are inserted into the shared data structure:
|
||||
|
||||
```sql
|
||||
SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
|
||||
SELECT d, dynamicType(d), isDynamicElementInSharedData(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
|
||||
{"d" : 42}
|
||||
{"d" : [1, 2, 3]}
|
||||
{"d" : "Hello, World!"}
|
||||
@ -432,22 +434,22 @@ SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
|
||||
```
|
||||
|
||||
```text
|
||||
┌─d──────────────────────────┬─dynamicType(d)─┐
|
||||
│ 42 │ Int64 │
|
||||
│ [1,2,3] │ Array(Int64) │
|
||||
│ Hello, World! │ String │
|
||||
│ 2020-01-01 │ String │
|
||||
│ ["str1", "str2", "str3"] │ String │
|
||||
│ {"a" : 1, "b" : [1, 2, 3]} │ String │
|
||||
└────────────────────────────┴────────────────┘
|
||||
┌─d──────────────────────┬─dynamicType(d)─────────────────┬─isDynamicElementInSharedData(d)─┐
|
||||
│ 42 │ Int64 │ false │
|
||||
│ [1,2,3] │ Array(Int64) │ false │
|
||||
│ Hello, World! │ String │ false │
|
||||
│ 2020-01-01 │ Date │ true │
|
||||
│ ['str1','str2','str3'] │ Array(String) │ true │
|
||||
│ (1,[1,2,3]) │ Tuple(a Int64, b Array(Int64)) │ true │
|
||||
└────────────────────────┴────────────────────────────────┴─────────────────────────────────┘
|
||||
```
|
||||
|
||||
As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String` all new types were converted to `String`.
|
||||
As we can see, after inserting 3 different data types (`Int64`, `Array(Int64)` and `String`), all new types were inserted into the special shared data structure.
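Values stored in the shared data structure keep their original types, so they can still be queried normally, for example by filtering on `dynamicType` (a minimal sketch with a shortened input; the behaviour follows the example above):

```sql
SELECT d, dynamicType(d), isDynamicElementInSharedData(d)
FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
{"d" : 42}
{"d" : [1, 2, 3]}
{"d" : "Hello, World!"}
{"d" : "2020-01-01"}
')
WHERE dynamicType(d) = 'Date';
```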
|
||||
|
||||
### During merges of data parts in MergeTree table engines
|
||||
|
||||
During merge of several data parts in MergeTree table the `Dynamic` column in the resulting data part can reach the limit of different data types inside and won't be able to store all types from source parts.
|
||||
In this case ClickHouse chooses what types will remain after merge and what types will be casted to `String`. In most cases ClickHouse tries to keep the most frequent types and cast the rarest types to `String`, but it depends on the implementation.
|
||||
During a merge of several data parts in a MergeTree table, the `Dynamic` column in the resulting data part can reach the limit on the number of data types that can be stored as separate subcolumns and won't be able to keep all types from the source parts as subcolumns.
|
||||
In this case, ClickHouse chooses which types remain as separate subcolumns after the merge and which types are inserted into the shared data structure. In most cases, ClickHouse tries to keep the most frequent types as subcolumns and store the rarest types in the shared data structure, but this depends on the implementation.
|
||||
|
||||
Let's see an example of such a merge. First, let's create a table with a `Dynamic` column, set the limit of different data types to `3` and insert values with `5` different types:
|
||||
|
||||
@ -463,17 +465,17 @@ INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(1);
|
||||
|
||||
Each insert will create a separate data part with a `Dynamic` column containing a single type:
|
||||
```sql
|
||||
SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part;
|
||||
SELECT count(), dynamicType(d), isDynamicElementInSharedData(d), _part FROM test GROUP BY _part, dynamicType(d), isDynamicElementInSharedData(d) ORDER BY _part, count();
|
||||
```
|
||||
|
||||
```text
|
||||
┌─count()─┬─dynamicType(d)──────┬─_part─────┐
|
||||
│ 5 │ UInt64 │ all_1_1_0 │
|
||||
│ 4 │ Array(UInt64) │ all_2_2_0 │
|
||||
│ 3 │ Date │ all_3_3_0 │
|
||||
│ 2 │ Map(UInt64, UInt64) │ all_4_4_0 │
|
||||
│ 1 │ String │ all_5_5_0 │
|
||||
└─────────┴─────────────────────┴───────────┘
|
||||
┌─count()─┬─dynamicType(d)──────┬─isDynamicElementInSharedData(d)─┬─_part─────┐
|
||||
│ 5 │ UInt64 │ false │ all_1_1_0 │
|
||||
│ 4 │ Array(UInt64) │ false │ all_2_2_0 │
|
||||
│ 3 │ Date │ false │ all_3_3_0 │
|
||||
│ 2 │ Map(UInt64, UInt64) │ false │ all_4_4_0 │
|
||||
│ 1 │ String │ false │ all_5_5_0 │
|
||||
└─────────┴─────────────────────┴─────────────────────────────────┴───────────┘
|
||||
```
|
||||
|
||||
Now, let's merge all parts into one and see what will happen:
|
||||
@ -481,18 +483,20 @@ Now, let's merge all parts into one and see what will happen:
|
||||
```sql
|
||||
SYSTEM START MERGES test;
|
||||
OPTIMIZE TABLE test FINAL;
|
||||
SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part;
|
||||
SELECT count(), dynamicType(d), isDynamicElementInSharedData(d), _part FROM test GROUP BY _part, dynamicType(d), isDynamicElementInSharedData(d) ORDER BY _part, count() desc;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─count()─┬─dynamicType(d)─┬─_part─────┐
|
||||
│ 5 │ UInt64 │ all_1_5_2 │
|
||||
│ 6 │ String │ all_1_5_2 │
|
||||
│ 4 │ Array(UInt64) │ all_1_5_2 │
|
||||
└─────────┴────────────────┴───────────┘
|
||||
┌─count()─┬─dynamicType(d)──────┬─isDynamicElementInSharedData(d)─┬─_part─────┐
|
||||
│ 5 │ UInt64 │ false │ all_1_5_2 │
|
||||
│ 4 │ Array(UInt64) │ false │ all_1_5_2 │
|
||||
│ 3 │ Date │ false │ all_1_5_2 │
|
||||
│ 2 │ Map(UInt64, UInt64) │ true │ all_1_5_2 │
|
||||
│ 1 │ String │ true │ all_1_5_2 │
|
||||
└─────────┴─────────────────────┴─────────────────────────────────┴───────────┘
|
||||
```
|
||||
|
||||
As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`.
|
||||
As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` as separate subcolumns and inserted all other types into the shared data structure.
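To double-check which values ended up in the shared data structure after the merge, the same helper function can be applied directly (a small sketch against the `test` table from this example):

```sql
-- List the values that the merge moved into the shared data structure.
SELECT d, dynamicType(d)
FROM test
WHERE isDynamicElementInSharedData(d)
ORDER BY dynamicType(d);
```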
|
||||
|
||||
## JSONExtract functions with Dynamic
|
||||
|
||||
@ -509,22 +513,23 @@ SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Dynamic') AS dynamic, dynamicType(
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_dynamics, mapApply((k, v) -> (k, variantType(v)), map_of_dynamics) AS map_of_dynamic_types```
|
||||
SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Dynamic)') AS map_of_dynamics, mapApply((k, v) -> (k, dynamicType(v)), map_of_dynamics) AS map_of_dynamic_types
|
||||
```
|
||||
|
||||
```text
|
||||
┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────┐
|
||||
│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │
|
||||
└──────────────────────────────────┴─────────────────────────────────────────────────┘
|
||||
┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────────────┐
|
||||
│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'Int64','b':'String','c':'Array(Nullable(Int64))'} │
|
||||
└──────────────────────────────────┴─────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS dynamics, arrayMap(x -> (x.1, variantType(x.2)), dynamics) AS dynamic_types```
|
||||
SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Dynamic') AS dynamics, arrayMap(x -> (x.1, dynamicType(x.2)), dynamics) AS dynamic_types```
|
||||
```
|
||||
|
||||
```text
|
||||
┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────┐
|
||||
│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │
|
||||
└────────────────────────────────────────┴───────────────────────────────────────────────────────┘
|
||||
┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────────────┐
|
||||
│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','Int64'),('b','String'),('c','Array(Nullable(Int64))')] │
|
||||
└────────────────────────────────────────┴───────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Binary output format
|
||||
|
@ -52,6 +52,48 @@ Result:
|
||||
└───────────────────────────────┴───────────────┘
|
||||
```
|
||||
|
||||
## LineString
|
||||
|
||||
`LineString` is a line stored as an array of points: [Array](array.md)([Point](#point)).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE geo_linestring (l LineString) ENGINE = Memory();
|
||||
INSERT INTO geo_linestring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]);
|
||||
SELECT l, toTypeName(l) FROM geo_linestring;
|
||||
```
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─l─────────────────────────────┬─toTypeName(l)─┐
|
||||
│ [(0,0),(10,0),(10,10),(0,10)] │ LineString │
|
||||
└───────────────────────────────┴───────────────┘
|
||||
```
|
||||
|
||||
## MultiLineString
|
||||
|
||||
`MultiLineString` is multiple lines stored as an array of `LineString`: [Array](array.md)([LineString](#linestring)).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE geo_multilinestring (l MultiLineString) ENGINE = Memory();
|
||||
INSERT INTO geo_multilinestring VALUES([[(0, 0), (10, 0), (10, 10), (0, 10)], [(1, 1), (2, 2), (3, 3)]]);
|
||||
SELECT l, toTypeName(l) FROM geo_multilinestring;
|
||||
```
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─l───────────────────────────────────────────────────┬─toTypeName(l)───┐
|
||||
│ [[(0,0),(10,0),(10,10),(0,10)],[(1,1),(2,2),(3,3)]] │ MultiLineString │
|
||||
└─────────────────────────────────────────────────────┴─────────────────┘
|
||||
```
|
||||
|
||||
## Polygon
|
||||
|
||||
`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring)). The first element of the outer array is the outer shape of the polygon, and all the following elements are holes.
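For illustration, a minimal sketch of a polygon with a single hole, following the same pattern as the `LineString` example above (table name and coordinates are hypothetical):

```sql
CREATE TABLE geo_polygon (pg Polygon) ENGINE = Memory();
-- Outer ring first, then one hole.
INSERT INTO geo_polygon VALUES([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]]);
SELECT pg, toTypeName(pg) FROM geo_polygon;
```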
|
||||
|
@ -6,11 +6,13 @@ title: "Functions for Working with Polygons"
|
||||
|
||||
## WKT
|
||||
|
||||
Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](../../data-types/geo.md). Supported WKT objects are:
|
||||
Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](../../data-types/geo.md). Supported WKT objects are:
|
||||
|
||||
- POINT
|
||||
- POLYGON
|
||||
- MULTIPOLYGON
|
||||
- LINESTRING
|
||||
- MULTILINESTRING
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -26,12 +28,16 @@ WKT(geo_data)
|
||||
- [Ring](../../data-types/geo.md#ring)
|
||||
- [Polygon](../../data-types/geo.md#polygon)
|
||||
- [MultiPolygon](../../data-types/geo.md#multipolygon)
|
||||
- [LineString](../../data-types/geo.md#linestring)
|
||||
- [MultiLineString](../../data-types/geo.md#multilinestring)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- WKT geometric object `POINT` is returned for a Point.
|
||||
- WKT geometric object `POLYGON` is returned for a Polygon.
|
||||
- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon.
|
||||
- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon.
|
||||
- WKT geometric object `LINESTRING` is returned for a LineString.
|
||||
- WKT geometric object `MULTILINESTRING` is returned for a MultiLineString.
|
||||
|
||||
**Examples**
|
||||
|
||||
@ -84,7 +90,7 @@ SELECT
|
||||
|
||||
### Input parameters
|
||||
|
||||
String starting with `MULTIPOLYGON`
|
||||
String starting with `MULTIPOLYGON`
|
||||
|
||||
### Returned value
|
||||
|
||||
@ -170,6 +176,34 @@ SELECT readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)');
|
||||
[(1,1),(2,2),(3,3),(1,1)]
|
||||
```
|
||||
|
||||
## readWKTMultiLineString
|
||||
|
||||
Parses a Well-Known Text (WKT) representation of a MultiLineString geometry and returns it in the internal ClickHouse format.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
readWKTMultiLineString(wkt_string)
|
||||
```
|
||||
|
||||
### Arguments
|
||||
|
||||
- `wkt_string`: The input WKT string representing a MultiLineString geometry.
|
||||
|
||||
### Returned value
|
||||
|
||||
The function returns a ClickHouse internal representation of the multilinestring geometry.
|
||||
|
||||
### Example
|
||||
|
||||
```sql
|
||||
SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))');
|
||||
```
|
||||
|
||||
```response
|
||||
[[(1,1),(2,2),(3,3)],[(4,4),(5,5),(6,6)]]
|
||||
```
|
||||
|
||||
## readWKTRing
|
||||
|
||||
Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format.
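For example, a minimal sketch mirroring the `readWKTLineString` example above (the `[(x,y),...]` output shape is assumed to match the other readWKT* functions):

```sql
-- Extract the outer ring of a simple closed polygon.
SELECT readWKTRing('POLYGON ((1 1, 2 2, 3 3, 1 1))');
```

```response
[(1,1),(2,2),(3,3),(1,1)]
```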
|
||||
@ -219,7 +253,7 @@ UInt8, 0 for false, 1 for true
|
||||
|
||||
## polygonsDistanceSpherical
|
||||
|
||||
Calculates the minimal distance between two points where one point belongs to the first polygon and the second to another polygon. Spherical means that coordinates are interpreted as coordinates on a pure and ideal sphere, which is not true for the Earth. Using this type of coordinate system speeds up execution, but of course is not precise.
|
||||
Calculates the minimal distance between two points where one point belongs to the first polygon and the second to the other polygon. Spherical means that coordinates are interpreted as coordinates on a pure and ideal sphere, which is not true for the Earth. Using this type of coordinate system speeds up execution, but it is of course not precise.
|
||||
|
||||
### Example
|
||||
|
||||
|
@ -4189,3 +4189,94 @@ Result:
|
||||
│ 32 │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
## getSubcolumn
|
||||
|
||||
Takes a table expression or identifier and a constant string with the name of the sub-column, and returns the requested sub-column extracted from the expression.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
getSubcolumn(col_name, subcol_name)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `col_name` — Table expression or identifier. [Expression](../syntax.md/#expressions), [Identifier](../syntax.md/#identifiers).
|
||||
- `subcol_name` — The name of the sub-column. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns the extracted sub-column.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE t_arr (arr Array(Tuple(subcolumn1 UInt32, subcolumn2 String))) ENGINE = MergeTree ORDER BY tuple();
|
||||
INSERT INTO t_arr VALUES ([(1, 'Hello'), (2, 'World')]), ([(3, 'This'), (4, 'is'), (5, 'subcolumn')]);
|
||||
SELECT getSubcolumn(arr, 'subcolumn1'), getSubcolumn(arr, 'subcolumn2') FROM t_arr;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─getSubcolumn(arr, 'subcolumn1')─┬─getSubcolumn(arr, 'subcolumn2')─┐
|
||||
1. │ [1,2] │ ['Hello','World'] │
|
||||
2. │ [3,4,5] │ ['This','is','subcolumn'] │
|
||||
└─────────────────────────────────┴─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## getTypeSerializationStreams
|
||||
|
||||
Enumerates stream paths of a data type.
|
||||
|
||||
:::note
|
||||
This function is intended for use by developers.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
getTypeSerializationStreams(col)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `col` — Column or string representation of a data type from which the data type will be detected.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns an array with all the serialization sub-stream paths. [Array](../data-types/array.md)([String](../data-types/string.md)).
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT getTypeSerializationStreams(tuple('a', 1, 'b', 2));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─getTypeSerializationStreams(('a', 1, 'b', 2))─────────────────────────────────────────────────────────────────────────┐
|
||||
1. │ ['{TupleElement(1), Regular}','{TupleElement(2), Regular}','{TupleElement(3), Regular}','{TupleElement(4), Regular}'] │
|
||||
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT getTypeSerializationStreams('Map(String, Int64)');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─getTypeSerializationStreams('Map(String, Int64)')────────────────────────────────────────────────────────────────┐
|
||||
1. │ ['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] │
|
||||
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|