Merge remote-tracking branch 'blessed/master' into experiment_int128

This commit is contained in:
Raúl Marín 2024-03-13 13:57:41 +01:00
commit cc92b8ffc5
826 changed files with 14431 additions and 18069 deletions

View File

@ -305,7 +305,7 @@ jobs:
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
MarkReleaseReady:
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
if: ${{ !failure() && !cancelled() }}
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
@ -313,9 +313,25 @@ jobs:
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Debug
run: |
echo need with different filters
cat << 'EOF'
${{ toJSON(needs) }}
${{ toJSON(needs.*.result) }}
no failures ${{ !contains(needs.*.result, 'failure') }}
no skips ${{ !contains(needs.*.result, 'skipped') }}
no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
EOF
- name: Not ready
# fail the job to be able restart it
if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }}
run: exit 1
- name: Check out repository code
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
uses: ClickHouse/checkout@v1
- name: Mark Commit Release Ready
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py

View File

@ -13,9 +13,7 @@ on: # yamllint disable-line rule:truthy
- opened
branches:
- master
##########################################################################################
##################################### SMALL CHECKS #######################################
##########################################################################################
jobs:
RunConfig:
runs-on: [self-hosted, style-checker-aarch64]
@ -70,13 +68,13 @@ jobs:
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check'
BuildDockers:
needs: [RunConfig]
if: ${{ !failure() && !cancelled() }}
if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }}
uses: ./.github/workflows/reusable_docker.yml
with:
data: ${{ needs.RunConfig.outputs.data }}
StyleCheck:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Style check')}}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Style check
@ -89,19 +87,9 @@ jobs:
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
DocsCheck:
needs: [RunConfig, StyleCheck]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docs check
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docs_check.py
FastTest:
needs: [RunConfig, StyleCheck]
if: ${{ !failure() && !cancelled() }}
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Fast test') }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Fast test
@ -109,818 +97,83 @@ jobs:
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 fast_test_check.py
CompatibilityCheckX86:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Compatibility check (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
CompatibilityCheckAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Compatibility check (aarch64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
BuilderDebDebug:
################################# Main statges #################################
# for main CI chain
#
Builds_1:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }}
# using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab
uses: ./.github/workflows/reusable_build_stage.yml
with:
build_name: package_debug
stage: Builds_1
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebRelease:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
Tests_1:
needs: [RunConfig, Builds_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_1') }}
# using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab
uses: ./.github/workflows/reusable_test_stage.yml
with:
build_name: package_release
checkout_depth: 0
stage: Tests_1
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebReleaseCoverage:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
Builds_2:
needs: [RunConfig, Builds_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_2') }}
# using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab
uses: ./.github/workflows/reusable_build_stage.yml
with:
build_name: package_release_coverage
checkout_depth: 0
stage: Builds_2
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebAarch64:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
Tests_2:
needs: [RunConfig, Builds_2]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }}
# using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab
uses: ./.github/workflows/reusable_test_stage.yml
with:
build_name: package_aarch64
checkout_depth: 0
stage: Tests_2
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinRelease:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_release
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebAsan:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_asan
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebUBsan:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_ubsan
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebTsan:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_tsan
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebMsan:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_msan
data: ${{ needs.RunConfig.outputs.data }}
##########################################################################################
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderBinClangTidy:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_tidy
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinDarwin:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinAarch64:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinFreeBSD:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_freebsd
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinDarwinAarch64:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin_aarch64
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinPPC64:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_ppc64le
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinAmd64Compat:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_amd64_compat
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinAmd64Musl:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_amd64_musl
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinAarch64V80Compat:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64_v80compat
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinRISCV64:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_riscv64
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinS390X:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_s390x
data: ${{ needs.RunConfig.outputs.data }}
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
################################# Reports #################################
# Reports should by run even if Builds_1/2 fail, so put them separatly in wf (not in Tests_1/2)
Builds_1_Report:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
needs:
- RunConfig
- BuilderDebAarch64
- BuilderDebAsan
- BuilderDebDebug
- BuilderDebMsan
- BuilderDebRelease
- BuilderDebTsan
- BuilderDebUBsan
- Builds_1
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
runner_type: style-checker
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
BuilderSpecialReport:
Builds_2_Report:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }}
needs:
- RunConfig
- BuilderBinAarch64
- BuilderBinDarwin
- BuilderBinDarwinAarch64
- BuilderBinFreeBSD
- BuilderBinPPC64
- BuilderBinRISCV64
- BuilderBinS390X
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
- BuilderDebReleaseCoverage
- BuilderBinRelease
- Builds_2
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
InstallPackagesTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Install packages (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 install_check.py "$CHECK_NAME"
InstallPackagesTestAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Install packages (arm64)
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 install_check.py "$CHECK_NAME"
##############################################################################################
########################### FUNCTIONAl STATELESS TESTS #######################################
##############################################################################################
FunctionalStatelessTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (release)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestReleaseAnalyzerS3Replicated:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (release, analyzer, s3, DatabaseReplicated)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestS3Debug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (debug, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestS3Tsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (tsan, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (aarch64)
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (asan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (tsan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (msan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestUBsan:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (ubsan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (debug)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestFlakyCheck:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests flaky check (asan)
data: ${{ needs.RunConfig.outputs.data }}
runner_type: func-tester
TestsBugfixCheck:
needs: [RunConfig, StyleCheck]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Bugfix validation
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
############################ FUNCTIONAl STATEFUL TESTS #######################################
##############################################################################################
FunctionalStatefulTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (release)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (aarch64)
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (asan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (tsan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (msan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestUBsan:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (ubsan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (debug)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
# Parallel replicas
FunctionalStatefulTestDebugParallelReplicas:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (debug, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestUBsanParallelReplicas:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (ubsan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestMsanParallelReplicas:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (msan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestTsanParallelReplicas:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (tsan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestAsanParallelReplicas:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (asan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestReleaseParallelReplicas:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (release, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
########################### ClickBench #######################################################
##############################################################################################
ClickBenchAMD64:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickBench (amd64)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 clickbench.py "$CHECK_NAME"
ClickBenchAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickBench (aarch64)
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 clickbench.py "$CHECK_NAME"
##############################################################################################
######################################### STRESS TESTS #######################################
##############################################################################################
StressTestAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (asan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (tsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (msan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestUBsan:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (ubsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (debug)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
######################################### UPGRADE CHECK ######################################
##############################################################################################
UpgradeCheckAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Upgrade check (asan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
UpgradeCheckTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Upgrade check (tsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
UpgradeCheckMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Upgrade check (msan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
UpgradeCheckDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Upgrade check (debug)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
##################################### AST FUZZERS ############################################
##############################################################################################
ASTFuzzerTestAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: AST fuzzer (asan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
ASTFuzzerTestTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: AST fuzzer (tsan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
ASTFuzzerTestUBSan:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: AST fuzzer (ubsan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
ASTFuzzerTestMSan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: AST fuzzer (msan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
ASTFuzzerTestDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: AST fuzzer (debug)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
############################# INTEGRATION TESTS #############################################
#############################################################################################
IntegrationTestsAnalyzerAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Integration tests (asan, analyzer)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
IntegrationTestsTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Integration tests (tsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
IntegrationTestsAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Integration tests (aarch64)
# FIXME: there is no stress-tester for aarch64. func-tester-aarch64 is ok?
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
IntegrationTestsFlakyCheck:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Integration tests flaky check (asan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
#################################### UNIT TESTS #############################################
#############################################################################################
UnitTestsAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Unit tests (asan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
UnitTestsRelease:
needs: [RunConfig, BuilderBinRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Unit tests (release)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
UnitTestsTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Unit tests (tsan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
UnitTestsMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Unit tests (msan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
UnitTestsUBsan:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Unit tests (ubsan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
#################################### PERFORMANCE TESTS ######################################
#############################################################################################
PerformanceComparisonX86:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Performance Comparison
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
PerformanceComparisonAarch:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Performance Comparison Aarch64
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
###################################### SQLANCER FUZZERS ######################################
##############################################################################################
SQLancerTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: SQLancer (release)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
SQLancerTestDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: SQLancer (debug)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
################################# Stage Final #################################
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
needs:
- BuilderReport
- BuilderSpecialReport
- DocsCheck
- FastTest
- TestsBugfixCheck
- FunctionalStatelessTestDebug
- FunctionalStatelessTestRelease
- FunctionalStatelessTestAarch64
- FunctionalStatelessTestAsan
- FunctionalStatelessTestTsan
- FunctionalStatelessTestMsan
- FunctionalStatelessTestUBsan
- FunctionalStatefulTestDebug
- FunctionalStatefulTestRelease
- FunctionalStatefulTestAarch64
- FunctionalStatefulTestAsan
- FunctionalStatefulTestTsan
- FunctionalStatefulTestMsan
- FunctionalStatefulTestUBsan
- FunctionalStatelessTestS3Debug
- FunctionalStatelessTestS3Tsan
- FunctionalStatelessTestReleaseAnalyzerS3Replicated
- FunctionalStatefulTestReleaseParallelReplicas
- FunctionalStatefulTestAsanParallelReplicas
- FunctionalStatefulTestTsanParallelReplicas
- FunctionalStatefulTestMsanParallelReplicas
- FunctionalStatefulTestUBsanParallelReplicas
- FunctionalStatefulTestDebugParallelReplicas
- StressTestDebug
- StressTestAsan
- StressTestTsan
- StressTestMsan
- StressTestUBsan
- UpgradeCheckAsan
- UpgradeCheckTsan
- UpgradeCheckMsan
- UpgradeCheckDebug
- ASTFuzzerTestDebug
- ASTFuzzerTestAsan
- ASTFuzzerTestTsan
- ASTFuzzerTestMSan
- ASTFuzzerTestUBSan
- IntegrationTestsAnalyzerAsan
- IntegrationTestsTsan
- IntegrationTestsAarch64
- IntegrationTestsFlakyCheck
- PerformanceComparisonX86
- PerformanceComparisonAarch
- UnitTestsAsan
- UnitTestsTsan
- UnitTestsMsan
- UnitTestsUBsan
- UnitTestsRelease
- CompatibilityCheckX86
- CompatibilityCheckAarch64
- SQLancerTestRelease
- SQLancerTestDebug
needs: [Tests_1, Tests_2]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 merge_pr.py --check-approved
##############################################################################################
############################ SQLLOGIC TEST ###################################################
##############################################################################################
SQLLogicTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Sqllogic test (release)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
##################################### SQL TEST ###############################################
##############################################################################################
SQLTest:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: SQLTest
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
###################################### NOT IN FINISH ########################################
#############################################################################################
###################################### JEPSEN TESTS #########################################
#############################################################################################
@ -931,19 +184,11 @@ jobs:
# we need concurrency as the job uses dedicated instances in the cloud
concurrency:
group: jepsen
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, BuilderBinRelease]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse Keeper Jepsen') }}
# jepsen needs binary_release build which is in Builds_2
needs: [RunConfig, Builds_2]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse Keeper Jepsen
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
####################################### libFuzzer ###########################################
#############################################################################################
libFuzzer:
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, StyleCheck]
uses: ./.github/workflows/libfuzzer.yml
with:
data: ${{ needs.RunConfig.outputs.data }}

View File

@ -206,7 +206,7 @@ jobs:
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
MarkReleaseReady:
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
if: ${{ !failure() && !cancelled() }}
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
@ -214,9 +214,25 @@ jobs:
- BuilderDebAarch64
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Debug
run: |
echo need with different filters
cat << 'EOF'
${{ toJSON(needs) }}
${{ toJSON(needs.*.result) }}
no failures ${{ !contains(needs.*.result, 'failure') }}
no skips ${{ !contains(needs.*.result, 'skipped') }}
no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
EOF
- name: Not ready
# fail the job to be able restart it
if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }}
run: exit 1
- name: Check out repository code
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
uses: ClickHouse/checkout@v1
- name: Mark Commit Release Ready
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py

View File

@ -43,7 +43,8 @@ jobs:
runs-on: [self-hosted, '${{inputs.runner_type}}']
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
# WIP: temporary try commit with limited perallelization of checkout
uses: ClickHouse/checkout@0be3f7b3098bae494d3ef5d29d2e0676fb606232
with:
clear-repository: true
ref: ${{ fromJson(inputs.data).git_ref }}

View File

@ -0,0 +1,32 @@
### FIXME: merge reusable_test.yml and reusable_build.yml as they are almost identical
# and then merge reusable_build_stage.yml and reusable_test_stage.yml
name: BuildStageWF
'on':
workflow_call:
inputs:
stage:
description: stage name
type: string
required: true
data:
description: ci data
type: string
required: true
jobs:
s:
if: ${{ !failure() && !cancelled() }}
strategy:
fail-fast: false
matrix:
job_name_and_runner_type: ${{ fromJson(inputs.data).stages_data[inputs.stage] }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: ${{ matrix.job_name_and_runner_type.job_name }}
runner_type: ${{ matrix.job_name_and_runner_type.runner_type }}
# don't forget to pass force flag (no ci cache/no reuse) - once it's needed
force: false
# for now let's do I deep checkout for builds
checkout_depth: 0
data: ${{ inputs.data }}

View File

@ -0,0 +1,25 @@
name: StageWF
'on':
workflow_call:
inputs:
stage:
description: stage name
type: string
required: true
data:
description: ci data
type: string
required: true
jobs:
s:
if: ${{ !failure() && !cancelled() }}
strategy:
fail-fast: false
matrix:
job_name_and_runner_type: ${{ fromJson(inputs.data).stages_data[inputs.stage] }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ${{ matrix.job_name_and_runner_type.job_name }}
runner_type: ${{ matrix.job_name_and_runner_type.runner_type }}
data: ${{ inputs.data }}

2
.gitignore vendored
View File

@ -165,7 +165,7 @@ tests/queries/0_stateless/*.expect.history
tests/integration/**/_gen
# rust
/rust/**/target
/rust/**/target*
# It is autogenerated from *.in
/rust/**/.cargo/config.toml
/rust/**/vendor

View File

@ -61,8 +61,8 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
# set CPU time limit to 1000 seconds
set (RLIMIT_CPU 1000)
# -fsanitize=memory is too heavy
if (SANITIZE STREQUAL "memory")
# -fsanitize=memory and address are too heavy
if (SANITIZE)
set (RLIMIT_DATA 10000000000) # 10G
endif()
@ -319,7 +319,8 @@ if (COMPILER_CLANG)
endif()
endif ()
set (COMPILER_FLAGS "${COMPILER_FLAGS}")
# Disable floating-point expression contraction in order to get consistent floating point calculation results across platforms
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off")
# Our built-in unwinder only supports DWARF version up to 4.
set (DEBUG_INFO_FLAGS "-g")

View File

@ -31,15 +31,30 @@ curl https://clickhouse.com/ | sh
* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements.
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Monthly Release & Community Call
Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.
* [v24.3 Community Call](https://clickhouse.com/company/events/v24-3-community-release-call) - Mar 26
* [v24.4 Community Call](https://clickhouse.com/company/events/v24-4-community-release-call) - Apr 30
## Upcoming Events
Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com.
Keep an eye out for upcoming meetups and eventsaround the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
* [ClickHouse Meetup in Bellevue](https://www.meetup.com/clickhouse-seattle-user-group/events/298650371/) - Mar 11
* [ClickHouse Meetup at Ramp's Offices in NYC](https://www.meetup.com/clickhouse-new-york-user-group/events/298640542/) - Mar 19
* [ClickHouse Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/299479750/) - Mar 20
* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/298997115/) - Mar 21
* [ClickHouse Meetup in Bengaluru](https://www.meetup.com/clickhouse-bangalore-user-group/events/299479850/) - Mar 23
* [ClickHouse Meetup in Zurich](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/299628922/) - Apr 16
* [ClickHouse Meetup in Copenhagen](https://www.meetup.com/clickhouse-denmark-meetup-group/events/299629133/) - Apr 23
* [ClickHouse Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
* **Recording available**: [**v24.1 Release Webinar**](https://www.youtube.com/watch?v=pBF9g0wGAGs) All the features of 24.1, one convenient video! Watch it now!
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)
* **Recording available**: [**v24.2 Release Call**](https://www.youtube.com/watch?v=iN2y-TK8f3A) All the features of 24.2, one convenient video! Watch it now!
## Interested in joining ClickHouse and making it your full-time job?

View File

@ -13,6 +13,7 @@ set (SRCS
cgroupsv2.cpp
coverage.cpp
demangle.cpp
Decimal.cpp
getAvailableMemoryAmount.cpp
getFQDNOrHostName.cpp
getMemoryAmount.cpp

87
base/base/Decimal.cpp Normal file
View File

@ -0,0 +1,87 @@
#include <base/Decimal.h>
#include <base/extended_types.h>
namespace DB
{
/// Explicit template instantiations.
#define FOR_EACH_UNDERLYING_DECIMAL_TYPE(M) \
M(Int32) \
M(Int64) \
M(Int128) \
M(Int256)
#define FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(M, X) \
M(Int32, X) \
M(Int64, X) \
M(Int128, X) \
M(Int256, X)
template <typename T> const Decimal<T> & Decimal<T>::operator += (const T & x) { value += x; return *this; }
template <typename T> const Decimal<T> & Decimal<T>::operator -= (const T & x) { value -= x; return *this; }
template <typename T> const Decimal<T> & Decimal<T>::operator *= (const T & x) { value *= x; return *this; }
template <typename T> const Decimal<T> & Decimal<T>::operator /= (const T & x) { value /= x; return *this; }
template <typename T> const Decimal<T> & Decimal<T>::operator %= (const T & x) { value %= x; return *this; }
template <typename T> void NO_SANITIZE_UNDEFINED Decimal<T>::addOverflow(const T & x) { value += x; }
/// Maybe this explicit instantiation affects performance since operators cannot be inlined.
template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator += (const Decimal<U> & x) { value += static_cast<T>(x.value); return *this; }
template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator -= (const Decimal<U> & x) { value -= static_cast<T>(x.value); return *this; }
template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator *= (const Decimal<U> & x) { value *= static_cast<T>(x.value); return *this; }
template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator /= (const Decimal<U> & x) { value /= static_cast<T>(x.value); return *this; }
template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator %= (const Decimal<U> & x) { value %= static_cast<T>(x.value); return *this; }
#define DISPATCH(TYPE_T, TYPE_U) \
template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator += (const Decimal<TYPE_U> & x); \
template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator -= (const Decimal<TYPE_U> & x); \
template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator *= (const Decimal<TYPE_U> & x); \
template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator /= (const Decimal<TYPE_U> & x); \
template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator %= (const Decimal<TYPE_U> & x);
#define INVOKE(X) FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_UNDERLYING_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
#define DISPATCH(TYPE) template struct Decimal<TYPE>;
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
template <typename T> bool operator< (const Decimal<T> & x, const Decimal<T> & y) { return x.value < y.value; }
template <typename T> bool operator> (const Decimal<T> & x, const Decimal<T> & y) { return x.value > y.value; }
template <typename T> bool operator<= (const Decimal<T> & x, const Decimal<T> & y) { return x.value <= y.value; }
template <typename T> bool operator>= (const Decimal<T> & x, const Decimal<T> & y) { return x.value >= y.value; }
template <typename T> bool operator== (const Decimal<T> & x, const Decimal<T> & y) { return x.value == y.value; }
template <typename T> bool operator!= (const Decimal<T> & x, const Decimal<T> & y) { return x.value != y.value; }
#define DISPATCH(TYPE) \
template bool operator< (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template bool operator> (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template bool operator<= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template bool operator>= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template bool operator== (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template bool operator!= (const Decimal<TYPE> & x, const Decimal<TYPE> & y);
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
template <typename T> Decimal<T> operator+ (const Decimal<T> & x, const Decimal<T> & y) { return x.value + y.value; }
template <typename T> Decimal<T> operator- (const Decimal<T> & x, const Decimal<T> & y) { return x.value - y.value; }
template <typename T> Decimal<T> operator* (const Decimal<T> & x, const Decimal<T> & y) { return x.value * y.value; }
template <typename T> Decimal<T> operator/ (const Decimal<T> & x, const Decimal<T> & y) { return x.value / y.value; }
template <typename T> Decimal<T> operator- (const Decimal<T> & x) { return -x.value; }
#define DISPATCH(TYPE) \
template Decimal<TYPE> operator+ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template Decimal<TYPE> operator- (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template Decimal<TYPE> operator* (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template Decimal<TYPE> operator/ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template Decimal<TYPE> operator- (const Decimal<TYPE> & x);
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS
#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE
}

View File

@ -2,6 +2,7 @@
#include <base/extended_types.h>
#include <base/Decimal_fwd.h>
#include <base/types.h>
#include <base/defines.h>
@ -10,6 +11,18 @@ namespace DB
template <class> struct Decimal;
class DateTime64;
#define FOR_EACH_UNDERLYING_DECIMAL_TYPE(M) \
M(Int32) \
M(Int64) \
M(Int128) \
M(Int256)
#define FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(M, X) \
M(Int32, X) \
M(Int64, X) \
M(Int128, X) \
M(Int256, X)
using Decimal32 = Decimal<Int32>;
using Decimal64 = Decimal<Int64>;
using Decimal128 = Decimal<Int128>;
@ -50,36 +63,73 @@ struct Decimal
return static_cast<U>(value);
}
const Decimal<T> & operator += (const T & x) { value += x; return *this; }
const Decimal<T> & operator -= (const T & x) { value -= x; return *this; }
const Decimal<T> & operator *= (const T & x) { value *= x; return *this; }
const Decimal<T> & operator /= (const T & x) { value /= x; return *this; }
const Decimal<T> & operator %= (const T & x) { value %= x; return *this; }
const Decimal<T> & operator += (const T & x);
const Decimal<T> & operator -= (const T & x);
const Decimal<T> & operator *= (const T & x);
const Decimal<T> & operator /= (const T & x);
const Decimal<T> & operator %= (const T & x);
template <typename U> const Decimal<T> & operator += (const Decimal<U> & x) { value += x.value; return *this; }
template <typename U> const Decimal<T> & operator -= (const Decimal<U> & x) { value -= x.value; return *this; }
template <typename U> const Decimal<T> & operator *= (const Decimal<U> & x) { value *= x.value; return *this; }
template <typename U> const Decimal<T> & operator /= (const Decimal<U> & x) { value /= x.value; return *this; }
template <typename U> const Decimal<T> & operator %= (const Decimal<U> & x) { value %= x.value; return *this; }
template <typename U> const Decimal<T> & operator += (const Decimal<U> & x);
template <typename U> const Decimal<T> & operator -= (const Decimal<U> & x);
template <typename U> const Decimal<T> & operator *= (const Decimal<U> & x);
template <typename U> const Decimal<T> & operator /= (const Decimal<U> & x);
template <typename U> const Decimal<T> & operator %= (const Decimal<U> & x);
/// This is to avoid UB for sumWithOverflow()
void NO_SANITIZE_UNDEFINED addOverflow(const T & x) { value += x; }
void NO_SANITIZE_UNDEFINED addOverflow(const T & x);
T value;
};
template <typename T> inline bool operator< (const Decimal<T> & x, const Decimal<T> & y) { return x.value < y.value; }
template <typename T> inline bool operator> (const Decimal<T> & x, const Decimal<T> & y) { return x.value > y.value; }
template <typename T> inline bool operator<= (const Decimal<T> & x, const Decimal<T> & y) { return x.value <= y.value; }
template <typename T> inline bool operator>= (const Decimal<T> & x, const Decimal<T> & y) { return x.value >= y.value; }
template <typename T> inline bool operator== (const Decimal<T> & x, const Decimal<T> & y) { return x.value == y.value; }
template <typename T> inline bool operator!= (const Decimal<T> & x, const Decimal<T> & y) { return x.value != y.value; }
#define DISPATCH(TYPE) extern template struct Decimal<TYPE>;
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
template <typename T> inline Decimal<T> operator+ (const Decimal<T> & x, const Decimal<T> & y) { return x.value + y.value; }
template <typename T> inline Decimal<T> operator- (const Decimal<T> & x, const Decimal<T> & y) { return x.value - y.value; }
template <typename T> inline Decimal<T> operator* (const Decimal<T> & x, const Decimal<T> & y) { return x.value * y.value; }
template <typename T> inline Decimal<T> operator/ (const Decimal<T> & x, const Decimal<T> & y) { return x.value / y.value; }
template <typename T> inline Decimal<T> operator- (const Decimal<T> & x) { return -x.value; }
#define DISPATCH(TYPE_T, TYPE_U) \
extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator += (const Decimal<TYPE_U> & x); \
extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator -= (const Decimal<TYPE_U> & x); \
extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator *= (const Decimal<TYPE_U> & x); \
extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator /= (const Decimal<TYPE_U> & x); \
extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator %= (const Decimal<TYPE_U> & x);
#define INVOKE(X) FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_UNDERLYING_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename T> bool operator< (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> bool operator> (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> bool operator<= (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> bool operator>= (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> bool operator== (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> bool operator!= (const Decimal<T> & x, const Decimal<T> & y);
#define DISPATCH(TYPE) \
extern template bool operator< (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template bool operator> (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template bool operator<= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template bool operator>= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template bool operator== (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template bool operator!= (const Decimal<TYPE> & x, const Decimal<TYPE> & y);
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
template <typename T> Decimal<T> operator+ (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> Decimal<T> operator- (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> Decimal<T> operator* (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> Decimal<T> operator/ (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> Decimal<T> operator- (const Decimal<T> & x);
#define DISPATCH(TYPE) \
extern template Decimal<TYPE> operator+ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template Decimal<TYPE> operator- (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template Decimal<TYPE> operator* (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template Decimal<TYPE> operator/ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template Decimal<TYPE> operator- (const Decimal<TYPE> & x);
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS
#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE
/// Distinguishable type to allow function resolution/deduction based on value type,
/// but also relatively easy to convert to/from Decimal64.

View File

@ -108,16 +108,22 @@
{
[[noreturn]] void abortOnFailedAssertion(const String & description);
}
#define chassert(x) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
#define chassert_1(x, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
#define chassert_2(x, comment, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(comment); } while (0)
#define UNREACHABLE() abort()
// clang-format off
#else
/// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while
/// "sizeof(!(x))" will not.
#define chassert(x) (void)sizeof(!(x))
#define chassert_1(x, ...) (void)sizeof(!(x))
#define chassert_2(x, comment, ...) (void)sizeof(!(x))
#define UNREACHABLE() __builtin_unreachable()
#endif
#define CHASSERT_DISPATCH(_1,_2, N,...) N(_1, _2)
#define CHASSERT_INVOKE(tuple) CHASSERT_DISPATCH tuple
#define chassert(...) CHASSERT_INVOKE((__VA_ARGS__, chassert_2, chassert_1))
#endif
/// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.

View File

@ -64,6 +64,44 @@ template <> struct is_arithmetic<UInt256> { static constexpr bool value = true;
template <typename T>
inline constexpr bool is_arithmetic_v = is_arithmetic<T>::value;
#define FOR_EACH_ARITHMETIC_TYPE(M) \
M(DataTypeDate) \
M(DataTypeDate32) \
M(DataTypeDateTime) \
M(DataTypeInt8) \
M(DataTypeUInt8) \
M(DataTypeInt16) \
M(DataTypeUInt16) \
M(DataTypeInt32) \
M(DataTypeUInt32) \
M(DataTypeInt64) \
M(DataTypeUInt64) \
M(DataTypeInt128) \
M(DataTypeUInt128) \
M(DataTypeInt256) \
M(DataTypeUInt256) \
M(DataTypeFloat32) \
M(DataTypeFloat64)
#define FOR_EACH_ARITHMETIC_TYPE_PASS(M, X) \
M(DataTypeDate, X) \
M(DataTypeDate32, X) \
M(DataTypeDateTime, X) \
M(DataTypeInt8, X) \
M(DataTypeUInt8, X) \
M(DataTypeInt16, X) \
M(DataTypeUInt16, X) \
M(DataTypeInt32, X) \
M(DataTypeUInt32, X) \
M(DataTypeInt64, X) \
M(DataTypeUInt64, X) \
M(DataTypeInt128, X) \
M(DataTypeUInt128, X) \
M(DataTypeInt256, X) \
M(DataTypeUInt256, X) \
M(DataTypeFloat32, X) \
M(DataTypeFloat64, X)
template <typename T>
struct make_unsigned // NOLINT(readability-identifier-naming)
{

View File

@ -45,6 +45,8 @@ namespace Net
~HTTPChunkedStreamBuf();
void close();
bool isComplete() const { return _chunk == std::char_traits<char>::eof(); }
protected:
int readFromDevice(char * buffer, std::streamsize length);
int writeToDevice(const char * buffer, std::streamsize length);
@ -68,6 +70,8 @@ namespace Net
~HTTPChunkedIOS();
HTTPChunkedStreamBuf * rdbuf();
bool isComplete() const { return _buf.isComplete(); }
protected:
HTTPChunkedStreamBuf _buf;
};

View File

@ -210,7 +210,7 @@ namespace Net
void setKeepAliveTimeout(const Poco::Timespan & timeout);
/// Sets the connection timeout for HTTP connections.
const Poco::Timespan & getKeepAliveTimeout() const;
Poco::Timespan getKeepAliveTimeout() const;
/// Returns the connection timeout for HTTP connections.
virtual std::ostream & sendRequest(HTTPRequest & request);
@ -275,7 +275,7 @@ namespace Net
/// This method should only be called if the request contains
/// a "Expect: 100-continue" header.
void flushRequest();
virtual void flushRequest();
/// Flushes the request stream.
///
/// Normally this method does not need to be called.
@ -283,7 +283,7 @@ namespace Net
/// fully sent if receiveResponse() is not called, e.g.,
/// because the underlying socket will be detached.
void reset();
virtual void reset();
/// Resets the session and closes the socket.
///
/// The next request will initiate a new connection,
@ -303,6 +303,9 @@ namespace Net
/// Returns true if the proxy should be bypassed
/// for the current host.
const Poco::Timestamp & getLastRequest() const;
/// Returns time when connection has been used last time
protected:
enum
{
@ -338,6 +341,10 @@ namespace Net
/// Calls proxyConnect() and attaches the resulting StreamSocket
/// to the HTTPClientSession.
void setLastRequest(Poco::Timestamp time);
void assign(HTTPClientSession & session);
HTTPSessionFactory _proxySessionFactory;
/// Factory to create HTTPClientSession to proxy.
private:
@ -433,11 +440,20 @@ namespace Net
}
inline const Poco::Timespan & HTTPClientSession::getKeepAliveTimeout() const
inline Poco::Timespan HTTPClientSession::getKeepAliveTimeout() const
{
return _keepAliveTimeout;
}
inline const Poco::Timestamp & HTTPClientSession::getLastRequest() const
{
return _lastRequest;
}
inline void HTTPClientSession::setLastRequest(Poco::Timestamp time)
{
_lastRequest = time;
}
}
} // namespace Poco::Net

View File

@ -48,6 +48,8 @@ namespace Net
HTTPFixedLengthStreamBuf(HTTPSession & session, ContentLength length, openmode mode);
~HTTPFixedLengthStreamBuf();
bool isComplete() const;
protected:
int readFromDevice(char * buffer, std::streamsize length);
int writeToDevice(const char * buffer, std::streamsize length);
@ -67,6 +69,8 @@ namespace Net
~HTTPFixedLengthIOS();
HTTPFixedLengthStreamBuf * rdbuf();
bool isComplete() const { return _buf.isComplete(); }
protected:
HTTPFixedLengthStreamBuf _buf;
};

View File

@ -64,6 +64,15 @@ namespace Net
Poco::Timespan getTimeout() const;
/// Returns the timeout for the HTTP session.
Poco::Timespan getConnectionTimeout() const;
/// Returns connection timeout for the HTTP session.
Poco::Timespan getSendTimeout() const;
/// Returns send timeout for the HTTP session.
Poco::Timespan getReceiveTimeout() const;
/// Returns receive timeout for the HTTP session.
bool connected() const;
/// Returns true if the underlying socket is connected.
@ -217,12 +226,25 @@ namespace Net
return _keepAlive;
}
inline Poco::Timespan HTTPSession::getTimeout() const
{
return _receiveTimeout;
}
inline Poco::Timespan HTTPSession::getConnectionTimeout() const
{
return _connectionTimeout;
}
inline Poco::Timespan HTTPSession::getSendTimeout() const
{
return _sendTimeout;
}
inline Poco::Timespan HTTPSession::getReceiveTimeout() const
{
return _receiveTimeout;
}
inline StreamSocket & HTTPSession::socket()
{

View File

@ -63,6 +63,8 @@ namespace Net
~HTTPIOS();
HTTPStreamBuf * rdbuf();
bool isComplete() const { return false; }
protected:
HTTPStreamBuf _buf;
};

View File

@ -49,10 +49,12 @@ HTTPChunkedStreamBuf::~HTTPChunkedStreamBuf()
void HTTPChunkedStreamBuf::close()
{
if (_mode & std::ios::out)
if (_mode & std::ios::out && _chunk != std::char_traits<char>::eof())
{
sync();
_session.write("0\r\n\r\n", 5);
_chunk = std::char_traits<char>::eof();
}
}

View File

@ -227,7 +227,7 @@ void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout)
std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
{
_pRequestStream = 0;
_pResponseStream = 0;
_pResponseStream = 0;
clearException();
_responseReceived = false;
@ -501,5 +501,26 @@ bool HTTPClientSession::bypassProxy() const
else return false;
}
void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session)
{
poco_assert (this != &session);
if (session.buffered())
throw Poco::LogicException("assign a session with not empty buffered data");
if (buffered())
throw Poco::LogicException("assign to a session with not empty buffered data");
attachSocket(session.detachSocket());
setLastRequest(session.getLastRequest());
setResolvedHost(session.getResolvedHost());
setKeepAlive(session.getKeepAlive());
setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout());
setKeepAliveTimeout(session.getKeepAliveTimeout());
setProxyConfig(session.getProxyConfig());
session.reset();
}
} } // namespace Poco::Net

View File

@ -43,6 +43,12 @@ HTTPFixedLengthStreamBuf::~HTTPFixedLengthStreamBuf()
}
bool HTTPFixedLengthStreamBuf::isComplete() const
{
return _count == _length;
}
int HTTPFixedLengthStreamBuf::readFromDevice(char* buffer, std::streamsize length)
{
int n = 0;

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit 5f0542b3ad7eef25b0540d37d778207e0345ea8f
Subproject commit 32870e234cac03e0ac46370c26858b0ffdf14200

2
contrib/curl vendored

@ -1 +1 @@
Subproject commit 5ce164e0e9290c96eb7d502173426c0a135ec008
Subproject commit 1a05e833f8f7140628b27882b10525fd9ec4b873

View File

@ -1,8 +1,12 @@
{
"docker/packager/binary": {
"docker/packager/binary-builder": {
"name": "clickhouse/binary-builder",
"dependent": []
},
"docker/packager/cctools": {
"name": "clickhouse/cctools",
"dependent": []
},
"docker/test/compatibility/centos": {
"name": "clickhouse/test-old-centos",
"dependent": []
@ -30,7 +34,6 @@
"docker/test/util": {
"name": "clickhouse/test-util",
"dependent": [
"docker/packager/binary",
"docker/test/base",
"docker/test/fasttest"
]
@ -67,7 +70,9 @@
},
"docker/test/fasttest": {
"name": "clickhouse/fasttest",
"dependent": []
"dependent": [
"docker/packager/binary-builder"
]
},
"docker/test/style": {
"name": "clickhouse/style-test",

View File

@ -28,7 +28,6 @@ lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-clang -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-client -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-compressor -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-copier -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-extract-from-config -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-format -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-lld -> clickhouse

View File

@ -1,43 +1,6 @@
# docker build -t clickhouse/binary-builder .
ARG FROM_TAG=latest
FROM clickhouse/test-util:latest AS cctools
# The cctools are built always from the clickhouse/test-util:latest and cached inline
# Theoretically, it should improve rebuild speed significantly
ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# DO NOT PUT ANYTHING BEFORE THE NEXT TWO `RUN` DIRECTIVES
# THE MOST HEAVY OPERATION MUST BE THE FIRST IN THE CACHE
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# libtapi is required to support .tbh format from recent MacOS SDKs
RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
&& cd apple-libtapi \
&& git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 \
&& INSTALLPREFIX=/cctools ./build.sh \
&& ./install.sh \
&& cd .. \
&& rm -rf apple-libtapi
# Build and install tools for cross-linking to Darwin (x86-64)
# Build and install tools for cross-linking to Darwin (aarch64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7 \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=x86_64-apple-darwin \
&& make install -j$(nproc) \
&& make clean \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=aarch64-apple-darwin \
&& make install -j$(nproc) \
&& cd ../.. \
&& rm -rf cctools-port
# !!!!!!!!!!!
# END COMPILE
# !!!!!!!!!!!
FROM clickhouse/test-util:$FROM_TAG
FROM clickhouse/fasttest:$FROM_TAG
ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}
@ -110,7 +73,8 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \
"https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \
&& chmod +x /usr/bin/clang-tidy-cache
COPY --from=cctools /cctools /cctools
# If the cctools is updated, then first build it in the CI, then update here in a different commit
COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools
RUN mkdir /workdir && chmod 777 /workdir
WORKDIR /workdir

View File

@ -0,0 +1,31 @@
# This is a hack to significantly reduce the build time of the clickhouse/binary-builder
# It's based on the assumption that we don't care of the cctools version so much
# It event does not depend on the clickhouse/fasttest in the `docker/images.json`
ARG FROM_TAG=latest
FROM clickhouse/fasttest:$FROM_TAG
ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}
RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
&& cd apple-libtapi \
&& git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 \
&& INSTALLPREFIX=/cctools ./build.sh \
&& ./install.sh \
&& cd .. \
&& rm -rf apple-libtapi
# Build and install tools for cross-linking to Darwin (x86-64)
# Build and install tools for cross-linking to Darwin (aarch64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7 \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=x86_64-apple-darwin \
&& make install -j$(nproc) \
&& make clean \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=aarch64-apple-darwin \
&& make install -j$(nproc) \
&& cd ../.. \
&& rm -rf cctools-port

View File

@ -1,16 +1,16 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import subprocess
import os
import argparse
import logging
import os
import subprocess
import sys
from pathlib import Path
from typing import List, Optional
SCRIPT_PATH = Path(__file__).absolute()
IMAGE_TYPE = "binary"
IMAGE_NAME = f"clickhouse/{IMAGE_TYPE}-builder"
IMAGE_TYPE = "binary-builder"
IMAGE_NAME = f"clickhouse/{IMAGE_TYPE}"
class BuildException(Exception):

View File

@ -33,6 +33,9 @@ ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_m
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8

View File

@ -6,9 +6,17 @@ FROM clickhouse/test-util:$FROM_TAG
RUN apt-get update \
&& apt-get install \
brotli \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
cmake \
expect \
file \
libclang-${LLVM_VERSION}-dev \
libclang-rt-${LLVM_VERSION}-dev \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
lsof \
ninja-build \
odbcinst \
psmisc \
python3 \
@ -26,14 +34,48 @@ RUN apt-get update \
RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz"
# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
# FIXME: workaround for "The imported target "merge-fdata" references the file" error
# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
&& cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \
&& odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& rm -rf /tmp/clickhouse-odbc-tmp
ARG CCACHE_VERSION=4.6.1
RUN mkdir /tmp/ccache \
&& cd /tmp/ccache \
&& curl -L \
-O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz \
-O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz.asc \
&& gpg --recv-keys --keyserver hkps://keyserver.ubuntu.com 5A939A71A46792CF57866A51996DDA075594ADB8 \
&& gpg --verify ccache-4.6.1.tar.xz.asc \
&& tar xf ccache-$CCACHE_VERSION.tar.xz \
&& cd /tmp/ccache/ccache-$CCACHE_VERSION \
&& cmake -DCMAKE_INSTALL_PREFIX=/usr \
-DCMAKE_BUILD_TYPE=None \
-DZSTD_FROM_INTERNET=ON \
-DREDIS_STORAGE_BACKEND=OFF \
-Wno-dev \
-B build \
-S . \
&& make VERBOSE=1 -C build \
&& make install -C build \
&& cd / \
&& rm -rf /tmp/ccache
ARG TARGETARCH
ARG SCCACHE_VERSION=v0.7.7
ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1
# sccache requires a value for the region. So by default we use The Default Region
ENV SCCACHE_REGION=us-east-1
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \
tar xz -C /tmp \
&& mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \
&& rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r
# Give suid to gdb to grant it attach permissions
# chmod 777 to make the container user independent

View File

@ -343,10 +343,9 @@ quit
# which is confusing.
task_exit_code=$fuzzer_exit_code
echo "failure" > status.txt
{ rg -ao "Found error:.*" fuzzer.log \
|| rg -ao "Exception:.*" fuzzer.log \
|| echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \
| tail -1 > description.txt
echo "Let op!" > description.txt
echo "Fuzzer went wrong with error code: ($fuzzer_exit_code). Its process died somehow when the server stayed alive. The server log probably won't tell you much so try to find information in other files." >>description.txt
{ rg -ao "Found error:.*" fuzzer.log || rg -ao "Exception:.*" fuzzer.log; } | tail -1 >>description.txt
fi
if test -f core.*; then

View File

@ -24,17 +24,18 @@ RUN pip3 install \
deepdiff \
sqlglot
ARG odbc_repo="https://github.com/ClickHouse/clickhouse-odbc.git"
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz"
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& cd /tmp/clickhouse-odbc-tmp \
&& curl -L ${odbc_driver_url} | tar --strip-components=1 -xz clickhouse-odbc-1.1.6-Linux \
&& mkdir /usr/local/lib64 -p \
&& cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib64/ \
&& odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& sed -i 's"=libclickhouseodbc"=/usr/local/lib64/libclickhouseodbc"' /etc/odbcinst.ini \
&& rm -rf /tmp/clickhouse-odbc-tmp
RUN git clone --recursive ${odbc_repo} \
&& mkdir -p /clickhouse-odbc/build \
&& cmake -S /clickhouse-odbc -B /clickhouse-odbc/build \
&& ls /clickhouse-odbc/build/driver \
&& make -j 10 -C /clickhouse-odbc/build \
&& ls /clickhouse-odbc/build/driver \
&& mkdir -p /usr/local/lib64/ && cp /clickhouse-odbc/build/driver/lib*.so /usr/local/lib64/ \
&& odbcinst -i -d -f /clickhouse-odbc/packaging/odbcinst.ini.sample \
&& odbcinst -i -s -l -f /clickhouse-odbc/packaging/odbc.ini.sample
ENV TZ=Europe/Amsterdam
ENV MAX_RUN_TIME=9000

1
docker/test/stateless/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/minio_data

View File

@ -3,7 +3,7 @@
ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz"
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz"
# golang version 1.13 on Ubuntu 20 is enough for tests
RUN apt-get update -y \
@ -35,7 +35,6 @@ RUN apt-get update -y \
sudo \
tree \
unixodbc \
wget \
rustc \
cargo \
zstd \
@ -50,11 +49,14 @@ RUN apt-get update -y \
RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
&& cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \
&& odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& rm -rf /tmp/clickhouse-odbc-tmp
&& cd /tmp/clickhouse-odbc-tmp \
&& curl -L ${odbc_driver_url} | tar --strip-components=1 -xz clickhouse-odbc-1.1.6-Linux \
&& mkdir /usr/local/lib64 -p \
&& cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib64/ \
&& odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& sed -i 's"=libclickhouseodbc"=/usr/local/lib64/libclickhouseodbc"' /etc/odbcinst.ini \
&& rm -rf /tmp/clickhouse-odbc-tmp
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
@ -70,11 +72,11 @@ ARG TARGETARCH
# Download Minio-related binaries
RUN arch=${TARGETARCH:-amd64} \
&& wget "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -O ./minio \
&& wget "https://dl.min.io/client/mc/release/linux-${arch}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -O ./mc \
&& curl -L "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -o ./minio \
&& curl -L "https://dl.min.io/client/mc/release/linux-${arch}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -o ./mc \
&& chmod +x ./mc ./minio
RUN wget --no-verbose 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \
RUN curl -L --no-verbose -O 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \
&& tar -xvf hadoop-3.3.1.tar.gz \
&& rm -rf hadoop-3.3.1.tar.gz

View File

@ -18,7 +18,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
yamllint \
locales \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \
&& pip3 install black==23.12.0 boto3 codespell==2.2.1 mypy==1.8.0 PyGithub unidiff pylint==3.1.0 \
requests types-requests \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \
&& rm -rf /root/.cache/pip
@ -59,5 +60,4 @@ RUN arch=${TARGETARCH:-amd64} \
COPY run.sh /
COPY process_style_check_result.py /
CMD ["/bin/bash", "/run.sh"]

View File

@ -79,6 +79,18 @@ remove_keeper_config "async_replication" "1"
# create_if_not_exists feature flag doesn't exist on some older versions
remove_keeper_config "create_if_not_exists" "[01]"
#todo: remove these after 24.3 released.
sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \
| sed "s|<object_storage_type>azure|<object_storage_type>azure_blob_storage|" \
> /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml
#todo: remove these after 24.3 released.
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
| sed "s|<object_storage_type>local|<object_storage_type>local_blob_storage|" \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
# latest_logs_cache_size_threshold setting doesn't exist on some older versions
remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
@ -113,6 +125,18 @@ sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
#todo: remove these after 24.3 released.
sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \
| sed "s|<object_storage_type>azure|<object_storage_type>azure_blob_storage|" \
> /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml
#todo: remove these after 24.3 released.
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
| sed "s|<object_storage_type>local|<object_storage_type>local_blob_storage|" \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
# async_replication setting doesn't exist on some older versions
remove_keeper_config "async_replication" "1"

View File

@ -26,6 +26,8 @@ RUN apt-get update \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list \
&& apt-get update \
&& apt-get install --yes --no-install-recommends --verbose-versions llvm-${LLVM_VERSION} \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
@ -41,20 +43,11 @@ RUN apt-get update \
bash \
bsdmainutils \
build-essential \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
cmake \
gdb \
git \
gperf \
libclang-rt-${LLVM_VERSION}-dev \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
libclang-${LLVM_VERSION}-dev \
moreutils \
nasm \
ninja-build \
pigz \
rename \
software-properties-common \
@ -63,49 +56,4 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer
# FIXME: workaround for "The imported target "merge-fdata" references the file" error
# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake
ARG CCACHE_VERSION=4.6.1
RUN mkdir /tmp/ccache \
&& cd /tmp/ccache \
&& curl -L \
-O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz \
-O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz.asc \
&& gpg --recv-keys --keyserver hkps://keyserver.ubuntu.com 5A939A71A46792CF57866A51996DDA075594ADB8 \
&& gpg --verify ccache-4.6.1.tar.xz.asc \
&& tar xf ccache-$CCACHE_VERSION.tar.xz \
&& cd /tmp/ccache/ccache-$CCACHE_VERSION \
&& cmake -DCMAKE_INSTALL_PREFIX=/usr \
-DCMAKE_BUILD_TYPE=None \
-DZSTD_FROM_INTERNET=ON \
-DREDIS_STORAGE_BACKEND=OFF \
-Wno-dev \
-B build \
-S . \
&& make VERBOSE=1 -C build \
&& make install -C build \
&& cd / \
&& rm -rf /tmp/ccache
ARG TARGETARCH
ARG SCCACHE_VERSION=v0.5.4
ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1
# sccache requires a value for the region. So by default we use The Default Region
ENV SCCACHE_REGION=us-east-1
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \
tar xz -C /tmp \
&& mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \
&& rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r
COPY process_functional_tests_result.py /

View File

@ -78,8 +78,8 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun
#### Setup the Debian repository
``` bash
sudo apt-get install -y apt-transport-https ca-certificates dirmngr
sudo gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754
sudo apt-get install -y apt-transport-https ca-certificates curl gnupg
curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list

View File

@ -170,7 +170,7 @@ RESTORE TABLE test.table PARTITIONS '2', '3'
### Backups as tar archives
Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported.
Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported.
Write a backup as a tar:
```
@ -444,10 +444,6 @@ Often data that is ingested into ClickHouse is delivered through some sort of pe
Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they might not be the best choice for serving live queries. A possible solution is to create additional replicas with this kind of filesystem and exclude them from the [Distributed](../engines/table-engines/special/distributed.md) tables that are used for `SELECT` queries. Snapshots on such replicas will be out of reach of any queries that modify data. As a bonus, these replicas might have special hardware configurations with more disks attached per server, which would be cost-effective.
### clickhouse-copier {#clickhouse-copier}
[clickhouse-copier](../operations/utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters.
For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well.
### Manipulations with Parts {#manipulations-with-parts}

View File

@ -3954,6 +3954,7 @@ Possible values:
- `none` — Is similar to throw, but distributed DDL query returns no result set.
- `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts.
- `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts.
- `none_only_active` - similar to `none`, but doesn't wait for inactive replicas of the `Replicated` database. Note: with this mode it's impossible to figure out that the query was not executed on some replica and will be executed in background.
- `null_status_on_timeout_only_active` — similar to `null_status_on_timeout`, but doesn't wait for inactive replicas of the `Replicated` database
- `throw_only_active` — similar to `throw`, but doesn't wait for inactive replicas of the `Replicated` database

View File

@ -513,10 +513,6 @@ Part was moved to another disk and should be deleted in own destructor.
Not active data part with identity refcounter, it is deleting right now by a cleaner.
### PartsInMemory
In-memory parts.
### PartsOutdated
Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.

View File

@ -1,187 +0,0 @@
---
slug: /en/operations/utilities/clickhouse-copier
sidebar_position: 59
sidebar_label: clickhouse-copier
---
# clickhouse-copier
Copies data from the tables in one cluster to tables in another (or the same) cluster.
:::note
To get a consistent copy, the data in the source tables and partitions should not change during the entire process.
:::
You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ClickHouse Keeper, or ZooKeeper, is used for syncing the processes.
After starting, `clickhouse-copier`:
- Connects to ClickHouse Keeper and receives:
- Copying jobs.
- The state of the copying jobs.
- It performs the jobs.
Each running process chooses the “closest” shard of the source cluster and copies the data into the destination cluster, resharding the data if necessary.
`clickhouse-copier` tracks the changes in ClickHouse Keeper and applies them on the fly.
To reduce network traffic, we recommend running `clickhouse-copier` on the same server where the source data is located.
## Running Clickhouse-copier {#running-clickhouse-copier}
The utility should be run manually:
``` bash
$ clickhouse-copier --daemon --config keeper.xml --task-path /task/path --base-dir /path/to/dir
```
Parameters:
- `daemon` — Starts `clickhouse-copier` in daemon mode.
- `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper.
- `task-path` — The path to the ClickHouse Keeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`.
- `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper.
- `task-upload-force` — Force upload `task-file` even if node already exists. Default is false.
- `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_<PID>` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched.
## Format of keeper.xml {#format-of-zookeeper-xml}
``` xml
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
<count>3</count>
</logger>
<zookeeper>
<node index="1">
<host>127.0.0.1</host>
<port>2181</port>
</node>
</zookeeper>
</clickhouse>
```
## Configuration of Copying Tasks {#configuration-of-copying-tasks}
``` xml
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
<!--
source cluster & destination clusters accept exactly the same
parameters as parameters for the usual Distributed table
see https://clickhouse.com/docs/en/engines/table-engines/special/distributed/
-->
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
<!--
<user>default</user>
<password>default</password>
<secure>1</secure>
-->
</replica>
</shard>
...
</source_cluster>
<destination_cluster>
...
</destination_cluster>
</remote_servers>
<!-- How many simultaneously active workers are possible. If you run more workers superfluous workers will sleep. -->
<max_workers>2</max_workers>
<!-- Setting used to fetch (pull) data from source cluster tables -->
<settings_pull>
<readonly>1</readonly>
</settings_pull>
<!-- Setting used to insert (push) data to destination cluster tables -->
<settings_push>
<readonly>0</readonly>
</settings_push>
<!-- Common setting for fetch (pull) and insert (push) operations. Also, copier process context uses it.
They are overlaid by <settings_pull/> and <settings_push/> respectively. -->
<settings>
<connect_timeout>3</connect_timeout>
<!-- Sync insert is set forcibly, leave it here just in case. -->
<distributed_foreground_insert>1</distributed_foreground_insert>
</settings>
<!-- Copying tasks description.
You could specify several table task in the same task description (in the same ZooKeeper node), they will be performed
sequentially.
-->
<tables>
<!-- A table task, copies one table. -->
<table_hits>
<!-- Source cluster name (from <remote_servers/> section) and tables in it that should be copied -->
<cluster_pull>source_cluster</cluster_pull>
<database_pull>test</database_pull>
<table_pull>hits</table_pull>
<!-- Destination cluster name and tables in which the data should be inserted -->
<cluster_push>destination_cluster</cluster_push>
<database_push>test</database_push>
<table_push>hits2</table_push>
<!-- Engine of destination tables.
If destination tables have not be created, workers create them using columns definition from source tables and engine
definition from here.
NOTE: If the first worker starts insert data and detects that destination partition is not empty then the partition will
be dropped and refilled, take it into account if you already have some data in destination tables. You could directly
specify partitions that should be copied in <enabled_partitions/>, they should be in quoted format like partition column of
system.parts table.
-->
<engine>
ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
PARTITION BY toMonday(date)
ORDER BY (CounterID, EventDate)
</engine>
<!-- Sharding key used to insert data to destination cluster -->
<sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key>
<!-- Optional expression that filter data while pull them from source servers -->
<where_condition>CounterID != 0</where_condition>
<!-- This section specifies partitions that should be copied, other partition will be ignored.
Partition names should have the same format as
partition column of system.parts table (i.e. a quoted text).
Since partition key of source and destination cluster could be different,
these partition names specify destination partitions.
NOTE: In spite of this section is optional (if it is not specified, all partitions will be copied),
it is strictly recommended to specify them explicitly.
If you already have some ready partitions on destination cluster they
will be removed at the start of the copying since they will be interpeted
as unfinished data from the previous copying!!!
-->
<enabled_partitions>
<partition>'2018-02-26'</partition>
<partition>'2018-03-05'</partition>
...
</enabled_partitions>
</table_hits>
<!-- Next table to copy. It is not copied until previous table is copying. -->
<table_visits>
...
</table_visits>
...
</tables>
</clickhouse>
```
`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change.

View File

@ -201,12 +201,12 @@ Arguments:
- `-S`, `--structure` — table structure for input data.
- `--input-format` — input format, `TSV` by default.
- `-f`, `--file` — path to data, `stdin` by default.
- `-F`, `--file` — path to data, `stdin` by default.
- `-q`, `--query` — queries to execute with `;` as delimiter. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`.
- `--queries-file` - file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--query queries1.sql --query queries2.sql`. Cannot be used simultaneously with `--query`.
- `--multiquery, -n` If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
- `-N`, `--table` — table name where to put output data, `table` by default.
- `--format`, `--output-format` — output format, `TSV` by default.
- `-f`, `--format`, `--output-format` — output format, `TSV` by default.
- `-d`, `--database` — default database, `_local` by default.
- `--stacktrace` — whether to dump debug output in case of exception.
- `--echo` — print query before execution.

View File

@ -2,13 +2,11 @@
slug: /en/operations/utilities/
sidebar_position: 56
sidebar_label: List of tools and utilities
pagination_next: 'en/operations/utilities/clickhouse-copier'
---
# List of tools and utilities
- [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this.
- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster.
- [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — Loads server with the custom queries and settings.
- [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries.
- [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data.

View File

@ -14,8 +14,6 @@
- `N` The number of elements to return.
If the parameter is omitted, default value is the size of input.
- `column` The value (Integer, String, Float and other Generic types).
**Example**
@ -36,13 +34,12 @@
Gets all the String implementations of all numbers in column:
``` sql
SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5));
SELECT groupArraySorted(5)(str) FROM (SELECT toString(number) as str FROM numbers(5));
```
``` text
┌─groupArraySorted(str)────────┐
│ ['0','1','2','3','4'] │
└──────────────────────────────┘
```
┌─groupArraySorted(5)(str)─┐
│ ['0','1','2','3','4'] │
└──────────────────────────┘
```

View File

@ -19,7 +19,7 @@ empty([x])
An array is considered empty if it does not contain any elements.
:::note
Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`.
Can be optimized by enabling the [`optimize_functions_to_subcolumns` setting](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`.
:::
The function also works for [strings](string-functions.md#empty) or [UUID](uuid-functions.md#empty).
@ -104,17 +104,416 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat
Alias: `OCTET_LENGTH`
## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64
## emptyArrayUInt8
## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64
Returns an empty UInt8 array.
## emptyArrayFloat32, emptyArrayFloat64
**Syntax**
## emptyArrayDate, emptyArrayDateTime
```sql
emptyArrayUInt8()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayUInt8();
```
Result:
```response
[]
```
## emptyArrayUInt16
Returns an empty UInt16 array.
**Syntax**
```sql
emptyArrayUInt16()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayUInt16();
```
Result:
```response
[]
```
## emptyArrayUInt32
Returns an empty UInt32 array.
**Syntax**
```sql
emptyArrayUInt32()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayUInt32();
```
Result:
```response
[]
```
## emptyArrayUInt64
Returns an empty UInt64 array.
**Syntax**
```sql
emptyArrayUInt64()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayUInt64();
```
Result:
```response
[]
```
## emptyArrayInt8
Returns an empty Int8 array.
**Syntax**
```sql
emptyArrayInt8()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayInt8();
```
Result:
```response
[]
```
## emptyArrayInt16
Returns an empty Int16 array.
**Syntax**
```sql
emptyArrayInt16()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayInt16();
```
Result:
```response
[]
```
## emptyArrayInt32
Returns an empty Int32 array.
**Syntax**
```sql
emptyArrayInt32()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayInt32();
```
Result:
```response
[]
```
## emptyArrayInt64
Returns an empty Int64 array.
**Syntax**
```sql
emptyArrayInt64()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayInt64();
```
Result:
```response
[]
```
## emptyArrayFloat32
Returns an empty Float32 array.
**Syntax**
```sql
emptyArrayFloat32()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayFloat32();
```
Result:
```response
[]
```
## emptyArrayFloat64
Returns an empty Float64 array.
**Syntax**
```sql
emptyArrayFloat64()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayFloat64();
```
Result:
```response
[]
```
## emptyArrayDate
Returns an empty Date array.
**Syntax**
```sql
emptyArrayDate()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayDate();
```
## emptyArrayDateTime
Returns an empty DateTime array.
**Syntax**
```sql
[]
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayDateTime();
```
Result:
```response
[]
```
## emptyArrayString
Accepts zero arguments and returns an empty array of the appropriate type.
Returns an empty String array.
**Syntax**
```sql
emptyArrayString()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayString();
```
Result:
```response
[]
```
## emptyArrayToSingle

View File

@ -394,8 +394,7 @@ Result:
## toYear
Converts a date or date with time to the year number (AD) as `UInt16` value.
Returns the year component (AD) of a date or date with time.
**Syntax**
@ -431,7 +430,7 @@ Result:
## toQuarter
Converts a date or date with time to the quarter number (1-4) as `UInt8` value.
Returns the quarter (1-4) of a date or date with time.
**Syntax**
@ -465,10 +464,9 @@ Result:
└──────────────────────────────────────────────┘
```
## toMonth
Converts a date or date with time to the month number (1-12) as `UInt8` value.
Returns the month component (1-12) of a date or date with time.
**Syntax**
@ -504,7 +502,7 @@ Result:
## toDayOfYear
Converts a date or date with time to the number of the day of the year (1-366) as `UInt16` value.
Returns the number of the day within the year (1-366) of a date or date with time.
**Syntax**
@ -540,7 +538,7 @@ Result:
## toDayOfMonth
Converts a date or date with time to the number of the day in the month (1-31) as `UInt8` value.
Returns the number of the day within the month (1-31) of a date or date with time.
**Syntax**
@ -576,7 +574,7 @@ Result:
## toDayOfWeek
Converts a date or date with time to the number of the day in the week as `UInt8` value.
Returns the number of the day within the week of a date or date with time.
The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument.
@ -627,7 +625,7 @@ Result:
## toHour
Converts a date with time to the number of the hour in 24-hour time (0-23) as `UInt8` value.
Returns the hour component (0-24) of a date with time.
Assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always exactly when it occurs - it depends on the timezone).
@ -641,7 +639,7 @@ Alias: `HOUR`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
@ -665,7 +663,7 @@ Result:
## toMinute
Converts a date with time to the number of the minute of the hour (0-59) as `UInt8` value.
Returns the minute component (0-59) a date with time.
**Syntax**
@ -677,7 +675,7 @@ Alias: `MINUTE`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
@ -701,7 +699,7 @@ Result:
## toSecond
Converts a date with time to the second in the minute (0-59) as `UInt8` value. Leap seconds are not considered.
Returns the second component (0-59) of a date with time. Leap seconds are not considered.
**Syntax**
@ -713,7 +711,7 @@ Alias: `SECOND`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
@ -735,6 +733,40 @@ Result:
└─────────────────────────────────────────────┘
```
## toMillisecond
Returns the millisecond component (0-999) of a date with time.
**Syntax**
```sql
toMillisecond(value)
```
*Arguments**
- `value` - [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
Alias: `MILLISECOND`
```sql
SELECT toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3))
```
Result:
```response
┌──toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3))─┐
│ 456 │
└────────────────────────────────────────────────────────────┘
```
**Returned value**
- The millisecond in the minute (0 - 59) of the given date/time
Type: `UInt16`
## toUnixTimestamp
Converts a string, a date or a date with time to the [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time) in `UInt32` representation.

View File

@ -433,3 +433,292 @@ Result:
│ [0,1,2,3,4,5,6,7] │
└───────────────────┘
```
## mortonEncode
Calculates the Morton encoding (ZCurve) for a list of unsigned integers.
The function has two modes of operation:
- Simple
- Expanded
### Simple mode
Accepts up to 8 unsigned integers as arguments and produces a UInt64 code.
**Syntax**
```sql
mortonEncode(args)
```
**Parameters**
- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
**Returned value**
- A UInt64 code
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
**Example**
Query:
```sql
SELECT mortonEncode(1, 2, 3);
```
Result:
```response
53
```
### Expanded mode
Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments.
Each number in the mask configures the amount of range expansion:<br/>
1 - no expansion<br/>
2 - 2x expansion<br/>
3 - 3x expansion<br/>
...<br/>
Up to 8x expansion.<br/>
**Syntax**
```sql
mortonEncode(range_mask, args)
```
**Parameters**
- `range_mask`: 1-8.
- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
Note: when using columns for `args` the provided `range_mask` tuple should still be a constant.
**Returned value**
- A UInt64 code
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
**Example**
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality)
For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
Query:
```sql
SELECT mortonEncode((1,2), 1024, 16);
```
Result:
```response
1572864
```
Note: tuple size must be equal to the number of the other arguments.
**Example**
Morton encoding for one argument is always the argument itself:
Query:
```sql
SELECT mortonEncode(1);
```
Result:
```response
1
```
**Example**
It is also possible to expand one argument too:
Query:
```sql
SELECT mortonEncode(tuple(2), 128);
```
Result:
```response
32768
```
**Example**
You can also use column names in the function.
Query:
First create the table and insert some data.
```sql
create table morton_numbers(
n1 UInt32,
n2 UInt32,
n3 UInt16,
n4 UInt16,
n5 UInt8,
n6 UInt8,
n7 UInt8,
n8 UInt8
)
Engine=MergeTree()
ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into morton_numbers (*) values(1,2,3,4,5,6,7,8);
```
Use column names instead of constants as function arguments to `mortonEncode`
Query:
```sql
SELECT mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8) FROM morton_numbers;
```
Result:
```response
2155374165
```
**implementation details**
Please note that you can fit only so many bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero.
## mortonDecode
Decodes a Morton encoding (ZCurve) into the corresponding unsigned integer tuple.
As with the `mortonEncode` function, this function has two modes of operation:
- Simple
- Expanded
### Simple mode
Accepts a resulting tuple size as the first argument and the code as the second argument.
**Syntax**
```sql
mortonDecode(tuple_size, code)
```
**Parameters**
- `tuple_size`: integer value no more than 8.
- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code.
**Returned value**
- [tuple](../../sql-reference/data-types/tuple.md) of the specified size.
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
**Example**
Query:
```sql
SELECT mortonDecode(3, 53);
```
Result:
```response
["1","2","3"]
```
### Expanded mode
Accepts a range mask (tuple) as a first argument and the code as the second argument.
Each number in the mask configures the amount of range shrink:<br/>
1 - no shrink<br/>
2 - 2x shrink<br/>
3 - 3x shrink<br/>
...<br/>
Up to 8x shrink.<br/>
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality)
For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
As with the encode function, this is limited to 8 numbers at most.
**Example**
Query:
```sql
SELECT mortonDecode(1, 1);
```
Result:
```response
["1"]
```
**Example**
It is also possible to shrink one argument:
Query:
```sql
SELECT mortonDecode(tuple(2), 32768);
```
Result:
```response
["128"]
```
**Example**
You can also use column names in the function.
First create the table and insert some data.
Query:
```sql
create table morton_numbers(
n1 UInt32,
n2 UInt32,
n3 UInt16,
n4 UInt16,
n5 UInt8,
n6 UInt8,
n7 UInt8,
n8 UInt8
)
Engine=MergeTree()
ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into morton_numbers (*) values(1,2,3,4,5,6,7,8);
```
Use column names instead of constants as function arguments to `mortonDecode`
Query:
```sql
select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) from morton_numbers;
```
Result:
```response
1 2 3 4 5 6 7 8
```

View File

@ -53,6 +53,62 @@ String starting with `POLYGON`
Polygon
## readWKTPoint
The `readWKTPoint` function in ClickHouse parses a Well-Known Text (WKT) representation of a Point geometry and returns a point in the internal ClickHouse format.
### Syntax
```sql
readWKTPoint(wkt_string)
```
### Arguments
- `wkt_string`: The input WKT string representing a Point geometry.
### Returned value
The function returns a ClickHouse internal representation of the Point geometry.
### Example
```sql
SELECT readWKTPoint('POINT (1.2 3.4)');
```
```response
(1.2,3.4)
```
## readWKTRing
Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format.
### Syntax
```sql
readWKTRing(wkt_string)
```
### Arguments
- `wkt_string`: The input WKT string representing a Polygon geometry.
### Returned value
The function returns a ClickHouse internal representation of the ring (closed linestring) geometry.
### Example
```sql
SELECT readWKTRing('LINESTRING (1 1, 2 2, 3 3, 1 1)');
```
```response
[(1,1),(2,2),(3,3),(1,1)]
```
## polygonsWithinSpherical
Returns true or false depending on whether or not one polygon lies completely inside another polygon. Reference https://www.boost.org/doc/libs/1_62_0/libs/geometry/doc/html/geometry/reference/algorithms/within/within_2.html

File diff suppressed because it is too large Load Diff

View File

@ -11,79 +11,173 @@ elimination](../../sql-reference/functions/index.md#common-subexpression-elimina
function return different random values.
Related content
- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse)
:::note
The random numbers are generated by non-cryptographic algorithms.
:::
## rand, rand32
## rand
Returns a random UInt32 number, evenly distributed across the range of all possible UInt32 numbers.
Returns a random UInt32 number with uniform distribution.
Uses a linear congruential generator.
Uses a linear congruential generator with an initial state obtained from the system, which means that while it appears random, it's not truly random and can be predictable if the initial state is known. For scenarios where true randomness is crucial, consider using alternative methods like system-level calls or integrating with external libraries.
### Syntax
```sql
rand()
```
Alias: `rand32`
### Arguments
None.
### Returned value
Returns a number of type UInt32.
### Example
```sql
SELECT rand();
```
```response
1569354847 -- Note: The actual output will be a random number, not the specific number shown in the example
```
## rand64
Returns a random UInt64 number, evenly distributed across the range of all possible UInt64 numbers.
Returns a random UInt64 integer (UInt64) number
Uses a linear congruential generator.
### Syntax
```sql
rand64()
```
### Arguments
None.
### Returned value
Returns a number UInt64 number with uniform distribution.
Uses a linear congruential generator with an initial state obtained from the system, which means that while it appears random, it's not truly random and can be predictable if the initial state is known. For scenarios where true randomness is crucial, consider using alternative methods like system-level calls or integrating with external libraries.
### Example
```sql
SELECT rand64();
```
```response
15030268859237645412 -- Note: The actual output will be a random number, not the specific number shown in the example.
```
## randCanonical
Returns a random Float64 value, evenly distributed in interval [0, 1).
Returns a random Float64 number.
### Syntax
```sql
randCanonical()
```
### Arguments
None.
### Returned value
Returns a Float64 value between 0 (inclusive) and 1 (exclusive).
### Example
```sql
SELECT randCanonical();
```
```response
0.3452178901234567 - Note: The actual output will be a random Float64 number between 0 and 1, not the specific number shown in the example.
```
## randConstant
Like `rand` but produces a constant column with a random value.
Generates a single constant column filled with a random value. Unlike `rand`, this function ensures the same random value appears in every row of the generated column, making it useful for scenarios requiring a consistent random seed across rows in a single query.
**Example**
### Syntax
``` sql
SELECT rand(), rand(1), rand(number), randConstant(), randConstant(1), randConstant(number)
FROM numbers(3)
```sql
randConstant([x]);
```
Result:
### Arguments
``` result
┌─────rand()─┬────rand(1)─┬─rand(number)─┬─randConstant()─┬─randConstant(1)─┬─randConstant(number)─┐
│ 3047369878 │ 4132449925 │ 4044508545 │ 2740811946 │ 4229401477 │ 1924032898 │
│ 2938880146 │ 1267722397 │ 4154983056 │ 2740811946 │ 4229401477 │ 1924032898 │
│ 956619638 │ 4238287282 │ 1104342490 │ 2740811946 │ 4229401477 │ 1924032898 │
└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘
- **[x] (Optional):** An optional expression that influences the generated random value. Even if provided, the resulting value will still be constant within the same query execution. Different queries using the same expression will likely generate different constant values.
### Returned value
Returns a column of type UInt32 containing the same random value in each row.
### Implementation details
The actual output will be different for each query execution, even with the same optional expression. The optional parameter may not significantly change the generated value compared to using `randConstant` alone.
### Examples
```sql
SELECT randConstant() AS random_value;
```
```response
| random_value |
|--------------|
| 1234567890 |
```
```sql
SELECT randConstant(10) AS random_value;
```
```response
| random_value |
|--------------|
| 9876543210 |
```
## randUniform
Returns a random Float64 drawn uniformly from interval [`min`, `max`) ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)).
Returns a random Float64 drawn uniformly from interval [`min`, `max`].
**Syntax**
### Syntax
``` sql
```sql
randUniform(min, max)
```
**Arguments**
### Arguments
- `min` - `Float64` - left boundary of the range,
- `max` - `Float64` - right boundary of the range.
**Returned value**
### Returned value
- Random number.
A random number of type [Float64](/docs/en/sql-reference/data-types/float.md).
Type: [Float64](/docs/en/sql-reference/data-types/float.md).
### Example
**Example**
``` sql
```sql
SELECT randUniform(5.5, 10) FROM numbers(5)
```
Result:
``` result
```response
┌─randUniform(5.5, 10)─┐
│ 8.094978491443102 │
│ 7.3181248914450885 │
@ -99,7 +193,7 @@ Returns a random Float64 drawn from a [normal distribution](https://en.wikipedia
**Syntax**
``` sql
```sql
randNormal(mean, variance)
```
@ -116,13 +210,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
**Example**
``` sql
```sql
SELECT randNormal(10, 2) FROM numbers(5)
```
Result:
``` result
```result
┌──randNormal(10, 2)─┐
│ 13.389228911709653 │
│ 8.622949707401295 │
@ -138,7 +232,7 @@ Returns a random Float64 drawn from a [log-normal distribution](https://en.wikip
**Syntax**
``` sql
```sql
randLogNormal(mean, variance)
```
@ -155,13 +249,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
**Example**
``` sql
```sql
SELECT randLogNormal(100, 5) FROM numbers(5)
```
Result:
``` result
```result
┌─randLogNormal(100, 5)─┐
│ 1.295699673937363e48 │
│ 9.719869109186684e39 │
@ -177,7 +271,7 @@ Returns a random UInt64 drawn from a [binomial distribution](https://en.wikipedi
**Syntax**
``` sql
```sql
randBinomial(experiments, probability)
```
@ -194,13 +288,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
**Example**
``` sql
```sql
SELECT randBinomial(100, .75) FROM numbers(5)
```
Result:
``` result
```result
┌─randBinomial(100, 0.75)─┐
│ 74 │
│ 78 │
@ -216,7 +310,7 @@ Returns a random UInt64 drawn from a [negative binomial distribution](https://en
**Syntax**
``` sql
```sql
randNegativeBinomial(experiments, probability)
```
@ -233,13 +327,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
**Example**
``` sql
```sql
SELECT randNegativeBinomial(100, .75) FROM numbers(5)
```
Result:
``` result
```result
┌─randNegativeBinomial(100, 0.75)─┐
│ 33 │
│ 32 │
@ -255,7 +349,7 @@ Returns a random UInt64 drawn from a [Poisson distribution](https://en.wikipedia
**Syntax**
``` sql
```sql
randPoisson(n)
```
@ -271,13 +365,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
**Example**
``` sql
```sql
SELECT randPoisson(10) FROM numbers(5)
```
Result:
``` result
```result
┌─randPoisson(10)─┐
│ 8 │
│ 8 │
@ -293,7 +387,7 @@ Returns a random UInt64 drawn from a [Bernoulli distribution](https://en.wikiped
**Syntax**
``` sql
```sql
randBernoulli(probability)
```
@ -309,13 +403,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
**Example**
``` sql
```sql
SELECT randBernoulli(.75) FROM numbers(5)
```
Result:
``` result
```result
┌─randBernoulli(0.75)─┐
│ 1 │
│ 1 │
@ -331,7 +425,7 @@ Returns a random Float64 drawn from a [exponential distribution](https://en.wiki
**Syntax**
``` sql
```sql
randExponential(lambda)
```
@ -347,13 +441,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
**Example**
``` sql
```sql
SELECT randExponential(1/10) FROM numbers(5)
```
Result:
``` result
```result
┌─randExponential(divide(1, 10))─┐
│ 44.71628934340778 │
│ 4.211013337903262 │
@ -369,7 +463,7 @@ Returns a random Float64 drawn from a [Chi-square distribution](https://en.wikip
**Syntax**
``` sql
```sql
randChiSquared(degree_of_freedom)
```
@ -385,13 +479,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
**Example**
``` sql
```sql
SELECT randChiSquared(10) FROM numbers(5)
```
Result:
``` result
```result
┌─randChiSquared(10)─┐
│ 10.015463656521543 │
│ 9.621799919882768 │
@ -407,7 +501,7 @@ Returns a random Float64 drawn from a [Student's t-distribution](https://en.wiki
**Syntax**
``` sql
```sql
randStudentT(degree_of_freedom)
```
@ -423,13 +517,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
**Example**
``` sql
```sql
SELECT randStudentT(10) FROM numbers(5)
```
Result:
``` result
```result
┌─────randStudentT(10)─┐
│ 1.2217309938538725 │
│ 1.7941971681200541 │
@ -445,7 +539,7 @@ Returns a random Float64 drawn from a [F-distribution](https://en.wikipedia.org/
**Syntax**
``` sql
```sql
randFisherF(d1, d2)
```
@ -462,13 +556,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
**Example**
``` sql
```sql
SELECT randFisherF(10, 3) FROM numbers(5)
```
Result:
``` result
```result
┌──randFisherF(10, 3)─┐
│ 7.286287504216609 │
│ 0.26590779413050386 │
@ -484,7 +578,7 @@ Generates a string of the specified length filled with random bytes (including z
**Syntax**
``` sql
```sql
randomString(length)
```
@ -502,13 +596,13 @@ Type: [String](../../sql-reference/data-types/string.md).
Query:
``` sql
```sql
SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical;
```
Result:
``` text
```text
Row 1:
──────
str: 3 G : pT ?w тi k aV f6
@ -526,7 +620,7 @@ Generates a binary string of the specified length filled with random bytes (incl
**Syntax**
``` sql
```sql
randomFixedString(length);
```
@ -563,7 +657,7 @@ If you pass `length < 0`, the behavior of the function is undefined.
**Syntax**
``` sql
```sql
randomPrintableASCII(length)
```
@ -579,11 +673,11 @@ Type: [String](../../sql-reference/data-types/string.md)
**Example**
``` sql
```sql
SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers LIMIT 3
```
``` text
```text
┌─number─┬─str────────────────────────────┬─length(randomPrintableASCII(30))─┐
│ 0 │ SuiCOSTvC0csfABSw=UcSzp2.`rv8x │ 30 │
│ 1 │ 1Ag NlJ &RCN:*>HVPG;PE-nO"SUFD │ 30 │
@ -597,7 +691,7 @@ Generates a random string of a specified length. Result string contains valid UT
**Syntax**
``` sql
```sql
randomStringUTF8(length);
```
@ -635,11 +729,12 @@ Flips the bits of String or FixedString `s`, each with probability `prob`.
**Syntax**
``` sql
```sql
fuzzBits(s, prob)
```
**Arguments**
- `s` - `String` or `FixedString`,
- `prob` - constant `Float32/64` between 0.0 and 1.0.
@ -649,14 +744,14 @@ Fuzzed string with same type as `s`.
**Example**
``` sql
```sql
SELECT fuzzBits(materialize('abacaba'), 0.1)
FROM numbers(3)
```
Result:
``` result
```result
┌─fuzzBits(materialize('abacaba'), 0.1)─┐
│ abaaaja │
│ a*cjab+ │

View File

@ -968,7 +968,7 @@ Converts a numeric value to String with the number of fractional digits in the o
toDecimalString(number, scale)
```
**Parameters**
**Arguments**
- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md),
- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
@ -1261,7 +1261,7 @@ Converts input value `x` to the specified data type `T`. Always returns [Nullabl
accurateCastOrNull(x, T)
```
**Parameters**
**Arguments**
- `x` — Input value.
- `T` — The name of the returned data type.
@ -1314,7 +1314,7 @@ Converts input value `x` to the specified data type `T`. Returns default type va
accurateCastOrDefault(x, T)
```
**Parameters**
**Arguments**
- `x` — Input value.
- `T` — The name of the returned data type.
@ -1675,7 +1675,7 @@ Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also pa
parseDateTime64BestEffort(time_string [, precision [, time_zone]])
```
**Parameters**
**Arguments**
- `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md).
- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
@ -1990,7 +1990,7 @@ Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wi
snowflakeToDateTime(value[, time_zone])
```
**Parameters**
**Arguments**
- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md).
@ -2026,7 +2026,7 @@ Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wi
snowflakeToDateTime64(value[, time_zone])
```
**Parameters**
**Arguments**
- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md).
@ -2062,7 +2062,7 @@ Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to th
dateTimeToSnowflake(value)
```
**Parameters**
**Arguments**
- `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md).
@ -2096,7 +2096,7 @@ Convert a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the f
dateTime64ToSnowflake(value)
```
**Parameters**
**Arguments**
- `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md).

View File

@ -155,7 +155,7 @@ Configuration example:
cutToFirstSignificantSubdomain(URL, TLD)
```
**Parameters**
**Arguments**
- `URL` — URL. [String](../../sql-reference/data-types/string.md).
- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).
@ -209,7 +209,7 @@ Configuration example:
cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD)
```
**Parameters**
**Arguments**
- `URL` — URL. [String](../../sql-reference/data-types/string.md).
- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).
@ -263,7 +263,7 @@ Configuration example:
firstSignificantSubdomainCustom(URL, TLD)
```
**Parameters**
**Arguments**
- `URL` — URL. [String](../../sql-reference/data-types/string.md).
- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).

View File

@ -335,7 +335,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest
There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`).
If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query.
If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table.
The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running.

View File

@ -202,6 +202,13 @@ Hierarchy of privileges:
- `S3`
- [dictGet](#grant-dictget)
- [displaySecretsInShowAndSelect](#grant-display-secrets)
- [NAMED COLLECTION ADMIN](#grant-named-collection-admin)
- `CREATE NAMED COLLECTION`
- `DROP NAMED COLLECTION`
- `ALTER NAMED COLLECTION`
- `SHOW NAMED COLLECTIONS`
- `SHOW NAMED COLLECTIONS SECRETS`
- `NAMED COLLECTION`
Examples of how this hierarchy is treated:
@ -498,6 +505,25 @@ and
[`format_display_secrets_in_show_and_select` format setting](../../operations/settings/formats#format_display_secrets_in_show_and_select)
are turned on.
### NAMED COLLECTION ADMIN
Allows a certain operation on a specified named collection. Before version 23.7 it was called NAMED COLLECTION CONTROL, and after 23.7 NAMED COLLECTION ADMIN was added and NAMED COLLECTION CONTROL is preserved as an alias.
- `NAMED COLLECTION ADMIN`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION CONTROL`
- `CREATE NAMED COLLECTION`. Level: `NAMED_COLLECTION`
- `DROP NAMED COLLECTION`. Level: `NAMED_COLLECTION`
- `ALTER NAMED COLLECTION`. Level: `NAMED_COLLECTION`
- `SHOW NAMED COLLECTIONS`. Level: `NAMED_COLLECTION`. Aliases: `SHOW NAMED COLLECTIONS`
- `SHOW NAMED COLLECTIONS SECRETS`. Level: `NAMED_COLLECTION`. Aliases: `SHOW NAMED COLLECTIONS SECRETS`
- `NAMED COLLECTION`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION USAGE, USE NAMED COLLECTION`
Unlike all other grants (CREATE, DROP, ALTER, SHOW) grant NAMED COLLECTION was added only in 23.7, while all others were added earlier - in 22.12.
**Examples**
Assuming a named collection is called abc, we grant privilege CREATE NAMED COLLECTION to user john.
- `GRANT CREATE NAMED COLLECTION ON abc TO john`
### ALL
Grants all the privileges on regulated entity to a user account or a role.

View File

@ -5,7 +5,7 @@ sidebar_label: cluster
title: "cluster, clusterAllReplicas"
---
Allows to access all shards in an existing cluster which configured in `remote_servers` section without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. One replica of each shard is queried.
Allows to access all shards (configured in the `remote_servers` section) of a cluster without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. Only one replica of each shard is queried.
`clusterAllReplicas` function — same as `cluster`, but all replicas are queried. Each replica in a cluster is used as a separate shard/connection.

View File

@ -5,7 +5,12 @@ sidebar_label: Window Functions
title: Window Functions
---
ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported:
Windows functions let you perform calculations across a set of rows that are related to the current row.
Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned.
## Standard Window Functions
ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported.
| Feature | Support or workaround |
|------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@ -25,6 +30,8 @@ ClickHouse supports the standard grammar for defining windows and window functio
## ClickHouse-specific Window Functions
There are also the following window function that's specific to ClickHouse:
### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS])
Finds non-negative derivative for given `metric_column` by `timestamp_column`.
@ -33,40 +40,6 @@ The computed value is the following for each row:
- `0` for 1st row,
- ${metric_i - metric_{i-1} \over timestamp_i - timestamp_{i-1}} * interval$ for $i_th$ row.
## References
### GitHub Issues
The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
### Tests
These tests contain the examples of the currently supported grammar:
https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml
https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql
### Postgres Docs
https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW
https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS
https://www.postgresql.org/docs/devel/functions-window.html
https://www.postgresql.org/docs/devel/tutorial-window.html
### MySQL Docs
https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html
https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html
## Syntax
```text
@ -80,20 +53,7 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
- `PARTITION BY` - defines how to break a resultset into groups.
- `ORDER BY` - defines how to order rows inside the group during calculation aggregate_function.
- `ROWS or RANGE` - defines bounds of a frame, aggregate_function is calculated within a frame.
- `WINDOW` - allows to reuse a window definition with multiple expressions.
### Functions
These functions can be used only as a window function.
- `row_number()` - Number the current row within its partition starting from 1.
- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame.
- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame.
- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
- `rank()` - Rank the current row within its partition with gaps.
- `dense_rank()` - Rank the current row within its partition without gaps.
- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame.
- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame.
- `WINDOW` - allows multiple expressions to use the same window definition.
```text
PARTITION
@ -112,8 +72,23 @@ These functions can be used only as a window function.
└─────────────────┘ <--- UNBOUNDED FOLLOWING (END of the PARTITION)
```
### Functions
These functions can be used only as a window function.
- `row_number()` - Number the current row within its partition starting from 1.
- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame.
- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame.
- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
- `rank()` - Rank the current row within its partition with gaps.
- `dense_rank()` - Rank the current row within its partition without gaps.
- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame.
- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame.
## Examples
Let's have a look at some examples of how window functions can be used.
```sql
CREATE TABLE wf_partition
(
@ -589,6 +564,41 @@ ORDER BY
└──────────────┴─────────────────────┴───────┴─────────────────────────┘
```
## References
### GitHub Issues
The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
### Tests
These tests contain the examples of the currently supported grammar:
https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml
https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql
### Postgres Docs
https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW
https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS
https://www.postgresql.org/docs/devel/functions-window.html
https://www.postgresql.org/docs/devel/tutorial-window.html
### MySQL Docs
https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html
https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html
## Related Content
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)

View File

@ -585,10 +585,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
```
:::danger Внимание!
Этот подход не годится для сегментирования больших таблиц. Есть инструмент [clickhouse-copier](../operations/utilities/clickhouse-copier.md), специально предназначенный для перераспределения любых больших таблиц.
:::
Как и следовало ожидать, вычислительно сложные запросы работают втрое быстрее, если они выполняются на трёх серверах, а не на одном.
В данном случае мы использовали кластер из трёх сегментов с одной репликой для каждого.

View File

@ -24,10 +24,6 @@ sidebar_label: "Резервное копирование данных"
Некоторые локальные файловые системы позволяют делать снимки (например, [ZFS](https://en.wikipedia.org/wiki/ZFS)), но они могут быть не лучшим выбором для обслуживания живых запросов. Возможным решением является создание дополнительных реплик с такой файловой системой и исключение их из [Distributed](../engines/table-engines/special/distributed.md) таблиц, используемых для запросов `SELECT`. Снимки на таких репликах будут недоступны для запросов, изменяющих данные. В качестве бонуса, эти реплики могут иметь особые конфигурации оборудования с большим количеством дисков, подключенных к серверу, что будет экономически эффективным.
## clickhouse-copier {#clickhouse-copier}
[clickhouse-copier](utilities/clickhouse-copier.md) — это универсальный инструмент, который изначально был создан для перешардирования таблиц с петабайтами данных. Его также можно использовать для резервного копирования и восстановления, поскольку он надёжно копирует данные между таблицами и кластерами ClickHouse.
Для небольших объёмов данных можно применять `INSERT INTO ... SELECT ...` в удалённые таблицы.
## Манипуляции с партициями {#manipuliatsii-s-partitsiiami}

View File

@ -1,183 +0,0 @@
---
slug: /ru/operations/utilities/clickhouse-copier
sidebar_position: 59
sidebar_label: clickhouse-copier
---
# clickhouse-copier {#clickhouse-copier}
Копирует данные из таблиц одного кластера в таблицы другого (или этого же) кластера.
Можно запустить несколько `clickhouse-copier` для разных серверах для выполнения одного и того же задания. Для синхронизации между процессами используется ZooKeeper.
После запуска, `clickhouse-copier`:
- Соединяется с ZooKeeper и получает:
- Задания на копирование.
- Состояние заданий на копирование.
- Выполняет задания.
Каждый запущенный процесс выбирает "ближайший" шард исходного кластера и копирует данные в кластер назначения, при необходимости перешардируя их.
`clickhouse-copier` отслеживает изменения в ZooKeeper и применяет их «на лету».
Для снижения сетевого трафика рекомендуем запускать `clickhouse-copier` на том же сервере, где находятся исходные данные.
## Запуск Clickhouse-copier {#zapusk-clickhouse-copier}
Утилиту следует запускать вручную следующим образом:
``` bash
$ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir
```
Параметры запуска:
- `daemon` - запускает `clickhouse-copier` в режиме демона.
- `config` - путь к файлу `zookeeper.xml` с параметрами соединения с ZooKeeper.
- `task-path` - путь к ноде ZooKeeper. Нода используется для синхронизации между процессами `clickhouse-copier` и для хранения заданий. Задания хранятся в `$task-path/description`.
- `task-file` - необязательный путь к файлу с описанием конфигурация заданий для загрузки в ZooKeeper.
- `task-upload-force` - Загрузить `task-file` в ZooKeeper даже если уже было загружено.
- `base-dir` - путь к логам и вспомогательным файлам. При запуске `clickhouse-copier` создает в `$base-dir` подкаталоги `clickhouse-copier_YYYYMMHHSS_<PID>`. Если параметр не указан, то каталоги будут создаваться в каталоге, где `clickhouse-copier` был запущен.
## Формат Zookeeper.xml {#format-zookeeper-xml}
``` xml
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
<count>3</count>
</logger>
<zookeeper>
<node index="1">
<host>127.0.0.1</host>
<port>2181</port>
</node>
</zookeeper>
</clickhouse>
```
## Конфигурация заданий на копирование {#konfiguratsiia-zadanii-na-kopirovanie}
``` xml
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
<!--
source cluster & destination clusters accept exactly the same
parameters as parameters for the usual Distributed table
see https://clickhouse.com/docs/ru/engines/table-engines/special/distributed/
-->
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
<!--
<user>default</user>
<password>default</password>
<secure>1</secure>
-->
</replica>
</shard>
...
</source_cluster>
<destination_cluster>
...
</destination_cluster>
</remote_servers>
<!-- How many simultaneously active workers are possible. If you run more workers superfluous workers will sleep. -->
<max_workers>2</max_workers>
<!-- Setting used to fetch (pull) data from source cluster tables -->
<settings_pull>
<readonly>1</readonly>
</settings_pull>
<!-- Setting used to insert (push) data to destination cluster tables -->
<settings_push>
<readonly>0</readonly>
</settings_push>
<!-- Common setting for fetch (pull) and insert (push) operations. Also, copier process context uses it.
They are overlaid by <settings_pull/> and <settings_push/> respectively. -->
<settings>
<connect_timeout>3</connect_timeout>
<!-- Sync insert is set forcibly, leave it here just in case. -->
<distributed_foreground_insert>1</distributed_foreground_insert>
</settings>
<!-- Copying tasks description.
You could specify several table task in the same task description (in the same ZooKeeper node), they will be performed
sequentially.
-->
<tables>
<!-- A table task, copies one table. -->
<table_hits>
<!-- Source cluster name (from <remote_servers/> section) and tables in it that should be copied -->
<cluster_pull>source_cluster</cluster_pull>
<database_pull>test</database_pull>
<table_pull>hits</table_pull>
<!-- Destination cluster name and tables in which the data should be inserted -->
<cluster_push>destination_cluster</cluster_push>
<database_push>test</database_push>
<table_push>hits2</table_push>
<!-- Engine of destination tables.
If destination tables have not be created, workers create them using columns definition from source tables and engine
definition from here.
NOTE: If the first worker starts insert data and detects that destination partition is not empty then the partition will
be dropped and refilled, take it into account if you already have some data in destination tables. You could directly
specify partitions that should be copied in <enabled_partitions/>, they should be in quoted format like partition column of
system.parts table.
-->
<engine>
ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
PARTITION BY toMonday(date)
ORDER BY (CounterID, EventDate)
</engine>
<!-- Sharding key used to insert data to destination cluster -->
<sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key>
<!-- Optional expression that filter data while pull them from source servers -->
<where_condition>CounterID != 0</where_condition>
<!-- This section specifies partitions that should be copied, other partition will be ignored.
Partition names should have the same format as
partition column of system.parts table (i.e. a quoted text).
Since partition key of source and destination cluster could be different,
these partition names specify destination partitions.
NOTE: In spite of this section is optional (if it is not specified, all partitions will be copied),
it is strictly recommended to specify them explicitly.
If you already have some ready partitions on destination cluster they
will be removed at the start of the copying since they will be interpeted
as unfinished data from the previous copying!!!
-->
<enabled_partitions>
<partition>'2018-02-26'</partition>
<partition>'2018-03-05'</partition>
...
</enabled_partitions>
</table_hits>
<!-- Next table to copy. It is not copied until previous table is copying. -->
<table_visits>
...
</table_visits>
...
</tables>
</clickhouse>
```
`clickhouse-copier` отслеживает изменения `/task/path/description` и применяет их «на лету». Если вы поменяете, например, значение `max_workers`, то количество процессов, выполняющих задания, также изменится.

View File

@ -7,7 +7,6 @@ sidebar_position: 56
# Утилиты ClickHouse {#utility-clickhouse}
- [clickhouse-local](clickhouse-local.md) - позволяет выполнять SQL-запросы над данными без остановки сервера ClickHouse, подобно утилите `awk`.
- [clickhouse-copier](clickhouse-copier.md) - копирует (и перешардирует) данные с одного кластера на другой.
- [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — устанавливает соединение с сервером ClickHouse и запускает циклическое выполнение указанных запросов.
- [clickhouse-format](../../operations/utilities/clickhouse-format.md) — позволяет форматировать входящие запросы.
- [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — обфусцирует данные.

View File

@ -94,7 +94,7 @@ RENAME COLUMN [IF EXISTS] name to new_name
Переименовывает столбец `name` в `new_name`. Если указано выражение `IF EXISTS`, то запрос не будет возвращать ошибку при условии, что столбец `name` не существует. Поскольку переименование не затрагивает физические данные колонки, запрос выполняется практически мгновенно.
**ЗАМЕЧЕНИЕ**: Столбцы, являющиеся частью основного ключа или ключа сортировки (заданные с помощью `ORDER BY` или `PRIMARY KEY`), не могут быть переименованы. Попытка переименовать эти слобцы приведет к `SQL Error [524]`.
**ЗАМЕЧЕНИЕ**: Столбцы, являющиеся частью основного ключа или ключа сортировки (заданные с помощью `ORDER BY` или `PRIMARY KEY`), не могут быть переименованы. Попытка переименовать эти слобцы приведет к `SQL Error [524]`.
Пример:
@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp;
Отсутствует возможность удалять столбцы, входящие в первичный ключ или ключ для сэмплирования (в общем, входящие в выражение `ENGINE`). Изменение типа у столбцов, входящих в первичный ключ возможно только в том случае, если это изменение не приводит к изменению данных (например, разрешено добавление значения в Enum или изменение типа с `DateTime` на `UInt32`).
Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md).
Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу.
Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть если на момент запроса `ALTER` выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время все новые запросы к той же таблице будут ждать, пока завершится этот `ALTER`.

View File

@ -582,8 +582,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
```
!!! warning "注意:"
这种方法不适合大型表的分片。 有一个单独的工具 [clickhouse-copier](../operations/utilities/clickhouse-copier.md) 这可以重新分片任意大表。
正如您所期望的那样如果计算量大的查询使用3台服务器而不是一个则运行速度快N倍。

View File

@ -24,12 +24,6 @@ sidebar_label: "\u6570\u636E\u5907\u4EFD"
某些本地文件系统提供快照功能(例如, [ZFS](https://en.wikipedia.org/wiki/ZFS)),但它们可能不是提供实时查询的最佳选择。 一个可能的解决方案是使用这种文件系统创建额外的副本,并将它们与用于`SELECT` 查询的 [分布式](../engines/table-engines/special/distributed.md) 表分离。 任何修改数据的查询都无法访问此类副本上的快照。 作为回报,这些副本可能具有特殊的硬件配置,每个服务器附加更多的磁盘,这将是经济高效的。
## clickhouse-copier {#clickhouse-copier}
[clickhouse-copier](utilities/clickhouse-copier.md) 是一个多功能工具最初创建它是为了用于重新切分pb大小的表。 因为它能够在ClickHouse表和集群之间可靠地复制数据所以它也可用于备份和还原数据。
对于较小的数据量,一个简单的 `INSERT INTO ... SELECT ...` 到远程表也可以工作。
## part操作 {#manipulations-with-parts}
ClickHouse允许使用 `ALTER TABLE ... FREEZE PARTITION ...` 查询以创建表分区的本地副本。 这是利用硬链接(hardlink)到 `/var/lib/clickhouse/shadow/` 文件夹中实现的,所以它通常不会因为旧数据而占用额外的磁盘空间。 创建的文件副本不由ClickHouse服务器处理所以你可以把它们留在那里你将有一个简单的备份不需要任何额外的外部系统但它仍然容易出现硬件问题。 出于这个原因,最好将它们远程复制到另一个位置,然后删除本地副本。 分布式文件系统和对象存储仍然是一个不错的选择,但是具有足够大容量的正常附加文件服务器也可以工作(在这种情况下,传输将通过网络文件系统或者也许是 [rsync](https://en.wikipedia.org/wiki/Rsync) 来进行).

View File

@ -649,11 +649,22 @@ log_query_threads=1
## max_query_size {#settings-max_query_size}
查询的最大部分可以被带到RAM用于使用SQL解析器进行解析。
插入查询还包含由单独的流解析器消耗O(1)RAM处理的插入数据这些数据不包含在此限制中。
SQL 解析器解析的查询字符串的最大字节数。 INSERT 查询的 VALUES 子句中的数据由单独的流解析器(消耗 O(1) RAM处理并且不受此限制的影响。
默认值256KiB。
## max_parser_depth {#max_parser_depth}
限制递归下降解析器中的最大递归深度。允许控制堆栈大小。
可能的值:
- 正整数。
- 0 — 递归深度不受限制。
默认值1000。
## interactive_delay {#interactive-delay}
以微秒为单位的间隔,用于检查请求执行是否已被取消并发送进度。
@ -1064,6 +1075,28 @@ ClickHouse生成异常
默认值0。
## optimize_functions_to_subcolumns {#optimize_functions_to_subcolumns}
启用或禁用通过将某些函数转换为读取子列的优化。这减少了要读取的数据量。
这些函数可以转化为:
- [length](../../sql-reference/functions/array-functions.md/#array_functions-length) 读取 [size0](../../sql-reference/data-types/array.md/#array-size子列。
- [empty](../../sql-reference/functions/array-functions.md/#empty函数) 读取 [size0](../../sql-reference/data-types/array.md/#array-size子列。
- [notEmpty](../../sql-reference/functions/array-functions.md/#notempty函数) 读取 [size0](../../sql-reference/data-types/array.md/#array-size子列。
- [isNull](../../sql-reference/operators/index.md#operator-is-null) 读取 [null](../../sql-reference/data-types/nullable. md/#finding-null) 子列。
- [isNotNull](../../sql-reference/operators/index.md#is-not-null) 读取 [null](../../sql-reference/data-types/nullable. md/#finding-null) 子列。
- [count](../../sql-reference/aggregate-functions/reference/count.md) 读取 [null](../../sql-reference/data-types/nullable.md/#finding-null) 子列。
- [mapKeys](../../sql-reference/functions/tuple-map-functions.mdx/#mapkeys) 读取 [keys](../../sql-reference/data-types/map.md/#map-subcolumns) 子列。
- [mapValues](../../sql-reference/functions/tuple-map-functions.mdx/#mapvalues) 读取 [values](../../sql-reference/data-types/map.md/#map-subcolumns) 子列。
可能的值:
- 0 — 禁用优化。
- 1 — 优化已启用。
默认值:`0`。
## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}
- 类型:秒

View File

@ -1,172 +0,0 @@
---
slug: /zh/operations/utilities/clickhouse-copier
---
# clickhouse-copier {#clickhouse-copier}
将数据从一个群集中的表复制到另一个(或相同)群集中的表。
您可以运行多个 `clickhouse-copier` 不同服务器上的实例执行相同的作业。 ZooKeeper用于同步进程。
开始后, `clickhouse-copier`:
- 连接到ZooKeeper并且接收:
- 复制作业。
- 复制作业的状态。
- 它执行的工作。
每个正在运行的进程都会选择源集群的“最接近”分片,然后将数据复制到目标集群,并在必要时重新分片数据。
`clickhouse-copier` 跟踪ZooKeeper中的更改并实时应用它们。
为了减少网络流量,我们建议运行 `clickhouse-copier` 在源数据所在的同一服务器上。
## 运行Clickhouse-copier {#running-clickhouse-copier}
该实用程序应手动运行:
``` bash
clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir
```
参数:
- `daemon` — 在守护进程模式下启动`clickhouse-copier`。
- `config``zookeeper.xml`文件的路径其中包含用于连接ZooKeeper的参数。
- `task-path` — ZooKeeper节点的路径。 该节点用于同步`clickhouse-copier`进程和存储任务。 任务存储在`$task-path/description`中。
- `task-file` — 可选的非必须参数, 指定一个包含任务配置的参数文件, 用于初始上传到ZooKeeper。
- `task-upload-force` — 即使节点已经存在,也强制上载`task-file`。
- `base-dir` — 日志和辅助文件的路径。 启动时,`clickhouse-copier`在`$base-dir`中创建`clickhouse-copier_YYYYMMHHSS_<PID>`子目录。 如果省略此参数,则会在启动`clickhouse-copier`的目录中创建目录。
## Zookeeper.xml格式 {#format-of-zookeeper-xml}
``` xml
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
<count>3</count>
</logger>
<zookeeper>
<node index="1">
<host>127.0.0.1</host>
<port>2181</port>
</node>
</zookeeper>
</clickhouse>
```
## 复制任务的配置 {#configuration-of-copying-tasks}
``` xml
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
</shard>
...
</source_cluster>
<destination_cluster>
...
</destination_cluster>
</remote_servers>
<!-- How many simultaneously active workers are possible. If you run more workers superfluous workers will sleep. -->
<max_workers>2</max_workers>
<!-- Setting used to fetch (pull) data from source cluster tables -->
<settings_pull>
<readonly>1</readonly>
</settings_pull>
<!-- Setting used to insert (push) data to destination cluster tables -->
<settings_push>
<readonly>0</readonly>
</settings_push>
<!-- Common setting for fetch (pull) and insert (push) operations. Also, copier process context uses it.
They are overlaid by <settings_pull/> and <settings_push/> respectively. -->
<settings>
<connect_timeout>3</connect_timeout>
<!-- Sync insert is set forcibly, leave it here just in case. -->
<distributed_foreground_insert>1</distributed_foreground_insert>
</settings>
<!-- Copying tasks description.
You could specify several table task in the same task description (in the same ZooKeeper node), they will be performed
sequentially.
-->
<tables>
<!-- A table task, copies one table. -->
<table_hits>
<!-- Source cluster name (from <remote_servers/> section) and tables in it that should be copied -->
<cluster_pull>source_cluster</cluster_pull>
<database_pull>test</database_pull>
<table_pull>hits</table_pull>
<!-- Destination cluster name and tables in which the data should be inserted -->
<cluster_push>destination_cluster</cluster_push>
<database_push>test</database_push>
<table_push>hits2</table_push>
<!-- Engine of destination tables.
If destination tables have not be created, workers create them using columns definition from source tables and engine
definition from here.
NOTE: If the first worker starts insert data and detects that destination partition is not empty then the partition will
be dropped and refilled, take it into account if you already have some data in destination tables. You could directly
specify partitions that should be copied in <enabled_partitions/>, they should be in quoted format like partition column of
system.parts table.
-->
<engine>
ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
PARTITION BY toMonday(date)
ORDER BY (CounterID, EventDate)
</engine>
<!-- Sharding key used to insert data to destination cluster -->
<sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key>
<!-- Optional expression that filter data while pull them from source servers -->
<where_condition>CounterID != 0</where_condition>
<!-- This section specifies partitions that should be copied, other partition will be ignored.
Partition names should have the same format as
partition column of system.parts table (i.e. a quoted text).
Since partition key of source and destination cluster could be different,
these partition names specify destination partitions.
NOTE: In spite of this section is optional (if it is not specified, all partitions will be copied),
it is strictly recommended to specify them explicitly.
If you already have some ready partitions on destination cluster they
will be removed at the start of the copying since they will be interpeted
as unfinished data from the previous copying!!!
-->
<enabled_partitions>
<partition>'2018-02-26'</partition>
<partition>'2018-03-05'</partition>
...
</enabled_partitions>
</table_hits>
<!-- Next table to copy. It is not copied until previous table is copying. -->
<table_visits>
...
</table_visits>
...
</tables>
</clickhouse>
```
`clickhouse-copier` 跟踪更改 `/task/path/description` 并在飞行中应用它们。 例如,如果你改变的值 `max_workers`,运行任务的进程数也会发生变化。

View File

@ -4,5 +4,4 @@ slug: /zh/operations/utilities/
# 实用工具 {#clickhouse-utility}
- [本地查询](clickhouse-local.md) — 在不停止ClickHouse服务的情况下对数据执行查询操作(类似于 `awk` 命令)。
- [跨集群复制](clickhouse-copier.md) — 在不同集群间复制数据。
- [性能测试](clickhouse-benchmark.md) — 连接到Clickhouse服务器执行性能测试。

View File

@ -1,7 +1,7 @@
---
slug: /zh/sql-reference/data-types/array
---
# 阵列(T) {#data-type-array}
# 数组(T) {#data-type-array}
`T` 类型元素组成的数组。
@ -66,3 +66,27 @@ SELECT array(1, 'a')
Received exception from server (version 1.1.54388):
Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not.
```
## 数组大小 {#array-size}
可以使用 `size0` 子列找到数组的大小,而无需读取整个列。对于多维数组,您可以使用 `sizeN-1`,其中 `N` 是所需的维度。
**例子**
SQL查询
```sql
CREATE TABLE t_arr (`arr` Array(Array(Array(UInt32)))) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_arr VALUES ([[[12, 13, 0, 1],[12]]]);
SELECT arr.size0, arr.size1, arr.size2 FROM t_arr;
```
结果:
``` text
┌─arr.size0─┬─arr.size1─┬─arr.size2─┐
│ 1 │ [2] │ [[4,1]] │
└───────────┴───────────┴───────────┘
```

View File

@ -20,6 +20,33 @@ slug: /zh/sql-reference/data-types/nullable
掩码文件中的条目允许ClickHouse区分每个表行的对应数据类型的«NULL»和默认值由于有额外的文件«Nullable»列比普通列消耗更多的存储空间
## null子列 {#finding-null}
通过使用 `null` 子列可以在列中查找 `NULL` 值,而无需读取整个列。如果对应的值为 `NULL`,则返回 `1`,否则返回 `0`
**示例**
SQL查询:
``` sql
CREATE TABLE nullable (`n` Nullable(UInt32)) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO nullable VALUES (1) (NULL) (2) (NULL);
SELECT n.null FROM nullable;
```
结果:
``` text
┌─n.null─┐
│ 0 │
│ 1 │
│ 0 │
│ 1 │
└────────┘
```
## 用法示例 {#yong-fa-shi-li}
``` sql

View File

@ -150,7 +150,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
不支持对primary key或者sampling key中的列`ENGINE` 表达式中用到的列进行删除操作。改变包含在primary key中的列的类型时如果操作不会导致数据的变化例如往Enum中添加一个值或者将`DateTime` 类型改成 `UInt32`),那么这种操作是可行的。
如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`
如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。
`ALTER` 操作会阻塞对表的所有读写操作。换句话说,当一个大的 `SELECT` 语句和 `ALTER`同时执行时,`ALTER`会等待,直到 `SELECT` 执行结束。与此同时,当 `ALTER` 运行时,新的 sql 语句将会等待。

View File

@ -50,8 +50,6 @@ contents:
dst: /etc/init.d/clickhouse-server
- src: clickhouse-server.service
dst: /lib/systemd/system/clickhouse-server.service
- src: root/usr/bin/clickhouse-copier
dst: /usr/bin/clickhouse-copier
- src: root/usr/bin/clickhouse-server
dst: /usr/bin/clickhouse-server
# clickhouse-keeper part

View File

@ -122,7 +122,6 @@ add_subdirectory (local)
add_subdirectory (benchmark)
add_subdirectory (extract-from-config)
add_subdirectory (compressor)
add_subdirectory (copier)
add_subdirectory (format)
add_subdirectory (obfuscator)
add_subdirectory (install)
@ -200,7 +199,6 @@ clickhouse_program_install(clickhouse-server server)
clickhouse_program_install(clickhouse-client client chc)
clickhouse_program_install(clickhouse-local local chl ch)
clickhouse_program_install(clickhouse-benchmark benchmark)
clickhouse_program_install(clickhouse-copier copier)
clickhouse_program_install(clickhouse-extract-from-config extract-from-config)
clickhouse_program_install(clickhouse-compressor compressor)
clickhouse_program_install(clickhouse-format format)

View File

@ -1,15 +0,0 @@
#pragma once
#include <base/types.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <utility>
namespace DB
{
using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
using DatabaseAndTableName = std::pair<String, String>;
using ListOfDatabasesAndTableNames = std::vector<DatabaseAndTableName>;
}

View File

@ -1,28 +0,0 @@
set(CLICKHOUSE_COPIER_SOURCES
"${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopierApp.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ShardPartition.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ShardPartitionPiece.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/StatusAccumulator.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/TaskCluster.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/TaskShard.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/TaskTable.cpp")
set (CLICKHOUSE_COPIER_LINK
PRIVATE
clickhouse_common_zookeeper
clickhouse_common_config
clickhouse_parsers
clickhouse_functions
clickhouse_table_functions
clickhouse_aggregate_functions
string_utils
PUBLIC
daemon
)
set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
clickhouse_program_add(copier)

File diff suppressed because it is too large Load Diff

View File

@ -1,240 +0,0 @@
#pragma once
#include "Aliases.h"
#include "Internals.h"
#include "TaskCluster.h"
#include "TaskShard.h"
#include "TaskTable.h"
#include "ShardPartition.h"
#include "ShardPartitionPiece.h"
#include "ZooKeeperStaff.h"
namespace DB
{
class ClusterCopier : WithMutableContext
{
public:
ClusterCopier(const String & task_path_,
const String & host_id_,
const String & proxy_database_name_,
ContextMutablePtr context_,
LoggerRawPtr log_)
: WithMutableContext(context_),
task_zookeeper_path(task_path_),
host_id(host_id_),
working_database_name(proxy_database_name_),
log(log_) {}
void init();
template <typename T>
decltype(auto) retry(T && func, UInt64 max_tries = 100);
void discoverShardPartitions(const ConnectionTimeouts & timeouts, const TaskShardPtr & task_shard);
/// Compute set of partitions, assume set of partitions aren't changed during the processing
void discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads = 0);
void uploadTaskDescription(const std::string & task_path, const std::string & task_file, bool force);
void reloadTaskDescription();
void updateConfigIfNeeded();
void process(const ConnectionTimeouts & timeouts);
/// Disables DROP PARTITION commands that used to clear data after errors
void setSafeMode(bool is_safe_mode_ = true)
{
is_safe_mode = is_safe_mode_;
}
void setCopyFaultProbability(double copy_fault_probability_)
{
copy_fault_probability = copy_fault_probability_;
}
void setMoveFaultProbability(double move_fault_probability_)
{
move_fault_probability = move_fault_probability_;
}
void setExperimentalUseSampleOffset(bool value)
{
experimental_use_sample_offset = value;
}
void setMaxTableTries(UInt64 tries)
{
max_table_tries = tries;
}
void setMaxShardPartitionTries(UInt64 tries)
{
max_shard_partition_tries = tries;
}
void setMaxShardPartitionPieceTriesForAlter(UInt64 tries)
{
max_shard_partition_piece_tries_for_alter = tries;
}
void setRetryDelayMs(std::chrono::milliseconds ms)
{
retry_delay_ms = ms;
}
protected:
String getWorkersPath() const
{
return task_cluster->task_zookeeper_path + "/task_active_workers";
}
String getWorkersPathVersion() const
{
return getWorkersPath() + "_version";
}
String getCurrentWorkerNodePath() const
{
return getWorkersPath() + "/" + host_id;
}
zkutil::EphemeralNodeHolder::Ptr createTaskWorkerNodeAndWaitIfNeed(
const zkutil::ZooKeeperPtr & zookeeper,
const String & description,
bool unprioritized);
/*
* Checks that partition piece or some other entity is clean.
* The only requirement is that you have to pass is_dirty_flag_path and is_dirty_cleaned_path to the function.
* And is_dirty_flag_path is a parent of is_dirty_cleaned_path.
* */
static bool checkPartitionPieceIsClean(
const zkutil::ZooKeeperPtr & zookeeper,
const CleanStateClock & clean_state_clock,
const String & task_status_path);
bool checkAllPiecesInPartitionAreDone(const TaskTable & task_table, const String & partition_name, const TasksShard & shards_with_partition);
/** Checks that the whole partition of a table was copied. We should do it carefully due to dirty lock.
* State of some task could change during the processing.
* We have to ensure that all shards have the finished state and there is no dirty flag.
* Moreover, we have to check status twice and check zxid, because state can change during the checking.
*/
/* The same as function above
* Assume that we don't know on which shards do we have partition certain piece.
* We'll check them all (I mean shards that contain the whole partition)
* And shards that don't have certain piece MUST mark that piece is_done true.
* */
bool checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name,
size_t piece_number, const TasksShard & shards_with_partition);
/*Alter successful insertion to helping tables it will move all pieces to destination table*/
TaskStatus tryMoveAllPiecesToDestinationTable(const TaskTable & task_table, const String & partition_name);
/// Removes MATERIALIZED and ALIAS columns from create table query
static ASTPtr removeAliasMaterializedAndTTLColumnsFromCreateQuery(const ASTPtr & query_ast, bool allow_to_copy_alias_and_materialized_columns);
bool tryDropPartitionPiece(ShardPartition & task_partition, size_t current_piece_number,
const zkutil::ZooKeeperPtr & zookeeper, const CleanStateClock & clean_state_clock);
bool tryProcessTable(const ConnectionTimeouts & timeouts, TaskTable & task_table);
TaskStatus tryCreateDestinationTable(const ConnectionTimeouts & timeouts, TaskTable & task_table);
/// Job for copying partition from particular shard.
TaskStatus tryProcessPartitionTask(const ConnectionTimeouts & timeouts,
ShardPartition & task_partition,
bool is_unprioritized_task);
TaskStatus iterateThroughAllPiecesInPartition(const ConnectionTimeouts & timeouts,
ShardPartition & task_partition,
bool is_unprioritized_task);
TaskStatus processPartitionPieceTaskImpl(const ConnectionTimeouts & timeouts,
ShardPartition & task_partition,
size_t current_piece_number,
bool is_unprioritized_task);
void dropAndCreateLocalTable(const ASTPtr & create_ast);
void dropLocalTableIfExists(const DatabaseAndTableName & table_name) const;
void dropHelpingTables(const TaskTable & task_table);
void dropHelpingTablesByPieceNumber(const TaskTable & task_table, size_t current_piece_number);
/// Is used for usage less disk space.
/// After all pieces were successfully moved to original destination
/// table we can get rid of partition pieces (partitions in helping tables).
void dropParticularPartitionPieceFromAllHelpingTables(const TaskTable & task_table, const String & partition_name);
String getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings);
ASTPtr getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard);
/// If it is implicitly asked to create split Distributed table for certain piece on current shard, we will do it.
void createShardInternalTables(const ConnectionTimeouts & timeouts, TaskShard & task_shard, bool create_split = true);
std::set<String> getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard);
bool checkShardHasPartition(const ConnectionTimeouts & timeouts, TaskShard & task_shard, const String & partition_quoted_name);
bool checkPresentPartitionPiecesOnCurrentShard(const ConnectionTimeouts & timeouts,
TaskShard & task_shard, const String & partition_quoted_name, size_t current_piece_number);
/*
* This class is used in executeQueryOnCluster function
* You can execute query on each shard (no sense it is executed on each replica of a shard or not)
* or you can execute query on each replica on each shard.
* First mode is useful for INSERTS queries.
* */
enum ClusterExecutionMode
{
ON_EACH_SHARD,
ON_EACH_NODE
};
/** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster
* Returns number of shards for which at least one replica executed query successfully
*/
UInt64 executeQueryOnCluster(
const ClusterPtr & cluster,
const String & query,
const Settings & current_settings,
ClusterExecutionMode execution_mode = ClusterExecutionMode::ON_EACH_SHARD) const;
private:
String task_zookeeper_path;
String task_description_path;
String host_id;
String working_database_name;
/// Auto update config stuff
UInt64 task_description_current_version = 1;
std::atomic<UInt64> task_description_version{1};
Coordination::WatchCallback task_description_watch_callback;
/// ZooKeeper session used to set the callback
zkutil::ZooKeeperPtr task_description_watch_zookeeper;
ConfigurationPtr task_cluster_initial_config;
ConfigurationPtr task_cluster_current_config;
std::unique_ptr<TaskCluster> task_cluster;
bool is_safe_mode = false;
double copy_fault_probability = 0.0;
double move_fault_probability = 0.0;
bool experimental_use_sample_offset{false};
LoggerRawPtr log;
UInt64 max_table_tries = 3;
UInt64 max_shard_partition_tries = 3;
UInt64 max_shard_partition_piece_tries_for_alter = 10;
std::chrono::milliseconds retry_delay_ms{1000};
};
}

View File

@ -1,252 +0,0 @@
#include "ClusterCopierApp.h"
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/StatusFile.h>
#include <Common/TerminalSize.h>
#include <Databases/registerDatabases.h>
#include <IO/ConnectionTimeouts.h>
#include <Interpreters/registerInterpreters.h>
#include <Formats/registerFormats.h>
#include <Common/scope_guard_safe.h>
#include <unistd.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
/// ClusterCopierApp
void ClusterCopierApp::initialize(Poco::Util::Application & self)
{
is_help = config().has("help");
if (is_help)
return;
config_xml_path = config().getString("config-file");
task_path = config().getString("task-path");
log_level = config().getString("log-level", "info");
is_safe_mode = config().has("safe-mode");
is_status_mode = config().has("status");
if (config().has("copy-fault-probability"))
copy_fault_probability = std::max(std::min(config().getDouble("copy-fault-probability"), 1.0), 0.0);
if (config().has("move-fault-probability"))
move_fault_probability = std::max(std::min(config().getDouble("move-fault-probability"), 1.0), 0.0);
base_dir = (config().has("base-dir")) ? config().getString("base-dir") : fs::current_path().string();
max_table_tries = std::max<size_t>(config().getUInt("max-table-tries", 3), 1);
max_shard_partition_tries = std::max<size_t>(config().getUInt("max-shard-partition-tries", 3), 1);
max_shard_partition_piece_tries_for_alter = std::max<size_t>(config().getUInt("max-shard-partition-piece-tries-for-alter", 10), 1);
retry_delay_ms = std::chrono::milliseconds(std::max<size_t>(config().getUInt("retry-delay-ms", 1000), 100));
if (config().has("experimental-use-sample-offset"))
experimental_use_sample_offset = config().getBool("experimental-use-sample-offset");
// process_id is '<hostname>#<start_timestamp>_<pid>'
time_t timestamp = Poco::Timestamp().epochTime();
auto curr_pid = Poco::Process::id();
process_id = std::to_string(DateLUT::serverTimezoneInstance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid);
host_id = escapeForFileName(getFQDNOrHostName()) + '#' + process_id;
process_path = fs::weakly_canonical(fs::path(base_dir) / ("clickhouse-copier_" + process_id));
fs::create_directories(process_path);
/// Override variables for BaseDaemon
if (config().has("log-level"))
config().setString("logger.level", config().getString("log-level"));
if (config().has("base-dir") || !config().has("logger.log"))
config().setString("logger.log", fs::path(process_path) / "log.log");
if (config().has("base-dir") || !config().has("logger.errorlog"))
config().setString("logger.errorlog", fs::path(process_path) / "log.err.log");
Base::initialize(self);
}
void ClusterCopierApp::handleHelp(const std::string &, const std::string &)
{
uint16_t terminal_width = 0;
if (isatty(STDIN_FILENO))
terminal_width = getTerminalWidth();
Poco::Util::HelpFormatter help_formatter(options());
if (terminal_width)
help_formatter.setWidth(terminal_width);
help_formatter.setCommand(commandName());
help_formatter.setHeader("Copies tables from one cluster to another");
help_formatter.setUsage("--config-file <config-file> --task-path <task-path>");
help_formatter.format(std::cerr);
help_formatter.setFooter("See also: https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/");
stopOptionsProcessing();
}
void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)
{
Base::defineOptions(options);
options.addOption(Poco::Util::Option("task-path", "", "path to task in ZooKeeper")
.argument("task-path").binding("task-path"));
options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path")
.argument("task-file").binding("task-file"));
options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists. Default is false.")
.argument("task-upload-force").binding("task-upload-force"));
options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors")
.binding("safe-mode"));
options.addOption(Poco::Util::Option("copy-fault-probability", "", "the copying fails with specified probability (used to test partition state recovering)")
.argument("copy-fault-probability").binding("copy-fault-probability"));
options.addOption(Poco::Util::Option("move-fault-probability", "", "the moving fails with specified probability (used to test partition state recovering)")
.argument("move-fault-probability").binding("move-fault-probability"));
options.addOption(Poco::Util::Option("log-level", "", "sets log level")
.argument("log-level").binding("log-level"));
options.addOption(Poco::Util::Option("base-dir", "", "base directory for copiers, consecutive copier launches will populate /base-dir/launch_id/* directories")
.argument("base-dir").binding("base-dir"));
options.addOption(Poco::Util::Option("experimental-use-sample-offset", "", "Use SAMPLE OFFSET query instead of cityHash64(PRIMARY KEY) % n == k")
.argument("experimental-use-sample-offset").binding("experimental-use-sample-offset"));
options.addOption(Poco::Util::Option("status", "", "Get for status for current execution").binding("status"));
options.addOption(Poco::Util::Option("max-table-tries", "", "Number of tries for the copy table task")
.argument("max-table-tries").binding("max-table-tries"));
options.addOption(Poco::Util::Option("max-shard-partition-tries", "", "Number of tries for the copy one partition task")
.argument("max-shard-partition-tries").binding("max-shard-partition-tries"));
options.addOption(Poco::Util::Option("max-shard-partition-piece-tries-for-alter", "", "Number of tries for final ALTER ATTACH to destination table")
.argument("max-shard-partition-piece-tries-for-alter").binding("max-shard-partition-piece-tries-for-alter"));
options.addOption(Poco::Util::Option("retry-delay-ms", "", "Delay between task retries")
.argument("retry-delay-ms").binding("retry-delay-ms"));
using Me = std::decay_t<decltype(*this)>;
options.addOption(Poco::Util::Option("help", "", "produce this help message").binding("help")
.callback(Poco::Util::OptionCallback<Me>(this, &Me::handleHelp)));
}
void ClusterCopierApp::mainImpl()
{
/// Status command
{
if (is_status_mode)
{
SharedContextHolder shared_context = Context::createShared();
auto context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
SCOPE_EXIT_SAFE(context->shutdown());
auto zookeeper = context->getZooKeeper();
auto status_json = zookeeper->get(task_path + "/status");
LOG_INFO(&logger(), "{}", status_json);
std::cout << status_json << std::endl;
context->resetZooKeeper();
return;
}
}
StatusFile status_file(process_path + "/status", StatusFile::write_full_info);
ThreadStatus thread_status;
auto * log = &logger();
LOG_INFO(log, "Starting clickhouse-copier (id {}, host_id {}, path {}, revision {})", process_id, host_id, process_path, ClickHouseRevision::getVersionRevision());
SharedContextHolder shared_context = Context::createShared();
auto context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
SCOPE_EXIT_SAFE(context->shutdown());
context->setConfig(loaded_config.configuration);
context->setApplicationType(Context::ApplicationType::LOCAL);
context->setPath(process_path + "/");
registerInterpreters();
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
registerDatabases();
registerStorages();
registerDictionaries();
registerDisks(/* global_skip_access_check= */ true);
registerFormats();
static const std::string default_database = "_local";
DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, context));
context->setCurrentDatabase(default_database);
/// Disable queries logging, since:
/// - There are bits that is not allowed for global context, like adding factories info (for the query_log)
/// - And anyway it is useless for copier.
context->setSetting("log_queries", false);
auto local_context = Context::createCopy(context);
/// Initialize query scope just in case.
CurrentThread::QueryScope query_scope(local_context);
auto copier = std::make_unique<ClusterCopier>(
task_path, host_id, default_database, local_context, log);
copier->setSafeMode(is_safe_mode);
copier->setCopyFaultProbability(copy_fault_probability);
copier->setMoveFaultProbability(move_fault_probability);
copier->setMaxTableTries(max_table_tries);
copier->setMaxShardPartitionTries(max_shard_partition_tries);
copier->setMaxShardPartitionPieceTriesForAlter(max_shard_partition_piece_tries_for_alter);
copier->setRetryDelayMs(retry_delay_ms);
copier->setExperimentalUseSampleOffset(experimental_use_sample_offset);
auto task_file = config().getString("task-file", "");
if (!task_file.empty())
copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false));
zkutil::validateZooKeeperConfig(config());
copier->init();
copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef()));
/// Reset ZooKeeper before removing ClusterCopier.
/// Otherwise zookeeper watch can call callback which use already removed ClusterCopier object.
context->resetZooKeeper();
}
int ClusterCopierApp::main(const std::vector<std::string> &)
{
if (is_help)
return 0;
try
{
mainImpl();
}
catch (...)
{
tryLogCurrentException(&Poco::Logger::root(), __PRETTY_FUNCTION__);
auto code = getCurrentExceptionCode();
return (code) ? code : -1;
}
return 0;
}
}
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHouseClusterCopier(int argc, char ** argv)
{
try
{
DB::ClusterCopierApp app;
return app.run(argc, argv);
}
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(true) << "\n";
auto code = DB::getCurrentExceptionCode();
return (code) ? code : -1;
}
}

View File

@ -1,99 +0,0 @@
#pragma once
#include <Poco/Util/ServerApplication.h>
#include <Daemon/BaseDaemon.h>
#include "ClusterCopier.h"
/* clickhouse cluster copier util
* Copies tables data from one cluster to new tables of other (possibly the same) cluster in distributed fault-tolerant manner.
*
* See overview in the docs: docs/en/utils/clickhouse-copier.md
*
* Implementation details:
*
* cluster-copier workers pull each partition of each shard of the source cluster and push it to the destination cluster through
* Distributed table (to perform data resharding). So, worker job is a partition of a source shard.
* A job has three states: Active, Finished and Abandoned. Abandoned means that worker died and did not finish the job.
*
* If an error occurred during the copying (a worker failed or a worker did not finish the INSERT), then the whole partition (on
* all destination servers) should be dropped and refilled. So, copying entity is a partition of all destination shards.
* If a failure is detected a special /is_dirty node is created in ZooKeeper signalling that other workers copying the same partition
* should stop, after a refilling procedure should start.
*
* ZooKeeper task node has the following structure:
* /task/path_root - path passed in --task-path parameter
* /description - contains user-defined XML config of the task
* /task_active_workers - contains ephemeral nodes of all currently active workers, used to implement max_workers limitation
* /server_fqdn#PID_timestamp - cluster-copier worker ID
* ...
* /tables - directory with table tasks
* /cluster.db.table1 - directory of table_hits task
* /partition1 - directory for partition1
* /shards - directory for source cluster shards
* /1 - worker job for the first shard of partition1 of table test.hits
* Contains info about current status (Active or Finished) and worker ID.
* /2
* ...
* /partition_active_workers
* /1 - for each job in /shards a corresponding ephemeral node created in /partition_active_workers
* It is used to detect Abandoned jobs (if there is Active node in /shards and there is no node in
* /partition_active_workers).
* Also, it is used to track active workers in the partition (when we need to refill the partition we do
* not DROP PARTITION while there are active workers)
* /2
* ...
* /is_dirty - the node is set if some worker detected that an error occurred (the INSERT is failed or an Abandoned node is
* detected). If the node appeared workers in this partition should stop and start cleaning and refilling
* partition procedure.
* During this procedure a single 'cleaner' worker is selected. The worker waits for stopping all partition
* workers, removes /shards node, executes DROP PARTITION on each destination node and removes /is_dirty node.
* /cleaner- An ephemeral node used to select 'cleaner' worker. Contains ID of the worker.
* /cluster.db.table2
* ...
*/
namespace DB
{
class ClusterCopierApp : public BaseDaemon
{
public:
void initialize(Poco::Util::Application & self) override;
void handleHelp(const std::string &, const std::string &);
void defineOptions(Poco::Util::OptionSet & options) override;
int main(const std::vector<std::string> &) override;
private:
using Base = BaseDaemon;
void mainImpl();
std::string config_xml_path;
std::string task_path;
std::string log_level = "info";
bool is_safe_mode = false;
bool is_status_mode = false;
double copy_fault_probability = 0.0;
double move_fault_probability = 0.0;
bool is_help = false;
UInt64 max_table_tries = 3;
UInt64 max_shard_partition_tries = 3;
UInt64 max_shard_partition_piece_tries_for_alter = 10;
std::chrono::milliseconds retry_delay_ms{1000};
bool experimental_use_sample_offset{false};
std::string base_dir;
std::string process_path;
std::string process_id;
std::string host_id;
};
}

View File

@ -1,22 +0,0 @@
#pragma once
#include <base/types.h>
#include <map>
namespace DB
{
/// Contains info about all shards that contain a partition
struct ClusterPartition
{
double elapsed_time_seconds = 0;
UInt64 bytes_copied = 0;
UInt64 rows_copied = 0;
UInt64 blocks_copied = 0;
UInt64 total_tries = 0;
};
using ClusterPartitions = std::map<String, ClusterPartition, std::greater<>>;
}

View File

@ -1,280 +0,0 @@
#include "Internals.h"
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Transforms/SquashingChunksTransform.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/extractKeyExpressionList.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data)
{
std::stringstream ss(xml_data); // STYLE_CHECK_ALLOW_STD_STRING_STREAM
Poco::XML::InputSource input_source{ss};
return {new Poco::Util::XMLConfiguration{&input_source}};
}
String getQuotedTable(const String & database, const String & table)
{
if (database.empty())
return backQuoteIfNeed(table);
return backQuoteIfNeed(database) + "." + backQuoteIfNeed(table);
}
String getQuotedTable(const DatabaseAndTableName & db_and_table)
{
return getQuotedTable(db_and_table.first, db_and_table.second);
}
// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key])
std::shared_ptr<ASTStorage> createASTStorageDistributed(
const String & cluster_name, const String & database, const String & table,
const ASTPtr & sharding_key_ast)
{
auto args = std::make_shared<ASTExpressionList>();
args->children.emplace_back(std::make_shared<ASTLiteral>(cluster_name));
args->children.emplace_back(std::make_shared<ASTIdentifier>(database));
args->children.emplace_back(std::make_shared<ASTIdentifier>(table));
if (sharding_key_ast)
args->children.emplace_back(sharding_key_ast);
auto engine = std::make_shared<ASTFunction>();
engine->name = "Distributed";
engine->arguments = args;
auto storage = std::make_shared<ASTStorage>();
storage->set(storage->engine, engine);
return storage;
}
Block getBlockWithAllStreamData(QueryPipelineBuilder builder)
{
builder.addTransform(std::make_shared<SquashingChunksTransform>(
builder.getHeader(),
std::numeric_limits<size_t>::max(),
std::numeric_limits<size_t>::max()));
auto cur_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
Block block;
PullingPipelineExecutor executor(cur_pipeline);
executor.pull(block);
return block;
}
bool isExtendedDefinitionStorage(const ASTPtr & storage_ast)
{
const auto & storage = storage_ast->as<ASTStorage &>();
return storage.partition_by || storage.order_by || storage.sample_by;
}
ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
{
String storage_str = queryToString(storage_ast);
const auto & storage = storage_ast->as<ASTStorage &>();
const auto & engine = storage.engine->as<ASTFunction &>();
if (!endsWith(engine.name, "MergeTree"))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str);
}
if (isExtendedDefinitionStorage(storage_ast))
{
if (storage.partition_by)
return storage.partition_by->clone();
static const char * all = "all";
return std::make_shared<ASTLiteral>(Field(all, strlen(all)));
}
else
{
bool is_replicated = startsWith(engine.name, "Replicated");
size_t min_args = is_replicated ? 3 : 1;
if (!engine.arguments)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected arguments in {}", storage_str);
ASTPtr arguments_ast = engine.arguments->clone();
ASTs & arguments = arguments_ast->children;
if (arguments.size() < min_args)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected at least {} arguments in {}", min_args, storage_str);
ASTPtr & month_arg = is_replicated ? arguments[2] : arguments[1];
return makeASTFunction("toYYYYMM", month_arg->clone());
}
}
ASTPtr extractPrimaryKey(const ASTPtr & storage_ast)
{
String storage_str = queryToString(storage_ast);
const auto & storage = storage_ast->as<ASTStorage &>();
const auto & engine = storage.engine->as<ASTFunction &>();
if (!endsWith(engine.name, "MergeTree"))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str);
}
if (!isExtendedDefinitionStorage(storage_ast))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str);
}
if (storage.primary_key)
return storage.primary_key->clone();
return nullptr;
}
ASTPtr extractOrderBy(const ASTPtr & storage_ast)
{
String storage_str = queryToString(storage_ast);
const auto & storage = storage_ast->as<ASTStorage &>();
const auto & engine = storage.engine->as<ASTFunction &>();
if (!endsWith(engine.name, "MergeTree"))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str);
}
if (!isExtendedDefinitionStorage(storage_ast))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str);
}
if (storage.order_by)
return storage.order_by->clone();
throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty");
}
/// Wraps only identifiers with backticks.
std::string wrapIdentifiersWithBackticks(const ASTPtr & root)
{
if (auto identifier = std::dynamic_pointer_cast<ASTIdentifier>(root))
return backQuote(identifier->name());
if (auto function = std::dynamic_pointer_cast<ASTFunction>(root))
return function->name + '(' + wrapIdentifiersWithBackticks(function->arguments) + ')';
if (auto expression_list = std::dynamic_pointer_cast<ASTExpressionList>(root))
{
Names function_arguments(expression_list->children.size());
for (size_t i = 0; i < expression_list->children.size(); ++i)
function_arguments[i] = wrapIdentifiersWithBackticks(expression_list->children[0]);
return boost::algorithm::join(function_arguments, ", ");
}
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key could be represented only as columns or functions from columns.");
}
Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
{
const auto sorting_key_ast = extractOrderBy(storage_ast);
const auto primary_key_ast = extractPrimaryKey(storage_ast);
const auto sorting_key_expr_list = extractKeyExpressionList(sorting_key_ast);
const auto primary_key_expr_list = primary_key_ast
? extractKeyExpressionList(primary_key_ast) : sorting_key_expr_list->clone();
/// Maybe we have to handle VersionedCollapsing engine separately. But in our case in looks pointless.
size_t primary_key_size = primary_key_expr_list->children.size();
size_t sorting_key_size = sorting_key_expr_list->children.size();
if (primary_key_size > sorting_key_size)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key must be a prefix of the sorting key, but its length: "
"{} is greater than the sorting key length: {}",
primary_key_size, sorting_key_size);
Names primary_key_columns;
NameSet primary_key_columns_set;
for (size_t i = 0; i < sorting_key_size; ++i)
{
/// Column name could be represented as a f_1(f_2(...f_n(column_name))).
/// Each f_i could take one or more parameters.
/// We will wrap identifiers with backticks to allow non-standard identifier names.
String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName();
if (i < primary_key_size)
{
String pk_column = primary_key_expr_list->children[i]->getColumnName();
if (pk_column != sorting_key_column)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Primary key must be a prefix of the sorting key, "
"but the column in the position {} is {}, not {}", i, sorting_key_column, pk_column);
if (!primary_key_columns_set.emplace(pk_column).second)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns");
primary_key_columns.push_back(wrapIdentifiersWithBackticks(primary_key_expr_list->children[i]));
}
}
return primary_key_columns;
}
bool isReplicatedTableEngine(const ASTPtr & storage_ast)
{
const auto & storage = storage_ast->as<ASTStorage &>();
const auto & engine = storage.engine->as<ASTFunction &>();
if (!endsWith(engine.name, "MergeTree"))
{
String storage_str = queryToString(storage_ast);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str);
}
return startsWith(engine.name, "Replicated");
}
ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random)
{
ShardPriority res;
if (replicas.empty())
return res;
res.is_remote = 1;
for (const auto & replica : replicas)
{
if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name)))
{
res.is_remote = 0;
break;
}
}
res.hostname_difference = std::numeric_limits<size_t>::max();
for (const auto & replica : replicas)
{
size_t difference = getHostNamePrefixDistance(local_hostname, replica.host_name);
res.hostname_difference = std::min(difference, res.hostname_difference);
}
res.random = random;
return res;
}
}

View File

@ -1,198 +0,0 @@
#pragma once
#include <chrono>
#include <optional>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/Logger.h>
#include <Poco/ConsoleChannel.h>
#include <Poco/FormattingChannel.h>
#include <Poco/PatternFormatter.h>
#include <Poco/UUIDGenerator.h>
#include <Poco/Process.h>
#include <Poco/FileChannel.h>
#include <Poco/SplitterChannel.h>
#include <Poco/Util/HelpFormatter.h>
#include <boost/algorithm/string.hpp>
#include <Common/logger_useful.h>
#include <Common/ThreadPool.h>
#include <Common/Exception.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <base/getFQDNOrHostName.h>
#include <Common/isLocalAddress.h>
#include <Common/typeid_cast.h>
#include <Common/ClickHouseRevision.h>
#include <Common/formatReadable.h>
#include <Common/DNSResolver.h>
#include <Common/CurrentThread.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/ThreadStatus.h>
#include <Client/Connection.h>
#include <Interpreters/Context.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterExistsQuery.h>
#include <Interpreters/InterpreterShowCreateQuery.h>
#include <Interpreters/InterpreterDropQuery.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/NestedUtils.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTExpressionList.h>
#include <Formats/FormatSettings.h>
#include <QueryPipeline/RemoteQueryExecutor.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadBufferFromFile.h>
#include <Functions/registerFunctions.h>
#include <TableFunctions/registerTableFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Storages/registerStorages.h>
#include <Storages/StorageDistributed.h>
#include <Dictionaries/registerDictionaries.h>
#include <Disks/registerDisks.h>
#include <Databases/DatabaseMemory.h>
#include "Aliases.h"
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data);
String getQuotedTable(const String & database, const String & table);
String getQuotedTable(const DatabaseAndTableName & db_and_table);
enum class TaskState
{
Started = 0,
Finished,
Unknown
};
/// Used to mark status of shard partition tasks
struct TaskStateWithOwner
{
TaskStateWithOwner() = default;
TaskStateWithOwner(TaskState state_, const String & owner_) : state(state_), owner(owner_) {}
TaskState state{TaskState::Unknown};
String owner;
static String getData(TaskState state, const String &owner)
{
return TaskStateWithOwner(state, owner).toString();
}
String toString() const
{
WriteBufferFromOwnString wb;
wb << static_cast<UInt32>(state) << "\n" << escape << owner;
return wb.str();
}
static TaskStateWithOwner fromString(const String & data)
{
ReadBufferFromString rb(data);
TaskStateWithOwner res;
UInt32 state;
rb >> state >> "\n" >> escape >> res.owner;
if (state >= static_cast<int>(TaskState::Unknown))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown state {}", data);
res.state = static_cast<TaskState>(state);
return res;
}
};
struct ShardPriority
{
UInt8 is_remote = 1;
size_t hostname_difference = 0;
UInt8 random = 0;
static bool greaterPriority(const ShardPriority & current, const ShardPriority & other)
{
return std::forward_as_tuple(current.is_remote, current.hostname_difference, current.random)
< std::forward_as_tuple(other.is_remote, other.hostname_difference, other.random);
}
};
/// Execution status of a task.
/// Is used for: partition copying task status, partition piece copying task status, partition moving task status.
enum class TaskStatus
{
Active,
Finished,
Error,
};
struct MultiTransactionInfo
{
int32_t code;
Coordination::Requests requests;
Coordination::Responses responses;
};
// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key])
std::shared_ptr<ASTStorage> createASTStorageDistributed(
const String & cluster_name, const String & database, const String & table,
const ASTPtr & sharding_key_ast = nullptr);
Block getBlockWithAllStreamData(QueryPipelineBuilder builder);
bool isExtendedDefinitionStorage(const ASTPtr & storage_ast);
ASTPtr extractPartitionKey(const ASTPtr & storage_ast);
/*
* Choosing a Primary Key that Differs from the Sorting Key
* It is possible to specify a primary key (an expression with values that are written in the index file for each mark)
* that is different from the sorting key (an expression for sorting the rows in data parts).
* In this case the primary key expression tuple must be a prefix of the sorting key expression tuple.
* This feature is helpful when using the SummingMergeTree and AggregatingMergeTree table engines.
* In a common case when using these engines, the table has two types of columns: dimensions and measures.
* Typical queries aggregate values of measure columns with arbitrary GROUP BY and filtering by dimensions.
* Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key,
* it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns
* and this list must be frequently updated with newly added dimensions.
* In this case it makes sense to leave only a few columns in the primary key that will provide efficient
* range scans and add the remaining dimension columns to the sorting key tuple.
* ALTER of the sorting key is a lightweight operation because when a new column is simultaneously added t
* o the table and to the sorting key, existing data parts don't need to be changed.
* Since the old sorting key is a prefix of the new sorting key and there is no data in the newly added column,
* the data is sorted by both the old and new sorting keys at the moment of table modification.
*
* */
ASTPtr extractPrimaryKey(const ASTPtr & storage_ast);
ASTPtr extractOrderBy(const ASTPtr & storage_ast);
Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast);
bool isReplicatedTableEngine(const ASTPtr & storage_ast);
ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random);
}

View File

@ -1,70 +0,0 @@
#include "ShardPartition.h"
#include "TaskShard.h"
#include "TaskTable.h"
namespace DB
{
ShardPartition::ShardPartition(TaskShard & parent, String name_quoted_, size_t number_of_splits)
: task_shard(parent)
, name(std::move(name_quoted_))
{
pieces.reserve(number_of_splits);
}
String ShardPartition::getPartitionCleanStartPath() const
{
return getPartitionPath() + "/clean_start";
}
String ShardPartition::getPartitionPieceCleanStartPath(size_t current_piece_number) const
{
assert(current_piece_number < task_shard.task_table.number_of_splits);
return getPartitionPiecePath(current_piece_number) + "/clean_start";
}
String ShardPartition::getPartitionPath() const
{
return task_shard.task_table.getPartitionPath(name);
}
String ShardPartition::getPartitionPiecePath(size_t current_piece_number) const
{
assert(current_piece_number < task_shard.task_table.number_of_splits);
return task_shard.task_table.getPartitionPiecePath(name, current_piece_number);
}
String ShardPartition::getShardStatusPath() const
{
// schema: /<root...>/tables/<table>/<partition>/shards/<shard>
// e.g. /root/table_test.hits/201701/shards/1
return getPartitionShardsPath() + "/" + toString(task_shard.numberInCluster());
}
String ShardPartition::getPartitionShardsPath() const
{
return getPartitionPath() + "/shards";
}
String ShardPartition::getPartitionActiveWorkersPath() const
{
return getPartitionPath() + "/partition_active_workers";
}
String ShardPartition::getActiveWorkerPath() const
{
return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster());
}
String ShardPartition::getCommonPartitionIsDirtyPath() const
{
return getPartitionPath() + "/is_dirty";
}
String ShardPartition::getCommonPartitionIsCleanedPath() const
{
return getCommonPartitionIsDirtyPath() + "/cleaned";
}
}

View File

@ -1,54 +0,0 @@
#pragma once
#include "ShardPartitionPiece.h"
#include <base/types.h>
#include <map>
namespace DB
{
struct TaskShard;
/// Just destination partition of a shard
/// I don't know what this comment means.
/// In short, when we discovered what shards contain currently processing partition,
/// This class describes a partition (name) that is stored on the shard (parent).
struct ShardPartition
{
ShardPartition(TaskShard &parent, String name_quoted_, size_t number_of_splits = 10);
String getPartitionPath() const;
String getPartitionPiecePath(size_t current_piece_number) const;
String getPartitionCleanStartPath() const;
String getPartitionPieceCleanStartPath(size_t current_piece_number) const;
String getCommonPartitionIsDirtyPath() const;
String getCommonPartitionIsCleanedPath() const;
String getPartitionActiveWorkersPath() const;
String getActiveWorkerPath() const;
String getPartitionShardsPath() const;
String getShardStatusPath() const;
/// What partition pieces are present in current shard.
/// FYI: Piece is a part of partition which has modulo equals to concrete constant (less than number_of_splits obliously)
/// For example SELECT ... from ... WHERE partition=current_partition AND cityHash64(*) == const;
/// Absent pieces have field is_absent_piece equals to true.
PartitionPieces pieces;
TaskShard & task_shard;
String name;
};
using TasksPartition = std::map<String, ShardPartition, std::greater<>>;
}

View File

@ -1,64 +0,0 @@
#include "ShardPartitionPiece.h"
#include "ShardPartition.h"
#include "TaskShard.h"
#include <IO/WriteHelpers.h>
namespace DB
{
ShardPartitionPiece::ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_)
: is_absent_piece(!is_present_piece_)
, current_piece_number(current_piece_number_)
, shard_partition(parent)
{
}
String ShardPartitionPiece::getPartitionPiecePath() const
{
return shard_partition.getPartitionPath() + "/piece_" + toString(current_piece_number);
}
String ShardPartitionPiece::getPartitionPieceCleanStartPath() const
{
return getPartitionPiecePath() + "/clean_start";
}
String ShardPartitionPiece::getPartitionPieceIsDirtyPath() const
{
return getPartitionPiecePath() + "/is_dirty";
}
String ShardPartitionPiece::getPartitionPieceIsCleanedPath() const
{
return getPartitionPieceIsDirtyPath() + "/cleaned";
}
String ShardPartitionPiece::getPartitionPieceActiveWorkersPath() const
{
return getPartitionPiecePath() + "/partition_piece_active_workers";
}
String ShardPartitionPiece::getActiveWorkerPath() const
{
return getPartitionPieceActiveWorkersPath() + "/" + toString(shard_partition.task_shard.numberInCluster());
}
/// On what shards do we have current partition.
String ShardPartitionPiece::getPartitionPieceShardsPath() const
{
return getPartitionPiecePath() + "/shards";
}
String ShardPartitionPiece::getShardStatusPath() const
{
return getPartitionPieceShardsPath() + "/" + toString(shard_partition.task_shard.numberInCluster());
}
String ShardPartitionPiece::getPartitionPieceCleanerPath() const
{
return getPartitionPieceIsDirtyPath() + "/cleaner";
}
}

View File

@ -1,43 +0,0 @@
#pragma once
#include <base/types.h>
#include <vector>
namespace DB
{
struct ShardPartition;
struct ShardPartitionPiece
{
ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_);
String getPartitionPiecePath() const;
String getPartitionPieceCleanStartPath() const;
String getPartitionPieceIsDirtyPath() const;
String getPartitionPieceIsCleanedPath() const;
String getPartitionPieceActiveWorkersPath() const;
String getActiveWorkerPath() const ;
/// On what shards do we have current partition.
String getPartitionPieceShardsPath() const;
String getShardStatusPath() const;
String getPartitionPieceCleanerPath() const;
bool is_absent_piece;
const size_t current_piece_number;
ShardPartition & shard_partition;
};
using PartitionPieces = std::vector<ShardPartitionPiece>;
}

View File

@ -1,48 +0,0 @@
#include "StatusAccumulator.h"
#include <Poco/JSON/Parser.h>
#include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Stringifier.h>
#include <iostream>
namespace DB
{
StatusAccumulator::MapPtr StatusAccumulator::fromJSON(String state_json)
{
Poco::JSON::Parser parser;
auto state = parser.parse(state_json).extract<Poco::JSON::Object::Ptr>();
MapPtr result_ptr = std::make_shared<Map>();
for (const auto & table_name : state->getNames())
{
auto table_status_json = state->getValue<String>(table_name);
auto table_status = parser.parse(table_status_json).extract<Poco::JSON::Object::Ptr>();
/// Map entry will be created if it is absent
auto & map_table_status = (*result_ptr)[table_name];
map_table_status.all_partitions_count += table_status->getValue<size_t>("all_partitions_count");
map_table_status.processed_partitions_count += table_status->getValue<size_t>("processed_partitions_count");
}
return result_ptr;
}
String StatusAccumulator::serializeToJSON(MapPtr statuses)
{
Poco::JSON::Object result_json;
for (const auto & [table_name, table_status] : *statuses)
{
Poco::JSON::Object status_json;
status_json.set("all_partitions_count", table_status.all_partitions_count);
status_json.set("processed_partitions_count", table_status.processed_partitions_count);
result_json.set(table_name, status_json);
}
std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
oss.exceptions(std::ios::failbit);
Poco::JSON::Stringifier::stringify(result_json, oss);
auto result = oss.str();
return result;
}
}

View File

@ -1,27 +0,0 @@
#pragma once
#include <base/types.h>
#include <memory>
#include <unordered_map>
namespace DB
{
class StatusAccumulator
{
public:
struct TableStatus
{
size_t all_partitions_count;
size_t processed_partitions_count;
};
using Map = std::unordered_map<String, TableStatus>;
using MapPtr = std::shared_ptr<Map>;
static MapPtr fromJSON(String state_json);
static String serializeToJSON(MapPtr statuses);
};
}

View File

@ -1,74 +0,0 @@
#include "TaskCluster.h"
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
TaskCluster::TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_)
: task_zookeeper_path(task_zookeeper_path_)
, default_local_database(default_local_database_)
{}
void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key)
{
String prefix = base_key.empty() ? "" : base_key + ".";
clusters_prefix = prefix + "remote_servers";
if (!config.has(clusters_prefix))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "You should specify list of clusters in {}", clusters_prefix);
Poco::Util::AbstractConfiguration::Keys tables_keys;
config.keys(prefix + "tables", tables_keys);
for (const auto & table_key : tables_keys)
{
table_tasks.emplace_back(*this, config, prefix + "tables", table_key);
}
}
void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key)
{
String prefix = base_key.empty() ? "" : base_key + ".";
max_workers = config.getUInt64(prefix + "max_workers");
settings_common = Settings();
if (config.has(prefix + "settings"))
settings_common.loadSettingsFromConfig(prefix + "settings", config);
settings_common.prefer_localhost_replica = false;
settings_pull = settings_common;
if (config.has(prefix + "settings_pull"))
settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config);
settings_push = settings_common;
if (config.has(prefix + "settings_push"))
settings_push.loadSettingsFromConfig(prefix + "settings_push", config);
auto set_default_value = [] (auto && setting, auto && default_value)
{
setting = setting.changed ? setting.value : default_value;
};
/// Override important settings
settings_pull.readonly = 1;
settings_pull.prefer_localhost_replica = false;
settings_push.distributed_foreground_insert = true;
settings_push.prefer_localhost_replica = false;
set_default_value(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME);
set_default_value(settings_pull.max_threads, 1);
set_default_value(settings_pull.max_block_size, 8192UL);
set_default_value(settings_pull.preferred_block_size_bytes, 0);
set_default_value(settings_push.distributed_background_insert_timeout, 0);
set_default_value(settings_push.alter_sync, 2);
}
}

View File

@ -1,51 +0,0 @@
#pragma once
#include "TaskTable.h"
#include <Core/Settings.h>
#include <base/types.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <pcg_random.hpp>
namespace DB
{
struct TaskCluster
{
TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_);
void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");
/// Set (or update) settings and max_workers param
void reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");
/// Base node for all tasks. Its structure:
/// workers/ - directory with active workers (amount of them is less or equal max_workers)
/// description - node with task configuration
/// table_table1/ - directories with per-partition copying status
String task_zookeeper_path;
/// Database used to create temporary Distributed tables
String default_local_database;
/// Limits number of simultaneous workers
UInt64 max_workers = 0;
/// Base settings for pull and push
Settings settings_common;
/// Settings used to fetch data
Settings settings_pull;
/// Settings used to insert data
Settings settings_push;
String clusters_prefix;
/// Subtasks
TasksTable table_tasks;
pcg64 random_engine;
};
}

View File

@ -1,37 +0,0 @@
#include "TaskShard.h"
#include "TaskTable.h"
namespace DB
{
TaskShard::TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_)
: task_table(parent)
, info(info_)
{
list_of_split_tables_on_shard.assign(task_table.number_of_splits, DatabaseAndTableName());
}
UInt32 TaskShard::numberInCluster() const
{
return info.shard_num;
}
UInt32 TaskShard::indexInCluster() const
{
return info.shard_num - 1;
}
String DB::TaskShard::getDescription() const
{
return fmt::format("N{} (having a replica {}, pull table {} of cluster {}",
numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name);
}
String DB::TaskShard::getHostNameExample() const
{
const auto & replicas = task_table.cluster_pull->getShardsAddresses().at(indexInCluster());
return replicas.at(0).readableString();
}
}

View File

@ -1,56 +0,0 @@
#pragma once
#include "Aliases.h"
#include "Internals.h"
#include "ClusterPartition.h"
#include "ShardPartition.h"
namespace DB
{
struct TaskTable;
struct TaskShard
{
TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_);
TaskTable & task_table;
Cluster::ShardInfo info;
UInt32 numberInCluster() const;
UInt32 indexInCluster() const;
String getDescription() const;
String getHostNameExample() const;
/// Used to sort clusters by their proximity
ShardPriority priority;
/// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard
ColumnWithTypeAndName partition_key_column;
/// There is a task for each destination partition
TasksPartition partition_tasks;
/// Which partitions have been checked for existence
/// If some partition from this lists is exists, it is in partition_tasks
std::set<String> checked_partitions;
/// Last CREATE TABLE query of the table of the shard
ASTPtr current_pull_table_create_query;
ASTPtr current_push_table_create_query;
/// Internal distributed tables
DatabaseAndTableName table_read_shard;
DatabaseAndTableName main_table_split_shard;
ListOfDatabasesAndTableNames list_of_split_tables_on_shard;
};
using TaskShardPtr = std::shared_ptr<TaskShard>;
using TasksShard = std::vector<TaskShardPtr>;
}

View File

@ -1,222 +0,0 @@
#include "TaskTable.h"
#include "ClusterPartition.h"
#include "TaskCluster.h"
#include <Parsers/ASTFunction.h>
#include <Common/escapeForFileName.h>
#include <boost/algorithm/string/join.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
extern const int LOGICAL_ERROR;
}
TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config,
const String & prefix_, const String & table_key)
: task_cluster(parent)
{
String table_prefix = prefix_ + "." + table_key + ".";
name_in_config = table_key;
number_of_splits = config.getUInt64(table_prefix + "number_of_splits", 3);
allow_to_copy_alias_and_materialized_columns = config.getBool(table_prefix + "allow_to_copy_alias_and_materialized_columns", false);
allow_to_drop_target_partitions = config.getBool(table_prefix + "allow_to_drop_target_partitions", false);
cluster_pull_name = config.getString(table_prefix + "cluster_pull");
cluster_push_name = config.getString(table_prefix + "cluster_push");
table_pull.first = config.getString(table_prefix + "database_pull");
table_pull.second = config.getString(table_prefix + "table_pull");
table_push.first = config.getString(table_prefix + "database_push");
table_push.second = config.getString(table_prefix + "table_push");
/// Used as node name in ZooKeeper
table_id = escapeForFileName(cluster_push_name)
+ "." + escapeForFileName(table_push.first)
+ "." + escapeForFileName(table_push.second);
engine_push_str = config.getString(table_prefix + "engine", "rand()");
{
ParserStorage parser_storage{ParserStorage::TABLE_ENGINE};
engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", ");
is_replicated_table = isReplicatedTableEngine(engine_push_ast);
}
sharding_key_str = config.getString(table_prefix + "sharding_key");
auxiliary_engine_split_asts.reserve(number_of_splits);
{
ParserExpressionWithOptionalAlias parser_expression(false);
sharding_key_ast = parseQuery(parser_expression, sharding_key_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second,
sharding_key_ast);
for (const auto piece_number : collections::range(0, number_of_splits))
{
auxiliary_engine_split_asts.emplace_back
(
createASTStorageDistributed(cluster_push_name, table_push.first,
table_push.second + "_piece_" + toString(piece_number), sharding_key_ast)
);
}
}
where_condition_str = config.getString(table_prefix + "where_condition", "");
if (!where_condition_str.empty())
{
ParserExpressionWithOptionalAlias parser_expression(false);
where_condition_ast = parseQuery(parser_expression, where_condition_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
// Will use canonical expression form
where_condition_str = queryToString(where_condition_ast);
}
String enabled_partitions_prefix = table_prefix + "enabled_partitions";
has_enabled_partitions = config.has(enabled_partitions_prefix);
if (has_enabled_partitions)
{
Strings keys;
config.keys(enabled_partitions_prefix, keys);
if (keys.empty())
{
/// Parse list of partition from space-separated string
String partitions_str = config.getString(table_prefix + "enabled_partitions");
boost::trim_if(partitions_str, isWhitespaceASCII);
boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on);
}
else
{
/// Parse sequence of <partition>...</partition>
for (const String &key : keys)
{
if (!startsWith(key, "partition"))
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown key {} in {}", key, enabled_partitions_prefix);
enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key));
}
}
std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin()));
}
}
String TaskTable::getPartitionPath(const String & partition_name) const
{
return task_cluster.task_zookeeper_path // root
+ "/tables/" + table_id // tables/dst_cluster.merge.hits
+ "/" + escapeForFileName(partition_name); // 201701
}
String TaskTable::getPartitionAttachIsActivePath(const String & partition_name) const
{
return getPartitionPath(partition_name) + "/attach_active";
}
String TaskTable::getPartitionAttachIsDonePath(const String & partition_name) const
{
return getPartitionPath(partition_name) + "/attach_is_done";
}
String TaskTable::getPartitionPiecePath(const String & partition_name, size_t piece_number) const
{
assert(piece_number < number_of_splits);
return getPartitionPath(partition_name) + "/piece_" + toString(piece_number); // 1...number_of_splits
}
String TaskTable::getCertainPartitionIsDirtyPath(const String &partition_name) const
{
return getPartitionPath(partition_name) + "/is_dirty";
}
String TaskTable::getCertainPartitionPieceIsDirtyPath(const String & partition_name, const size_t piece_number) const
{
return getPartitionPiecePath(partition_name, piece_number) + "/is_dirty";
}
String TaskTable::getCertainPartitionIsCleanedPath(const String & partition_name) const
{
return getCertainPartitionIsDirtyPath(partition_name) + "/cleaned";
}
String TaskTable::getCertainPartitionPieceIsCleanedPath(const String & partition_name, const size_t piece_number) const
{
return getCertainPartitionPieceIsDirtyPath(partition_name, piece_number) + "/cleaned";
}
String TaskTable::getCertainPartitionTaskStatusPath(const String & partition_name) const
{
return getPartitionPath(partition_name) + "/shards";
}
String TaskTable::getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const
{
return getPartitionPiecePath(partition_name, piece_number) + "/shards";
}
bool TaskTable::isReplicatedTable() const
{
return is_replicated_table;
}
String TaskTable::getStatusAllPartitionCount() const
{
return task_cluster.task_zookeeper_path + "/status/all_partitions_count";
}
String TaskTable::getStatusProcessedPartitionsCount() const
{
return task_cluster.task_zookeeper_path + "/status/processed_partitions_count";
}
ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() const
{
ASTPtr prev_engine_push_ast = engine_push_ast->clone();
auto & new_storage_ast = prev_engine_push_ast->as<ASTStorage &>();
auto & new_engine_ast = new_storage_ast.engine->as<ASTFunction &>();
/// Remove "Replicated" from name
new_engine_ast.name = new_engine_ast.name.substr(10);
if (new_engine_ast.arguments)
{
auto & replicated_table_arguments = new_engine_ast.arguments->children;
/// In some cases of Atomic database engine usage ReplicatedMergeTree tables
/// could be created without arguments.
if (!replicated_table_arguments.empty())
{
/// Delete first two arguments of Replicated...MergeTree() table.
replicated_table_arguments.erase(replicated_table_arguments.begin());
replicated_table_arguments.erase(replicated_table_arguments.begin());
}
}
return new_storage_ast.clone();
}
ClusterPartition & TaskTable::getClusterPartition(const String & partition_name)
{
auto it = cluster_partitions.find(partition_name);
if (it == cluster_partitions.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no cluster partition {} in {}", partition_name, table_id);
return it->second;
}
}

View File

@ -1,173 +0,0 @@
#pragma once
#include "Aliases.h"
#include "TaskShard.h"
namespace DB
{
struct ClusterPartition;
struct TaskCluster;
struct TaskTable
{
TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix, const String & table_key);
TaskCluster & task_cluster;
/// These functions used in checkPartitionIsDone() or checkPartitionPieceIsDone()
/// They are implemented here not to call task_table.tasks_shard[partition_name].second.pieces[current_piece_number] etc.
String getPartitionPath(const String & partition_name) const;
String getPartitionAttachIsActivePath(const String & partition_name) const;
String getPartitionAttachIsDonePath(const String & partition_name) const;
String getPartitionPiecePath(const String & partition_name, size_t piece_number) const;
String getCertainPartitionIsDirtyPath(const String & partition_name) const;
String getCertainPartitionPieceIsDirtyPath(const String & partition_name, size_t piece_number) const;
String getCertainPartitionIsCleanedPath(const String & partition_name) const;
String getCertainPartitionPieceIsCleanedPath(const String & partition_name, size_t piece_number) const;
String getCertainPartitionTaskStatusPath(const String & partition_name) const;
String getCertainPartitionPieceTaskStatusPath(const String & partition_name, size_t piece_number) const;
bool isReplicatedTable() const;
/// These nodes are used for check-status option
String getStatusAllPartitionCount() const;
String getStatusProcessedPartitionsCount() const;
/// Partitions will be split into number-of-splits pieces.
/// Each piece will be copied independently. (10 by default)
size_t number_of_splits;
bool allow_to_copy_alias_and_materialized_columns{false};
bool allow_to_drop_target_partitions{false};
String name_in_config;
/// Used as task ID
String table_id;
/// Column names in primary key
String primary_key_comma_separated;
/// Source cluster and table
String cluster_pull_name;
DatabaseAndTableName table_pull;
/// Destination cluster and table
String cluster_push_name;
DatabaseAndTableName table_push;
/// Storage of destination table
/// (tables that are stored on each shard of target cluster)
String engine_push_str;
ASTPtr engine_push_ast;
ASTPtr engine_push_partition_key_ast;
/// First argument of Replicated...MergeTree()
String engine_push_zk_path;
bool is_replicated_table;
ASTPtr rewriteReplicatedCreateQueryToPlain() const;
/*
* A Distributed table definition used to split data
* Distributed table will be created on each shard of default
* cluster to perform data copying and resharding
* */
String sharding_key_str;
ASTPtr sharding_key_ast;
ASTPtr main_engine_split_ast;
/*
* To copy partition piece form one cluster to another we have to use Distributed table.
* In case of usage separate table (engine_push) for each partition piece,
* we have to use many Distributed tables.
* */
ASTs auxiliary_engine_split_asts;
/// Additional WHERE expression to filter input data
String where_condition_str;
ASTPtr where_condition_ast;
/// Resolved clusters
ClusterPtr cluster_pull;
ClusterPtr cluster_push;
/// Filter partitions that should be copied
bool has_enabled_partitions = false;
Strings enabled_partitions;
NameSet enabled_partitions_set;
/**
* Prioritized list of shards
* all_shards contains information about all shards in the table.
* So we have to check whether particular shard have current partition or not while processing.
*/
TasksShard all_shards;
TasksShard local_shards;
/// All partitions of the current table.
ClusterPartitions cluster_partitions;
NameSet finished_cluster_partitions;
/// Partition names to process in user-specified order
Strings ordered_partition_names;
ClusterPartition & getClusterPartition(const String & partition_name);
Stopwatch watch;
UInt64 bytes_copied = 0;
UInt64 rows_copied = 0;
template <typename RandomEngine>
void initShards(RandomEngine &&random_engine);
};
using TasksTable = std::list<TaskTable>;
template<typename RandomEngine>
inline void TaskTable::initShards(RandomEngine && random_engine)
{
const String & fqdn_name = getFQDNOrHostName();
std::uniform_int_distribution<uint8_t> get_urand(0, std::numeric_limits<UInt8>::max());
// Compute the priority
for (const auto & shard_info : cluster_pull->getShardsInfo())
{
TaskShardPtr task_shard = std::make_shared<TaskShard>(*this, shard_info);
const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster());
task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine));
all_shards.emplace_back(task_shard);
}
// Sort by priority
std::sort(all_shards.begin(), all_shards.end(),
[](const TaskShardPtr & lhs, const TaskShardPtr & rhs)
{
return ShardPriority::greaterPriority(lhs->priority, rhs->priority);
});
// Cut local shards
auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1,
[](const TaskShardPtr & lhs, UInt8 is_remote)
{
return lhs->priority.is_remote < is_remote;
});
local_shards.assign(all_shards.begin(), it_first_remote);
}
}

View File

@ -1,221 +0,0 @@
#pragma once
/** Allows to compare two incremental counters of type UInt32 in presence of possible overflow.
* We assume that we compare values that are not too far away.
* For example, when we increment 0xFFFFFFFF, we get 0. So, 0xFFFFFFFF is less than 0.
*/
class WrappingUInt32
{
public:
UInt32 value;
explicit WrappingUInt32(UInt32 _value)
: value(_value)
{}
bool operator<(const WrappingUInt32 & other) const
{
return value != other.value && *this <= other;
}
bool operator<=(const WrappingUInt32 & other) const
{
const UInt32 HALF = static_cast<UInt32>(1) << 31;
return (value <= other.value && other.value - value < HALF)
|| (value > other.value && value - other.value > HALF);
}
bool operator==(const WrappingUInt32 & other) const
{
return value == other.value;
}
};
/** Conforming Zxid definition.
* cf. https://github.com/apache/zookeeper/blob/631d1b284f0edb1c4f6b0fb221bf2428aec71aaa/zookeeper-docs/src/main/resources/markdown/zookeeperInternals.md#guarantees-properties-and-definitions
*
* But it is better to read this: https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html
*
* Actually here is the definition of Zxid.
* Every change to the ZooKeeper state receives a stamp in the form of a zxid (ZooKeeper Transaction Id).
* This exposes the total ordering of all changes to ZooKeeper. Each change will have a unique zxid
* and if zxid1 is smaller than zxid2 then zxid1 happened before zxid2.
*/
class Zxid
{
public:
WrappingUInt32 epoch;
WrappingUInt32 counter;
explicit Zxid(UInt64 _zxid)
: epoch(static_cast<UInt32>(_zxid >> 32))
, counter(static_cast<UInt32>(_zxid))
{}
bool operator<=(const Zxid & other) const
{
return (epoch < other.epoch)
|| (epoch == other.epoch && counter <= other.counter);
}
bool operator==(const Zxid & other) const
{
return epoch == other.epoch && counter == other.counter;
}
};
/* When multiple ClusterCopiers discover that the target partition is not empty,
* they will attempt to clean up this partition before proceeding to copying.
*
* Instead of purging is_dirty, the history of cleaning work is preserved and partition hygiene is established
* based on a happens-before relation between the events.
* This relation is encoded by LogicalClock based on the mzxid of the is_dirty ZNode and is_dirty/cleaned.
* The fact of the partition hygiene is encoded by CleanStateClock.
*
* For you to know what mzxid means:
*
* ZooKeeper Stat Structure:
* The Stat structure for each znode in ZooKeeper is made up of the following fields:
*
* -- czxid
* The zxid of the change that caused this znode to be created.
*
* -- mzxid
* The zxid of the change that last modified this znode.
*
* -- ctime
* The time in milliseconds from epoch when this znode was created.
*
* -- mtime
* The time in milliseconds from epoch when this znode was last modified.
*
* -- version
* The number of changes to the data of this znode.
*
* -- cversion
* The number of changes to the children of this znode.
*
* -- aversion
* The number of changes to the ACL of this znode.
*
* -- ephemeralOwner
* The session id of the owner of this znode if the znode is an ephemeral node.
* If it is not an ephemeral node, it will be zero.
*
* -- dataLength
* The length of the data field of this znode.
*
* -- numChildren
* The number of children of this znode.
* */
class LogicalClock
{
public:
std::optional<Zxid> zxid;
LogicalClock() = default;
explicit LogicalClock(UInt64 _zxid)
: zxid(_zxid)
{}
bool hasHappened() const
{
return bool(zxid);
}
/// happens-before relation with a reasonable time bound
bool happensBefore(const LogicalClock & other) const
{
return !zxid
|| (other.zxid && *zxid <= *other.zxid);
}
bool operator<=(const LogicalClock & other) const
{
return happensBefore(other);
}
/// strict equality check
bool operator==(const LogicalClock & other) const
{
return zxid == other.zxid;
}
};
class CleanStateClock
{
public:
LogicalClock discovery_zxid;
std::optional<UInt32> discovery_version;
LogicalClock clean_state_zxid;
std::optional<UInt32> clean_state_version;
std::shared_ptr<std::atomic_bool> stale;
bool is_clean() const
{
return !is_stale()
&& (!discovery_zxid.hasHappened() || (clean_state_zxid.hasHappened() && discovery_zxid <= clean_state_zxid));
}
bool is_stale() const
{
return stale->load();
}
CleanStateClock(
const zkutil::ZooKeeperPtr & zookeeper,
const String & discovery_path,
const String & clean_state_path)
: stale(std::make_shared<std::atomic_bool>(false))
{
Coordination::Stat stat{};
String _some_data;
auto watch_callback =
[my_stale = stale] (const Coordination::WatchResponse & rsp)
{
auto logger = getLogger("ClusterCopier");
if (rsp.error == Coordination::Error::ZOK)
{
switch (rsp.type) /// NOLINT(bugprone-switch-missing-default-case)
{
case Coordination::CREATED:
LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path);
my_stale->store(true);
break;
case Coordination::CHANGED:
LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at {}", rsp.path);
my_stale->store(true);
}
}
};
if (zookeeper->tryGetWatch(discovery_path, _some_data, &stat, watch_callback))
{
discovery_zxid = LogicalClock(stat.mzxid);
discovery_version = stat.version;
}
if (zookeeper->tryGetWatch(clean_state_path, _some_data, &stat, watch_callback))
{
clean_state_zxid = LogicalClock(stat.mzxid);
clean_state_version = stat.version;
}
}
bool operator==(const CleanStateClock & other) const
{
return !is_stale()
&& !other.is_stale()
&& discovery_zxid == other.discovery_zxid
&& discovery_version == other.discovery_version
&& clean_state_zxid == other.clean_state_zxid
&& clean_state_version == other.clean_state_version;
}
bool operator!=(const CleanStateClock & other) const
{
return !(*this == other);
}
};

View File

@ -1 +0,0 @@
int mainEntryClickHouseClusterCopier(int argc, char ** argv);

View File

@ -94,7 +94,7 @@
<http_port>8123</http_port>
<!-- Port for interaction by native protocol with:
- clickhouse-client and other native ClickHouse tools (clickhouse-benchmark, clickhouse-copier);
- clickhouse-client and other native ClickHouse tools (clickhouse-benchmark);
- clickhouse-server with other clickhouse-servers for distributed query processing;
- ClickHouse drivers and applications supporting native protocol
(this protocol is also informally called as "the TCP protocol");

View File

@ -56,7 +56,7 @@ logger:
http_port: 8123
# Port for interaction by native protocol with:
# - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark, clickhouse-copier);
# - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark);
# - clickhouse-server with other clickhouse-servers for distributed query processing;
# - ClickHouse drivers and applications supporting native protocol
# (this protocol is also informally called as "the TCP protocol");

View File

@ -94,7 +94,7 @@
<http_port>8123</http_port>
<!-- Port for interaction by native protocol with:
- clickhouse-client and other native ClickHouse tools (clickhouse-benchmark, clickhouse-copier);
- clickhouse-client and other native ClickHouse tools (clickhouse-benchmark);
- clickhouse-server with other clickhouse-servers for distributed query processing;
- ClickHouse drivers and applications supporting native protocol
(this protocol is also informally called as "the TCP protocol");

Some files were not shown because too many files have changed in this diff Show More