Merge branch 'master' into hanfei/check-limit-periodically

Han Fei, 2024-03-08 03:59:41 +01:00
commit c92fb8700f
583 changed files with 7626 additions and 5960 deletions

View File

@@ -10,7 +10,7 @@
# TODO Let clang-tidy check headers in further directories
# --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$'
HeaderFilterRegex: '^.*/(base)/.*(h|hpp)$'
HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$'
Checks: '*,
-abseil-*,
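For illustration (paths hypothetical): with the widened HeaderFilterRegex, headers under programs/ and utils/ are now checked in addition to base/, while src/ headers remain excluded per the TODO above:
#   base/base/Decimal.h          -> checked (as before)
#   programs/server/Server.h     -> now checked
#   utils/keeper-bench/Runner.h  -> now checked
#   src/Core/Field.h             -> still excluded (see TODO)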

View File

@@ -305,7 +305,7 @@ jobs:
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
MarkReleaseReady:
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
if: ${{ !failure() && !cancelled() }}
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
@@ -313,9 +313,25 @@
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Debug
run: |
echo "needs with different filters"
cat << 'EOF'
${{ toJSON(needs) }}
${{ toJSON(needs.*.result) }}
no failures ${{ !contains(needs.*.result, 'failure') }}
no skips ${{ !contains(needs.*.result, 'skipped') }}
no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
EOF
- name: Not ready
# fail the job to be able to restart it
if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }}
run: exit 1
- name: Check out repository code
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
uses: ClickHouse/checkout@v1
- name: Mark Commit Release Ready
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py

View File

@@ -13,9 +13,7 @@ on: # yamllint disable-line rule:truthy
- opened
branches:
- master
##########################################################################################
##################################### SMALL CHECKS #######################################
##########################################################################################
jobs:
RunConfig:
runs-on: [self-hosted, style-checker-aarch64]
@@ -70,13 +68,13 @@ jobs:
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check'
BuildDockers:
needs: [RunConfig]
if: ${{ !failure() && !cancelled() }}
if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }}
uses: ./.github/workflows/reusable_docker.yml
with:
data: ${{ needs.RunConfig.outputs.data }}
StyleCheck:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Style check')}}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Style check
@@ -89,19 +87,9 @@
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
DocsCheck:
needs: [RunConfig, StyleCheck]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docs check
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docs_check.py
FastTest:
needs: [RunConfig, StyleCheck]
if: ${{ !failure() && !cancelled() }}
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Fast test') }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Fast test
@@ -109,818 +97,83 @@ jobs:
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 fast_test_check.py
CompatibilityCheckX86:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Compatibility check (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
CompatibilityCheckAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Compatibility check (aarch64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
BuilderDebDebug:
################################# Main stages ##################################
# for the main CI chain
#
Builds_1:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }}
# using a callable workflow (reusable_build_stage.yml) allows grouping all nested jobs under one tab
uses: ./.github/workflows/reusable_build_stage.yml
with:
build_name: package_debug
stage: Builds_1
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebRelease:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
Tests_1:
needs: [RunConfig, Builds_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_1') }}
# using a callable workflow (reusable_test_stage.yml) allows grouping all nested jobs under one tab
uses: ./.github/workflows/reusable_test_stage.yml
with:
build_name: package_release
checkout_depth: 0
stage: Tests_1
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebReleaseCoverage:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
Builds_2:
needs: [RunConfig, Builds_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_2') }}
# using a callable workflow (reusable_build_stage.yml) allows grouping all nested jobs under one tab
uses: ./.github/workflows/reusable_build_stage.yml
with:
build_name: package_release_coverage
checkout_depth: 0
stage: Builds_2
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebAarch64:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
Tests_2:
needs: [RunConfig, Builds_2]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }}
# using a callable workflow (reusable_test_stage.yml) allows grouping all nested jobs under one tab
uses: ./.github/workflows/reusable_test_stage.yml
with:
build_name: package_aarch64
checkout_depth: 0
stage: Tests_2
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinRelease:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_release
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebAsan:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_asan
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebUBsan:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_ubsan
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebTsan:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_tsan
data: ${{ needs.RunConfig.outputs.data }}
BuilderDebMsan:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_msan
data: ${{ needs.RunConfig.outputs.data }}
##########################################################################################
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderBinClangTidy:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_tidy
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinDarwin:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinAarch64:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinFreeBSD:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_freebsd
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinDarwinAarch64:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin_aarch64
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinPPC64:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_ppc64le
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinAmd64Compat:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_amd64_compat
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinAmd64Musl:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_amd64_musl
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinAarch64V80Compat:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64_v80compat
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinRISCV64:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_riscv64
data: ${{ needs.RunConfig.outputs.data }}
BuilderBinS390X:
needs: [RunConfig, FastTest]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_s390x
data: ${{ needs.RunConfig.outputs.data }}
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
################################# Reports #################################
# Reports should be run even if Builds_1/2 fail, so they are kept separate in the workflow (not in Tests_1/2)
Builds_1_Report:
# run the report check even for failed builds to surface the CI error
if: ${{ !cancelled() }}
if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
needs:
- RunConfig
- BuilderDebAarch64
- BuilderDebAsan
- BuilderDebDebug
- BuilderDebMsan
- BuilderDebRelease
- BuilderDebTsan
- BuilderDebUBsan
- Builds_1
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
runner_type: style-checker
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
BuilderSpecialReport:
Builds_2_Report:
# run the report check even for failed builds to surface the CI error
if: ${{ !cancelled() }}
if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }}
needs:
- RunConfig
- BuilderBinAarch64
- BuilderBinDarwin
- BuilderBinDarwinAarch64
- BuilderBinFreeBSD
- BuilderBinPPC64
- BuilderBinRISCV64
- BuilderBinS390X
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
- BuilderDebReleaseCoverage
- BuilderBinRelease
- Builds_2
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
InstallPackagesTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Install packages (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 install_check.py "$CHECK_NAME"
InstallPackagesTestAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Install packages (arm64)
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 install_check.py "$CHECK_NAME"
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################
FunctionalStatelessTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (release)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestReleaseAnalyzerS3Replicated:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (release, analyzer, s3, DatabaseReplicated)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestS3Debug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (debug, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestS3Tsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (tsan, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (aarch64)
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (asan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (tsan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (msan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestUBsan:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (ubsan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (debug)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestFlakyCheck:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests flaky check (asan)
data: ${{ needs.RunConfig.outputs.data }}
runner_type: func-tester
TestsBugfixCheck:
needs: [RunConfig, StyleCheck]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Bugfix validation
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
############################ FUNCTIONAL STATEFUL TESTS #######################################
##############################################################################################
FunctionalStatefulTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (release)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (aarch64)
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (asan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (tsan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (msan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestUBsan:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (ubsan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (debug)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
# Parallel replicas
FunctionalStatefulTestDebugParallelReplicas:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (debug, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestUBsanParallelReplicas:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (ubsan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestMsanParallelReplicas:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (msan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestTsanParallelReplicas:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (tsan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestAsanParallelReplicas:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (asan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestReleaseParallelReplicas:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (release, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
########################### ClickBench #######################################################
##############################################################################################
ClickBenchAMD64:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickBench (amd64)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 clickbench.py "$CHECK_NAME"
ClickBenchAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickBench (aarch64)
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 clickbench.py "$CHECK_NAME"
##############################################################################################
######################################### STRESS TESTS #######################################
##############################################################################################
StressTestAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (asan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (tsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (msan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestUBsan:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (ubsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (debug)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
######################################### UPGRADE CHECK ######################################
##############################################################################################
UpgradeCheckAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Upgrade check (asan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
UpgradeCheckTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Upgrade check (tsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
UpgradeCheckMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Upgrade check (msan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
UpgradeCheckDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Upgrade check (debug)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
##################################### AST FUZZERS ############################################
##############################################################################################
ASTFuzzerTestAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: AST fuzzer (asan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
ASTFuzzerTestTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: AST fuzzer (tsan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
ASTFuzzerTestUBSan:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: AST fuzzer (ubsan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
ASTFuzzerTestMSan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: AST fuzzer (msan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
ASTFuzzerTestDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: AST fuzzer (debug)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
############################# INTEGRATION TESTS #############################################
#############################################################################################
IntegrationTestsAnalyzerAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Integration tests (asan, analyzer)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
IntegrationTestsTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Integration tests (tsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
IntegrationTestsAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Integration tests (aarch64)
# FIXME: there is no stress-tester for aarch64; is func-tester-aarch64 OK?
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
IntegrationTestsFlakyCheck:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Integration tests flaky check (asan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
#################################### UNIT TESTS #############################################
#############################################################################################
UnitTestsAsan:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Unit tests (asan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
UnitTestsRelease:
needs: [RunConfig, BuilderBinRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Unit tests (release)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
UnitTestsTsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Unit tests (tsan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
UnitTestsMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Unit tests (msan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
UnitTestsUBsan:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Unit tests (ubsan)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
#################################### PERFORMANCE TESTS ######################################
#############################################################################################
PerformanceComparisonX86:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Performance Comparison
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
PerformanceComparisonAarch:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Performance Comparison Aarch64
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
###################################### SQLANCER FUZZERS ######################################
##############################################################################################
SQLancerTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: SQLancer (release)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
SQLancerTestDebug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: SQLancer (debug)
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
################################# Stage Final #################################
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
needs:
- BuilderReport
- BuilderSpecialReport
- DocsCheck
- FastTest
- TestsBugfixCheck
- FunctionalStatelessTestDebug
- FunctionalStatelessTestRelease
- FunctionalStatelessTestAarch64
- FunctionalStatelessTestAsan
- FunctionalStatelessTestTsan
- FunctionalStatelessTestMsan
- FunctionalStatelessTestUBsan
- FunctionalStatefulTestDebug
- FunctionalStatefulTestRelease
- FunctionalStatefulTestAarch64
- FunctionalStatefulTestAsan
- FunctionalStatefulTestTsan
- FunctionalStatefulTestMsan
- FunctionalStatefulTestUBsan
- FunctionalStatelessTestS3Debug
- FunctionalStatelessTestS3Tsan
- FunctionalStatelessTestReleaseAnalyzerS3Replicated
- FunctionalStatefulTestReleaseParallelReplicas
- FunctionalStatefulTestAsanParallelReplicas
- FunctionalStatefulTestTsanParallelReplicas
- FunctionalStatefulTestMsanParallelReplicas
- FunctionalStatefulTestUBsanParallelReplicas
- FunctionalStatefulTestDebugParallelReplicas
- StressTestDebug
- StressTestAsan
- StressTestTsan
- StressTestMsan
- StressTestUBsan
- UpgradeCheckAsan
- UpgradeCheckTsan
- UpgradeCheckMsan
- UpgradeCheckDebug
- ASTFuzzerTestDebug
- ASTFuzzerTestAsan
- ASTFuzzerTestTsan
- ASTFuzzerTestMSan
- ASTFuzzerTestUBSan
- IntegrationTestsAnalyzerAsan
- IntegrationTestsTsan
- IntegrationTestsAarch64
- IntegrationTestsFlakyCheck
- PerformanceComparisonX86
- PerformanceComparisonAarch
- UnitTestsAsan
- UnitTestsTsan
- UnitTestsMsan
- UnitTestsUBsan
- UnitTestsRelease
- CompatibilityCheckX86
- CompatibilityCheckAarch64
- SQLancerTestRelease
- SQLancerTestDebug
needs: [Tests_1, Tests_2]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 merge_pr.py --check-approved
##############################################################################################
############################ SQLLOGIC TEST ###################################################
##############################################################################################
SQLLogicTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Sqllogic test (release)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
##################################### SQL TEST ###############################################
##############################################################################################
SQLTest:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: SQLTest
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
###################################### NOT IN FINISH ########################################
#############################################################################################
###################################### JEPSEN TESTS #########################################
#############################################################################################
@@ -931,19 +184,11 @@ jobs:
# we need concurrency as the job uses dedicated instances in the cloud
concurrency:
group: jepsen
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, BuilderBinRelease]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse Keeper Jepsen') }}
# jepsen needs the binary_release build, which is in Builds_2
needs: [RunConfig, Builds_2]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse Keeper Jepsen
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
####################################### libFuzzer ###########################################
#############################################################################################
libFuzzer:
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, StyleCheck]
uses: ./.github/workflows/libfuzzer.yml
with:
data: ${{ needs.RunConfig.outputs.data }}
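For orientation only (an assumption inferred from the expressions above, not taken from the repository): the RunConfig `data` JSON that these `if:` conditions parse would need to look roughly like:

{
  "docker_data": { "missing_multi": [] },
  "jobs_data":   { "jobs_to_do": ["Style check", "Fast test", "ClickHouse build check"] },
  "stages_data": { "stages_to_do": ["Builds_1", "Tests_1", "Builds_2", "Tests_2"] }
}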

View File

@@ -206,7 +206,7 @@ jobs:
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
MarkReleaseReady:
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
if: ${{ !failure() && !cancelled() }}
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
@@ -214,9 +214,25 @@
- BuilderDebAarch64
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Debug
run: |
echo "needs with different filters"
cat << 'EOF'
${{ toJSON(needs) }}
${{ toJSON(needs.*.result) }}
no failures ${{ !contains(needs.*.result, 'failure') }}
no skips ${{ !contains(needs.*.result, 'skipped') }}
no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
EOF
- name: Not ready
# fail the job to be able to restart it
if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }}
run: exit 1
- name: Check out repository code
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
uses: ClickHouse/checkout@v1
- name: Mark Commit Release Ready
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py

View File

@@ -43,7 +43,8 @@ jobs:
runs-on: [self-hosted, '${{inputs.runner_type}}']
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
# WIP: temporarily try a commit with limited parallelization of checkout
uses: ClickHouse/checkout@0be3f7b3098bae494d3ef5d29d2e0676fb606232
with:
clear-repository: true
ref: ${{ fromJson(inputs.data).git_ref }}

View File

@@ -0,0 +1,32 @@
### FIXME: merge reusable_test.yml and reusable_build.yml as they are almost identical
# and then merge reusable_build_stage.yml and reusable_test_stage.yml
name: BuildStageWF
'on':
workflow_call:
inputs:
stage:
description: stage name
type: string
required: true
data:
description: ci data
type: string
required: true
jobs:
s:
if: ${{ !failure() && !cancelled() }}
strategy:
fail-fast: false
matrix:
job_name_and_runner_type: ${{ fromJson(inputs.data).stages_data[inputs.stage] }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: ${{ matrix.job_name_and_runner_type.job_name }}
runner_type: ${{ matrix.job_name_and_runner_type.runner_type }}
# don't forget to pass the force flag (no CI cache / no reuse) once it's needed
force: false
# for now let's do a deep checkout for builds
checkout_depth: 0
data: ${{ inputs.data }}
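A hypothetical sketch of what `fromJson(inputs.data).stages_data[inputs.stage]` is assumed to expand to for the matrix (job and runner names illustrative):

[
  { "job_name": "package_release", "runner_type": "builder" },
  { "job_name": "package_debug",   "runner_type": "builder" }
]

Each matrix entry then becomes one invocation of reusable_build.yml with the corresponding build_name and runner_type.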

View File

@@ -0,0 +1,25 @@
name: StageWF
'on':
workflow_call:
inputs:
stage:
description: stage name
type: string
required: true
data:
description: ci data
type: string
required: true
jobs:
s:
if: ${{ !failure() && !cancelled() }}
strategy:
fail-fast: false
matrix:
job_name_and_runner_type: ${{ fromJson(inputs.data).stages_data[inputs.stage] }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ${{ matrix.job_name_and_runner_type.job_name }}
runner_type: ${{ matrix.job_name_and_runner_type.runner_type }}
data: ${{ inputs.data }}

View File

@@ -31,15 +31,30 @@ curl https://clickhouse.com/ | sh
* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements.
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Monthly Release & Community Call
Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.
* [v24.3 Community Call](https://clickhouse.com/company/events/v24-3-community-release-call) - Mar 26
* [v24.4 Community Call](https://clickhouse.com/company/events/v24-4-community-release-call) - Apr 30
## Upcoming Events
Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com.
Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
* [ClickHouse Meetup in Bellevue](https://www.meetup.com/clickhouse-seattle-user-group/events/298650371/) - Mar 11
* [ClickHouse Meetup at Ramp's Offices in NYC](https://www.meetup.com/clickhouse-new-york-user-group/events/298640542/) - Mar 19
* [ClickHouse Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/299479750/) - Mar 20
* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/298997115/) - Mar 21
* [ClickHouse Meetup in Bengaluru](https://www.meetup.com/clickhouse-bangalore-user-group/events/299479850/) - Mar 23
* [ClickHouse Meetup in Zurich](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/299628922/) - Apr 16
* [ClickHouse Meetup in Copenhagen](https://www.meetup.com/clickhouse-denmark-meetup-group/events/299629133/) - Apr 23
* [ClickHouse Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments".
* **Recording available**: [**v24.1 Release Webinar**](https://www.youtube.com/watch?v=pBF9g0wGAGs) All the features of 24.1, one convenient video! Watch it now!
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)
* **Recording available**: [**v24.2 Release Call**](https://www.youtube.com/watch?v=iN2y-TK8f3A) All the features of 24.2, one convenient video! Watch it now!
## Interested in joining ClickHouse and making it your full-time job?

View File

@@ -13,6 +13,7 @@ set (SRCS
cgroupsv2.cpp
coverage.cpp
demangle.cpp
Decimal.cpp
getAvailableMemoryAmount.cpp
getFQDNOrHostName.cpp
getMemoryAmount.cpp

base/base/Decimal.cpp (new file, 87 lines)
View File

@@ -0,0 +1,87 @@
#include <base/Decimal.h>
#include <base/extended_types.h>
namespace DB
{
/// Explicit template instantiations.
#define FOR_EACH_UNDERLYING_DECIMAL_TYPE(M) \
M(Int32) \
M(Int64) \
M(Int128) \
M(Int256)
#define FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(M, X) \
M(Int32, X) \
M(Int64, X) \
M(Int128, X) \
M(Int256, X)
template <typename T> const Decimal<T> & Decimal<T>::operator += (const T & x) { value += x; return *this; }
template <typename T> const Decimal<T> & Decimal<T>::operator -= (const T & x) { value -= x; return *this; }
template <typename T> const Decimal<T> & Decimal<T>::operator *= (const T & x) { value *= x; return *this; }
template <typename T> const Decimal<T> & Decimal<T>::operator /= (const T & x) { value /= x; return *this; }
template <typename T> const Decimal<T> & Decimal<T>::operator %= (const T & x) { value %= x; return *this; }
template <typename T> void NO_SANITIZE_UNDEFINED Decimal<T>::addOverflow(const T & x) { value += x; }
/// This explicit instantiation may affect performance, since the operators can no longer be inlined at call sites.
template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator += (const Decimal<U> & x) { value += static_cast<T>(x.value); return *this; }
template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator -= (const Decimal<U> & x) { value -= static_cast<T>(x.value); return *this; }
template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator *= (const Decimal<U> & x) { value *= static_cast<T>(x.value); return *this; }
template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator /= (const Decimal<U> & x) { value /= static_cast<T>(x.value); return *this; }
template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator %= (const Decimal<U> & x) { value %= static_cast<T>(x.value); return *this; }
#define DISPATCH(TYPE_T, TYPE_U) \
template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator += (const Decimal<TYPE_U> & x); \
template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator -= (const Decimal<TYPE_U> & x); \
template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator *= (const Decimal<TYPE_U> & x); \
template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator /= (const Decimal<TYPE_U> & x); \
template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator %= (const Decimal<TYPE_U> & x);
#define INVOKE(X) FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_UNDERLYING_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
#define DISPATCH(TYPE) template struct Decimal<TYPE>;
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
template <typename T> bool operator< (const Decimal<T> & x, const Decimal<T> & y) { return x.value < y.value; }
template <typename T> bool operator> (const Decimal<T> & x, const Decimal<T> & y) { return x.value > y.value; }
template <typename T> bool operator<= (const Decimal<T> & x, const Decimal<T> & y) { return x.value <= y.value; }
template <typename T> bool operator>= (const Decimal<T> & x, const Decimal<T> & y) { return x.value >= y.value; }
template <typename T> bool operator== (const Decimal<T> & x, const Decimal<T> & y) { return x.value == y.value; }
template <typename T> bool operator!= (const Decimal<T> & x, const Decimal<T> & y) { return x.value != y.value; }
#define DISPATCH(TYPE) \
template bool operator< (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template bool operator> (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template bool operator<= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template bool operator>= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template bool operator== (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template bool operator!= (const Decimal<TYPE> & x, const Decimal<TYPE> & y);
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
template <typename T> Decimal<T> operator+ (const Decimal<T> & x, const Decimal<T> & y) { return x.value + y.value; }
template <typename T> Decimal<T> operator- (const Decimal<T> & x, const Decimal<T> & y) { return x.value - y.value; }
template <typename T> Decimal<T> operator* (const Decimal<T> & x, const Decimal<T> & y) { return x.value * y.value; }
template <typename T> Decimal<T> operator/ (const Decimal<T> & x, const Decimal<T> & y) { return x.value / y.value; }
template <typename T> Decimal<T> operator- (const Decimal<T> & x) { return -x.value; }
#define DISPATCH(TYPE) \
template Decimal<TYPE> operator+ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template Decimal<TYPE> operator- (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template Decimal<TYPE> operator* (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template Decimal<TYPE> operator/ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
template Decimal<TYPE> operator- (const Decimal<TYPE> & x);
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS
#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE
}
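A minimal self-contained sketch of the two techniques this new file combines (illustrative names, not part of the diff): an X-macro that stamps out code once per underlying type, and explicit instantiation definitions paired with extern template declarations so each operator is compiled exactly once instead of in every translation unit.

#include <iostream>

// X-macro: expand M once per supported underlying type.
#define FOR_EACH_TYPE(M) M(int) M(long)

// Header side: declare the template; an extern template declaration
// (normally placed in the header) suppresses implicit instantiation:
//   #define DECLARE(TYPE) extern template struct Dec<TYPE>;
//   FOR_EACH_TYPE(DECLARE)
template <typename T>
struct Dec
{
    explicit Dec(T v) : value(v) {}
    const Dec<T> & operator+=(const T & x);
    T value;
};

// .cpp side: define the member once, then explicitly instantiate per type.
template <typename T>
const Dec<T> & Dec<T>::operator+=(const T & x) { value += x; return *this; }

#define INSTANTIATE(TYPE) template struct Dec<TYPE>;
FOR_EACH_TYPE(INSTANTIATE)  // expands to: template struct Dec<int>; template struct Dec<long>;
#undef INSTANTIATE

int main()
{
    Dec<int> d{1};
    d += 2;
    std::cout << d.value << '\n';  // prints 3
}

The trade-off noted in the comment above applies here as well: callers compiled in other translation units see only the declaration of operator+=, so it cannot be inlined there.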

View File

@@ -2,6 +2,7 @@
#include <base/extended_types.h>
#include <base/Decimal_fwd.h>
#include <base/types.h>
#include <base/defines.h>
@@ -10,6 +11,18 @@ namespace DB
template <class> struct Decimal;
class DateTime64;
#define FOR_EACH_UNDERLYING_DECIMAL_TYPE(M) \
M(Int32) \
M(Int64) \
M(Int128) \
M(Int256)
#define FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(M, X) \
M(Int32, X) \
M(Int64, X) \
M(Int128, X) \
M(Int256, X)
using Decimal32 = Decimal<Int32>;
using Decimal64 = Decimal<Int64>;
using Decimal128 = Decimal<Int128>;
@@ -50,36 +63,73 @@ struct Decimal
return static_cast<U>(value);
}
const Decimal<T> & operator += (const T & x) { value += x; return *this; }
const Decimal<T> & operator -= (const T & x) { value -= x; return *this; }
const Decimal<T> & operator *= (const T & x) { value *= x; return *this; }
const Decimal<T> & operator /= (const T & x) { value /= x; return *this; }
const Decimal<T> & operator %= (const T & x) { value %= x; return *this; }
const Decimal<T> & operator += (const T & x);
const Decimal<T> & operator -= (const T & x);
const Decimal<T> & operator *= (const T & x);
const Decimal<T> & operator /= (const T & x);
const Decimal<T> & operator %= (const T & x);
template <typename U> const Decimal<T> & operator += (const Decimal<U> & x) { value += x.value; return *this; }
template <typename U> const Decimal<T> & operator -= (const Decimal<U> & x) { value -= x.value; return *this; }
template <typename U> const Decimal<T> & operator *= (const Decimal<U> & x) { value *= x.value; return *this; }
template <typename U> const Decimal<T> & operator /= (const Decimal<U> & x) { value /= x.value; return *this; }
template <typename U> const Decimal<T> & operator %= (const Decimal<U> & x) { value %= x.value; return *this; }
template <typename U> const Decimal<T> & operator += (const Decimal<U> & x);
template <typename U> const Decimal<T> & operator -= (const Decimal<U> & x);
template <typename U> const Decimal<T> & operator *= (const Decimal<U> & x);
template <typename U> const Decimal<T> & operator /= (const Decimal<U> & x);
template <typename U> const Decimal<T> & operator %= (const Decimal<U> & x);
/// This is to avoid UB for sumWithOverflow()
void NO_SANITIZE_UNDEFINED addOverflow(const T & x) { value += x; }
void NO_SANITIZE_UNDEFINED addOverflow(const T & x);
T value;
};
template <typename T> inline bool operator< (const Decimal<T> & x, const Decimal<T> & y) { return x.value < y.value; }
template <typename T> inline bool operator> (const Decimal<T> & x, const Decimal<T> & y) { return x.value > y.value; }
template <typename T> inline bool operator<= (const Decimal<T> & x, const Decimal<T> & y) { return x.value <= y.value; }
template <typename T> inline bool operator>= (const Decimal<T> & x, const Decimal<T> & y) { return x.value >= y.value; }
template <typename T> inline bool operator== (const Decimal<T> & x, const Decimal<T> & y) { return x.value == y.value; }
template <typename T> inline bool operator!= (const Decimal<T> & x, const Decimal<T> & y) { return x.value != y.value; }
#define DISPATCH(TYPE) extern template struct Decimal<TYPE>;
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
template <typename T> inline Decimal<T> operator+ (const Decimal<T> & x, const Decimal<T> & y) { return x.value + y.value; }
template <typename T> inline Decimal<T> operator- (const Decimal<T> & x, const Decimal<T> & y) { return x.value - y.value; }
template <typename T> inline Decimal<T> operator* (const Decimal<T> & x, const Decimal<T> & y) { return x.value * y.value; }
template <typename T> inline Decimal<T> operator/ (const Decimal<T> & x, const Decimal<T> & y) { return x.value / y.value; }
template <typename T> inline Decimal<T> operator- (const Decimal<T> & x) { return -x.value; }
#define DISPATCH(TYPE_T, TYPE_U) \
extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator += (const Decimal<TYPE_U> & x); \
extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator -= (const Decimal<TYPE_U> & x); \
extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator *= (const Decimal<TYPE_U> & x); \
extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator /= (const Decimal<TYPE_U> & x); \
extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator %= (const Decimal<TYPE_U> & x);
#define INVOKE(X) FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_UNDERLYING_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename T> bool operator< (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> bool operator> (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> bool operator<= (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> bool operator>= (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> bool operator== (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> bool operator!= (const Decimal<T> & x, const Decimal<T> & y);
#define DISPATCH(TYPE) \
extern template bool operator< (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template bool operator> (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template bool operator<= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template bool operator>= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template bool operator== (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template bool operator!= (const Decimal<TYPE> & x, const Decimal<TYPE> & y);
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
template <typename T> Decimal<T> operator+ (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> Decimal<T> operator- (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> Decimal<T> operator* (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> Decimal<T> operator/ (const Decimal<T> & x, const Decimal<T> & y);
template <typename T> Decimal<T> operator- (const Decimal<T> & x);
#define DISPATCH(TYPE) \
extern template Decimal<TYPE> operator+ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template Decimal<TYPE> operator- (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template Decimal<TYPE> operator* (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template Decimal<TYPE> operator/ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
extern template Decimal<TYPE> operator- (const Decimal<TYPE> & x);
FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
#undef DISPATCH
#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS
#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE
/// Distinguishable type to allow function resolution/deduction based on value type,
/// but also relatively easy to convert to/from Decimal64.
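A hypothetical sketch of the "distinguishable type" idea described in that comment (not the actual DateTime64): a thin wrapper that participates in overload resolution as its own type while remaining cheap to convert to and from the underlying value.

#include <cstdint>
#include <iostream>

struct DateTime64Like
{
    explicit DateTime64Like(std::int64_t v) : value(v) {}
    explicit operator std::int64_t() const { return value; }  // cheap conversion back
    std::int64_t value;
};

void format(std::int64_t) { std::cout << "plain integer\n"; }
void format(DateTime64Like) { std::cout << "date-time\n"; }  // resolves separately

int main()
{
    format(std::int64_t{42});
    format(DateTime64Like{42});  // the wrapper type drives overload resolution
}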

View File

@@ -64,6 +64,44 @@ template <> struct is_arithmetic<UInt256> { static constexpr bool value = true;
template <typename T>
inline constexpr bool is_arithmetic_v = is_arithmetic<T>::value;
#define FOR_EACH_ARITHMETIC_TYPE(M) \
M(DataTypeDate) \
M(DataTypeDate32) \
M(DataTypeDateTime) \
M(DataTypeInt8) \
M(DataTypeUInt8) \
M(DataTypeInt16) \
M(DataTypeUInt16) \
M(DataTypeInt32) \
M(DataTypeUInt32) \
M(DataTypeInt64) \
M(DataTypeUInt64) \
M(DataTypeInt128) \
M(DataTypeUInt128) \
M(DataTypeInt256) \
M(DataTypeUInt256) \
M(DataTypeFloat32) \
M(DataTypeFloat64)
#define FOR_EACH_ARITHMETIC_TYPE_PASS(M, X) \
M(DataTypeDate, X) \
M(DataTypeDate32, X) \
M(DataTypeDateTime, X) \
M(DataTypeInt8, X) \
M(DataTypeUInt8, X) \
M(DataTypeInt16, X) \
M(DataTypeUInt16, X) \
M(DataTypeInt32, X) \
M(DataTypeUInt32, X) \
M(DataTypeInt64, X) \
M(DataTypeUInt64, X) \
M(DataTypeInt128, X) \
M(DataTypeUInt128, X) \
M(DataTypeInt256, X) \
M(DataTypeUInt256, X) \
M(DataTypeFloat32, X) \
M(DataTypeFloat64, X)
template <typename T>
struct make_unsigned // NOLINT(readability-identifier-naming)
{

contrib/aws (vendored submodule, 2 lines)

@@ -1 +1 @@
Subproject commit 5f0542b3ad7eef25b0540d37d778207e0345ea8f
Subproject commit 32870e234cac03e0ac46370c26858b0ffdf14200

View File

@@ -247,6 +247,12 @@ quit
fuzzer_pid=$!
echo "Fuzzer pid is $fuzzer_pid"
# The fuzzer_pid belongs to the timeout process; the fuzzer itself is its child.
actual_fuzzer_pid=$(ps -o pid= --ppid "$fuzzer_pid")
echo "Attaching gdb to the fuzzer itself"
gdb -batch -command script.gdb -p $actual_fuzzer_pid &
# Wait for the fuzzer to complete.
# Note that the 'wait || ...' thing is required so that the script doesn't
# exit because of 'set -e' when 'wait' returns nonzero code.
@@ -387,7 +393,7 @@ if [ -f core.zst ]; then
fi
# Keep every line in the paragraphs containing <Fatal> that either contains <Fatal> or doesn't start with 20... (a year)
sed -n '/<Fatal>/,/^$/p' s.log | awk '/<Fatal>/ || !/^20/' server.log > fatal.log ||:
sed -n '/<Fatal>/,/^$/p' server.log | awk '/<Fatal>/ || !/^20/' > fatal.log ||:
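# (The replaced line above read the misnamed "s.log" and also passed
# "server.log" as a file argument to awk, so awk ignored the piped sed
# output entirely; the fixed pipeline filters server.log just once.)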
FATAL_LINK=''
if [ -s fatal.log ]; then
FATAL_LINK='<a href="fatal.log">fatal.log</a>'

View File

@@ -18,7 +18,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
yamllint \
locales \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \
&& pip3 install black==23.12.0 boto3 codespell==2.2.1 mypy==1.8.0 PyGithub unidiff pylint==3.1.0 \
requests types-requests \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \
&& rm -rf /root/.cache/pip

View File

@@ -79,6 +79,18 @@ remove_keeper_config "async_replication" "1"
# create_if_not_exists feature flag doesn't exist on some older versions
remove_keeper_config "create_if_not_exists" "[01]"
# TODO: remove these after 24.3 is released.
sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \
| sed "s|<object_storage_type>azure|<object_storage_type>azure_blob_storage|" \
> /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml
# TODO: remove these after 24.3 is released.
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
| sed "s|<object_storage_type>local|<object_storage_type>local_blob_storage|" \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
# latest_logs_cache_size_threshold setting doesn't exist on some older versions
remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
@@ -113,6 +125,18 @@ sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
#todo: remove these after 24.3 is released.
sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \
| sed "s|<object_storage_type>azure|<object_storage_type>azure_blob_storage|" \
> /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml
#todo: remove these after 24.3 is released.
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
| sed "s|<object_storage_type>local|<object_storage_type>local_blob_storage|" \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
# async_replication setting doesn't exist on some older versions
remove_keeper_config "async_replication" "1"

View File

@ -0,0 +1,29 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.3.20.27-lts (cc974ba4f81) FIXME as compared to v23.3.19.32-lts (c4d4ca8ec02)
#### Improvement
* Backported in [#58818](https://github.com/ClickHouse/ClickHouse/issues/58818): Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)).
#### Build/Testing/Packaging Improvement
* Backported in [#59877](https://github.com/ClickHouse/ClickHouse/issues/59877): If you want to run initdb scripts every time the ClickHouse container starts, set the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)).
* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)).
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Make ZooKeeper actually sequentially consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).

View File

@ -0,0 +1,39 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.8.10.43-lts (a278225bba9) FIXME as compared to v23.8.9.54-lts (192a1d231fa)
#### Improvement
* Backported in [#58819](https://github.com/ClickHouse/ClickHouse/issues/58819): Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#60286](https://github.com/ClickHouse/ClickHouse/issues/60286): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
#### Build/Testing/Packaging Improvement
* Backported in [#59879](https://github.com/ClickHouse/ClickHouse/issues/59879): If you want to run initdb scripts every time the ClickHouse container starts, set the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)).
* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)).
* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)).
* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)).
* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix rare race in external sort/aggregation with temporary data in cache [#58013](https://github.com/ClickHouse/ClickHouse/pull/58013) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix 02720_row_policy_column_with_dots [#59453](https://github.com/ClickHouse/ClickHouse/pull/59453) ([Duc Canh Le](https://github.com/canhld94)).
* Pin python dependencies in stateless tests [#59663](https://github.com/ClickHouse/ClickHouse/pull/59663) ([Raúl Marín](https://github.com/Algunenano)).
* Make ZooKeeper actually sequentially consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).

View File

@ -549,6 +549,48 @@ Result:
└───────┴─────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
##### input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects
Enabling this setting allows using String type for ambiguous paths during named tuple inference from JSON objects (when `input_format_json_try_infer_named_tuples_from_objects` is enabled) instead of throwing an exception.
It allows reading JSON objects as named Tuples even when there are ambiguous paths.
Disabled by default.
**Examples**
With disabled setting:
```sql
SET input_format_json_try_infer_named_tuples_from_objects = 1;
SET input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = 0;
DESC format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : "Hello"}}}');
```
Result:
```text
Code: 636. DB::Exception: The table structure cannot be extracted from a JSONEachRow format file. Error:
Code: 117. DB::Exception: JSON objects have ambiguous data: in some objects path 'a' has type 'Int64' and in some - 'Tuple(b String)'. You can enable setting input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects to use String type for path 'a'. (INCORRECT_DATA) (version 24.3.1.1).
You can specify the structure manually. (CANNOT_EXTRACT_TABLE_STRUCTURE)
```
With enabled setting:
```sql
SET input_format_json_try_infer_named_tuples_from_objects = 1;
SET input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = 1;
DESC format(JSONEachRow, '{"obj" : "a" : 42}, {"obj" : {"a" : {"b" : "Hello"}}}');
SELECT * FROM format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : "Hello"}}}');
```
Result:
```text
┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ obj │ Tuple(a Nullable(String)) │ │ │ │ │ │
└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
┌─obj─────────────────┐
│ ('42') │
│ ('{"b" : "Hello"}') │
└─────────────────────┘
```
##### input_format_json_read_objects_as_strings
Enabling this setting allows reading nested JSON objects as strings.
@ -1554,6 +1596,28 @@ DESC format(JSONEachRow, $$
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
#### input_format_try_infer_exponent_floats
If enabled, ClickHouse will try to infer floats in exponential form for text formats (except JSON where numbers in exponential form are always inferred).
Disabled by default.
**Example**
```sql
SET input_format_try_infer_exponent_floats = 1;
DESC format(CSV,
$$1.1E10
2.3e-12
42E00
$$)
```
```response
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ c1 │ Nullable(Float64) │ │ │ │ │ │
└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## Self describing formats {#self-describing-formats}
Self-describing formats contain information about the structure of the data in the data itself,

View File

@ -467,7 +467,7 @@ Enabled by default.
Allow using String type for JSON keys that contain only `Null`/`{}`/`[]` in the data sample during schema inference.
In JSON formats any value can be read as String, and we can avoid errors like `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference
by using String type for keys with unknown types.
by using String type for keys with unknown types.
Example:
@ -891,7 +891,7 @@ Default value: `,`.
If it is set to true, allow strings in single quotes.
Enabled by default.
Disabled by default.
### format_csv_allow_double_quotes {#format_csv_allow_double_quotes}
@ -1605,7 +1605,7 @@ possible values:
- `1` — Enabled. Pretty formats will use ANSI escape sequences except for `NoEscapes` formats.
- `auto` - Enabled if `stdout` is a terminal except for `NoEscapes` formats.
Default value is `auto`.
Default value is `auto`.
### output_format_pretty_grid_charset {#output_format_pretty_grid_charset}

View File

@ -14,8 +14,6 @@
- `N` The number of elements to return.
If the parameter is omitted, the default value is the size of the input.
- `column` The value (Integer, String, Float and other Generic types).
**Example**
@ -36,13 +34,12 @@
Gets the String representations of all numbers in the column:
``` sql
SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5));
SELECT groupArraySorted(5)(str) FROM (SELECT toString(number) as str FROM numbers(5));
```
``` text
┌─groupArraySorted(str)────────┐
│ ['0','1','2','3','4'] │
└──────────────────────────────┘
```
┌─groupArraySorted(5)(str)─┐
│ ['0','1','2','3','4'] │
└──────────────────────────┘
```

View File

@ -272,10 +272,16 @@ ALTER TABLE table_name MODIFY COLUMN column_name RESET SETTING max_compress_bloc
## MATERIALIZE COLUMN
Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`).
It is used when adding or updating a column with a complicated expression, because evaluating such an expression directly at `SELECT` execution time turns out to be expensive.
Materializes a column with a `DEFAULT` or `MATERIALIZED` value expression.
This statement can be used to rewrite existing column data after a `DEFAULT` or `MATERIALIZED` expression has been added or updated (which only updates the metadata but does not change existing data).
Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations).
For columns with a new or updated `MATERIALIZED` value expression, all existing rows are rewritten.
For columns with a new or updated `DEFAULT` value expression, the behavior depends on the ClickHouse version:
- In ClickHouse < v24.2, all existing rows are rewritten.
- ClickHouse >= v24.2 distinguishes whether a row value in a column with a `DEFAULT` value expression was explicitly specified when it was inserted, or calculated from the `DEFAULT` value expression. If the value was explicitly specified, ClickHouse keeps it as is. If the value was calculated, ClickHouse changes it to the new or updated `DEFAULT` value expression.
Syntax:
```sql

View File

@ -202,6 +202,13 @@ Hierarchy of privileges:
- `S3`
- [dictGet](#grant-dictget)
- [displaySecretsInShowAndSelect](#grant-display-secrets)
- [NAMED COLLECTION ADMIN](#grant-named-collection-admin)
- `CREATE NAMED COLLECTION`
- `DROP NAMED COLLECTION`
- `ALTER NAMED COLLECTION`
- `SHOW NAMED COLLECTIONS`
- `SHOW NAMED COLLECTIONS SECRETS`
- `NAMED COLLECTION`
Examples of how this hierarchy is treated:
@ -498,6 +505,25 @@ and
[`format_display_secrets_in_show_and_select` format setting](../../operations/settings/formats#format_display_secrets_in_show_and_select)
are turned on.
### NAMED COLLECTION ADMIN
Allows operations on a specified named collection. Before version 23.7 it was called NAMED COLLECTION CONTROL; in 23.7 NAMED COLLECTION ADMIN was added, and NAMED COLLECTION CONTROL is preserved as an alias.
- `NAMED COLLECTION ADMIN`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION CONTROL`
- `CREATE NAMED COLLECTION`. Level: `NAMED_COLLECTION`
- `DROP NAMED COLLECTION`. Level: `NAMED_COLLECTION`
- `ALTER NAMED COLLECTION`. Level: `NAMED_COLLECTION`
- `SHOW NAMED COLLECTIONS`. Level: `NAMED_COLLECTION`. Aliases: `SHOW NAMED COLLECTIONS`
- `SHOW NAMED COLLECTIONS SECRETS`. Level: `NAMED_COLLECTION`. Aliases: `SHOW NAMED COLLECTIONS SECRETS`
- `NAMED COLLECTION`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION USAGE, USE NAMED COLLECTION`
Unlike the other grants (CREATE, DROP, ALTER, SHOW), the NAMED COLLECTION grant was added only in 23.7, while all the others were added earlier, in 22.12.
**Examples**
Assuming a named collection is called `abc`, we grant the privilege CREATE NAMED COLLECTION to the user john.
- `GRANT CREATE NAMED COLLECTION ON abc TO john`
### ALL
Grants all the privileges on regulated entity to a user account or a role.

View File

@ -38,6 +38,7 @@ ClickHouse Keeper can be used as an equivalent
- `dead_session_check_period_ms` — how often ClickHouse Keeper checks for dead sessions and removes them, in milliseconds (default: 500).
- `election_timeout_lower_bound_ms` — the time after which a follower may initiate a leader election if it has not received a heartbeat from the leader (default: 1000).
- `election_timeout_upper_bound_ms` — the time after which a follower must initiate a leader election if it has not received a heartbeat from the leader (default: 2000).
- `leadership_expiry_ms` — if the leader does not receive responses from enough followers within this interval, it voluntarily gives up leadership. When set to 0, this is automatically set to 20 times `heart_beat_interval_ms`; when set below 0, the leader does not relinquish leadership (default: 0).
- `force_sync` — call `fsync` on every write to the coordination log (default: true).
- `four_letter_word_white_list` — the list of allowed four-letter commands (default: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro").
- `fresh_log_gap` — the minimal lag behind the leader, in log records, at which a follower considers itself up to date (default: 200).
@ -209,6 +210,7 @@ dead_session_check_period_ms=500
heart_beat_interval_ms=500
election_timeout_lower_bound_ms=1000
election_timeout_upper_bound_ms=2000
leadership_expiry_ms=0
reserved_log_items=1000000000000000
snapshot_distance=10000
auto_forwarding=true

View File

@ -45,6 +45,7 @@ ClickHouse Keeper can fully serve as a standalone replacement for ZooKeeper or as
- `heart_beat_interval_ms` — how often the ClickHouse Keeper leader sends heartbeats, in milliseconds (default: 500).
- `election_timeout_lower_bound_ms` — if a follower has not received a heartbeat from the leader within this interval, it may start a leader election (default: 1000).
- `election_timeout_upper_bound_ms` — if a follower has not received a heartbeat from the leader within this interval, it must start a leader election (default: 2000).
- `leadership_expiry_ms` — if the leader does not receive enough follower replies within this interval, it voluntarily gives up leadership. When set to 0, this is automatically set to 20 times `heart_beat_interval_ms`; when set below 0, the leader does not relinquish leadership (default: 0).
- `rotate_log_storage_interval` — how many log records are stored in a single file (default: 100000).
- `reserved_log_items` — how many coordination log records to keep before compaction (default: 100000).
- `snapshot_distance` — how often ClickHouse Keeper creates new snapshots, measured in number of log records (default: 100000).
@ -214,6 +215,7 @@ dead_session_check_period_ms=500
heart_beat_interval_ms=500
election_timeout_lower_bound_ms=1000
election_timeout_upper_bound_ms=2000
leadership_expiry_ms=0
reserved_log_items=1000000000000000
snapshot_distance=10000
auto_forwarding=true

View File

@ -259,7 +259,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std
res.is_remote = 1;
for (const auto & replica : replicas)
{
if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name)))
if (isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(replica.host_name).front()))
{
res.is_remote = 0;
break;

View File

@ -102,7 +102,7 @@ struct TaskStateWithOwner
return TaskStateWithOwner(state, owner).toString();
}
String toString()
String toString() const
{
WriteBufferFromOwnString wb;
wb << static_cast<UInt32>(state) << "\n" << escape << owner;

View File

@ -180,7 +180,7 @@ public:
auto logger = getLogger("ClusterCopier");
if (rsp.error == Coordination::Error::ZOK)
{
switch (rsp.type)
switch (rsp.type) /// NOLINT(bugprone-switch-missing-default-case)
{
case Coordination::CREATED:
LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path);

View File

@ -841,7 +841,7 @@ void LocalServer::addOptions(OptionsDescription & options_description)
/// If structure argument is omitted then initial query is not generated
("structure,S", po::value<std::string>(), "structure of the initial table (list of column and type names)")
("file,f", po::value<std::string>(), "path to file with data of the initial table (stdin if not specified)")
("file,F", po::value<std::string>(), "path to file with data of the initial table (stdin if not specified)")
("input-format", po::value<std::string>(), "input format of the initial table data")
("output-format", po::value<std::string>(), "default output format")

View File

@ -40,7 +40,6 @@ public:
explicit ConnectionHolder(const String & connection_string_)
: pool(nullptr)
, connection()
, connection_string(connection_string_)
{
updateConnection();
@ -143,7 +142,7 @@ public:
{
std::lock_guard lock(mutex);
if (!factory.count(connection_string))
if (!factory.contains(connection_string))
factory.emplace(std::make_pair(connection_string, std::make_shared<nanodbc::Pool>(pool_size)));
auto & pool = factory[connection_string];

View File

@ -184,7 +184,7 @@ static bool jemallocOptionEnabled(const char *name)
return value;
}
#else
static bool jemallocOptionEnabled(const char *) { return 0; }
static bool jemallocOptionEnabled(const char *) { return false; }
#endif
int mainEntryClickHouseServer(int argc, char ** argv)

View File

@ -1,6 +1,4 @@
# vim: ft=config
[BASIC]
[tool.pylint.BASIC]
max-module-lines=2000
# due to SQL
max-line-length=200
@ -9,11 +7,13 @@ max-branches=50
max-nested-blocks=10
max-statements=200
[FORMAT]
ignore-long-lines = (# )?<?https?://\S+>?$
[tool.pylint.FORMAT]
#ignore-long-lines = (# )?<?https?://\S+>?$
[MESSAGES CONTROL]
disable = missing-docstring,
[tool.pylint.'MESSAGES CONTROL']
# pytest.mark.parametrize is not callable (not-callable)
disable = '''
missing-docstring,
too-few-public-methods,
invalid-name,
too-many-arguments,
@ -26,18 +26,15 @@ disable = missing-docstring,
wildcard-import,
unused-wildcard-import,
singleton-comparison,
# pytest.mark.parametrize is not callable (not-callable)
not-callable,
# https://github.com/PyCQA/pylint/issues/3882
# [Python 3.9] Value 'Optional' is unsubscriptable (unsubscriptable-object) (also Union)
unsubscriptable-object,
# Drop them one day:
redefined-outer-name,
broad-except,
bare-except,
no-else-return,
global-statement
'''
[SIMILARITIES]
[tool.pylint.SIMILARITIES]
# due to SQL
min-similarity-lines=1000

View File

@ -55,7 +55,7 @@ namespace
{
IPAddress addr_v6 = toIPv6(address);
auto host_addresses = DNSResolver::instance().resolveHostAll(host);
auto host_addresses = DNSResolver::instance().resolveHostAllInOriginOrder(host);
for (const auto & addr : host_addresses)
{

View File

@ -45,10 +45,15 @@ void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state
if (hasAlias())
buffer << ", alias: " << getAlias();
buffer << ", constant_value: " << constant_value->getValue().dump();
buffer << ", constant_value: ";
if (mask_id)
buffer << "[HIDDEN id: " << mask_id << "]";
else
buffer << constant_value->getValue().dump();
buffer << ", constant_value_type: " << constant_value->getType()->getName();
if (getSourceExpression())
if (!mask_id && getSourceExpression())
{
buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION" << '\n';
getSourceExpression()->dumpTreeImpl(buffer, format_state, indent + 4);

View File

@ -75,6 +75,11 @@ public:
return constant_value->getType();
}
void setMaskId(size_t id)
{
mask_id = id;
}
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
protected:
@ -90,6 +95,7 @@ private:
ConstantValuePtr constant_value;
String value_string;
QueryTreeNodePtr source_expression;
size_t mask_id = 0;
static constexpr size_t children_size = 0;
};

View File

@ -0,0 +1,372 @@
#pragma once
#include <Parsers/FunctionSecretArgumentsFinder.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/IdentifierNode.h>
#include <Analyzer/ListNode.h>
#include <Common/KnownObjectNames.h>
#include <Core/QualifiedTableName.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB
{
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderTreeNode
{
public:
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_) : function(function_), arguments(function.getArguments())
{
if (arguments.getNodes().empty())
return;
findFunctionSecretArguments();
}
struct Result
{
/// Result constructed by default means no arguments will be hidden.
size_t start = static_cast<size_t>(-1);
size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`).
/// In all known cases secret arguments are consecutive
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
std::vector<std::string> nested_maps;
bool hasSecrets() const
{
return count != 0 || !nested_maps.empty();
}
};
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
private:
const FunctionNode & function;
const ListNode & arguments;
FunctionSecretArgumentsFinder::Result result;
void markSecretArgument(size_t index, bool argument_is_named = false)
{
if (index >= arguments.getNodes().size())
return;
if (!result.count)
{
result.start = index;
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
}
void findFunctionSecretArguments()
{
const auto & name = function.getFunctionName();
if ((name == "mysql") || (name == "postgresql") || (name == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((name == "s3") || (name == "cosn") || (name == "oss") ||
(name == "deltaLake") || (name == "hudi") || (name == "iceberg"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (name == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((name == "remote") || (name == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((name == "encrypt") || (name == "decrypt") ||
(name == "aes_encrypt_mysql") || (name == "aes_decrypt_mysql") ||
(name == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (name == "url")
{
findURLSecretArguments();
}
}
void findMySQLFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// mysql(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
markSecretArgument(4);
}
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
const auto & nodes = arguments.getNodes();
size_t count = nodes.size();
while (count > 0)
{
const FunctionNode * f = nodes.at(count - 1)->as<FunctionNode>();
if (!f)
break;
if (f->getFunctionName() == "headers")
result.nested_maps.push_back(f->getFunctionName());
else if (f->getFunctionName() != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
excludeS3OrURLNestedMaps();
}
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
if (arg_idx >= arguments.getNodes().size())
return false;
return tryGetStringFromArgument(arguments.getNodes()[arg_idx], res, allow_identifier);
}
    static bool tryGetStringFromArgument(const QueryTreeNodePtr & argument, String * res, bool allow_identifier = true)
{
if (const auto * literal = argument->as<ConstantNode>())
{
if (literal->getValue().getType() != Field::Types::String)
return false;
if (res)
*res = literal->getValue().safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument->as<IdentifierNode>())
{
if (res)
*res = id->getIdentifier().getFullName();
return true;
}
}
return false;
}
void findRemoteFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// remote(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
return;
}
        /// We're going to replace 'password' with '[HIDDEN]' for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
if (arguments.getNodes().size() < 3)
return;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
const auto * table_function = arguments.getNodes()[arg_num]->as<FunctionNode>();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->getFunctionName()))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
}
/// Skip the current argument (which is either a database name or a qualified table name).
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
void findEncryptionFunctionSecretArguments()
{
if (arguments.getNodes().empty())
return;
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
result.start = 1;
result.count = arguments.getNodes().size() - 1;
}
    /// Can the specified argument be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (arguments.getNodes().size() <= arg_idx)
return false;
const auto * identifier = arguments.getNodes()[arg_idx]->as<IdentifierNode>();
return identifier != nullptr;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < arguments.getNodes().size(); ++i)
{
const auto & argument = arguments.getNodes()[i];
const auto * equals_func = argument->as<FunctionNode>();
if (!equals_func || (equals_func->getFunctionName() != "equals"))
continue;
const auto * expr_list = equals_func->getArguments().as<ListNode>();
if (!expr_list)
continue;
const auto & equal_args = expr_list->getNodes();
if (equal_args.size() != 2)
continue;
String found_key;
if (!tryGetStringFromArgument(equal_args[0], &found_key))
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
}
}
};
}
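The heart of this new file is the bookkeeping in `markSecretArgument`: secret arguments are always consecutive, so the result is a single `[start, start + count)` window. A standalone sketch of that invariant (the names mirror the class above but are simplified for illustration):

```cpp
#include <cassert>
#include <cstddef>

struct Result
{
    size_t start = static_cast<size_t>(-1);
    size_t count = 0;
};

void markSecretArgument(Result & result, size_t index)
{
    if (!result.count)
        result.start = index;      /// The first secret argument opens the window.
    assert(index >= result.start); /// Arguments are checked consecutively.
    result.count = index + 1 - result.start;
}

int main()
{
    Result r;
    markSecretArgument(r, 4); /// e.g. 'password' in mysql('host:port', 'database', 'table', 'user', 'password')
    assert(r.start == 4 && r.count == 1);
}
```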

View File

@ -94,7 +94,8 @@ public:
if (!func_node || func_node->getArguments().getNodes().size() != 1)
return;
const auto * column_id = func_node->getArguments().getNodes()[0]->as<ColumnNode>();
const auto & argument_node = func_node->getArguments().getNodes()[0];
const auto * column_id = argument_node->as<ColumnNode>();
if (!column_id)
return;
@ -119,7 +120,7 @@ public:
if (!preimage_range)
return;
const auto new_node = generateOptimizedDateFilter(comparator, *column_id, *preimage_range);
const auto new_node = generateOptimizedDateFilter(comparator, argument_node, *preimage_range);
if (!new_node)
return;
@ -128,20 +129,22 @@ public:
}
private:
QueryTreeNodePtr
generateOptimizedDateFilter(const String & comparator, const ColumnNode & column_node, const std::pair<Field, Field> & range) const
QueryTreeNodePtr generateOptimizedDateFilter(
const String & comparator, const QueryTreeNodePtr & column_node, const std::pair<Field, Field> & range) const
{
const DateLUTImpl & date_lut = DateLUT::instance("UTC");
String start_date_or_date_time;
String end_date_or_date_time;
if (isDateOrDate32(column_node.getColumnType().get()))
const auto & column_node_typed = column_node->as<ColumnNode &>();
const auto & column_type = column_node_typed.getColumnType().get();
if (isDateOrDate32(column_type))
{
start_date_or_date_time = date_lut.dateToString(range.first.get<DateLUTImpl::Time>());
end_date_or_date_time = date_lut.dateToString(range.second.get<DateLUTImpl::Time>());
}
else if (isDateTime(column_node.getColumnType().get()) || isDateTime64(column_node.getColumnType().get()))
else if (isDateTime(column_type) || isDateTime64(column_type))
{
start_date_or_date_time = date_lut.timeToString(range.first.get<DateLUTImpl::Time>());
end_date_or_date_time = date_lut.timeToString(range.second.get<DateLUTImpl::Time>());
@ -151,69 +154,29 @@ private:
if (comparator == "equals")
{
const auto lhs = std::make_shared<FunctionNode>("greaterOrEquals");
lhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
lhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName());
const auto rhs = std::make_shared<FunctionNode>("less");
rhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
rhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
const auto new_date_filter = std::make_shared<FunctionNode>("and");
new_date_filter->getArguments().getNodes() = {lhs, rhs};
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
return createFunctionNode(
"and",
createFunctionNode("greaterOrEquals", column_node, std::make_shared<ConstantNode>(start_date_or_date_time)),
createFunctionNode("less", column_node, std::make_shared<ConstantNode>(end_date_or_date_time)));
}
else if (comparator == "notEquals")
{
const auto lhs = std::make_shared<FunctionNode>("less");
lhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
lhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName());
const auto rhs = std::make_shared<FunctionNode>("greaterOrEquals");
rhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
rhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
const auto new_date_filter = std::make_shared<FunctionNode>("or");
new_date_filter->getArguments().getNodes() = {lhs, rhs};
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
return createFunctionNode(
"or",
createFunctionNode("less", column_node, std::make_shared<ConstantNode>(start_date_or_date_time)),
createFunctionNode("greaterOrEquals", column_node, std::make_shared<ConstantNode>(end_date_or_date_time)));
}
else if (comparator == "greater")
{
const auto new_date_filter = std::make_shared<FunctionNode>("greaterOrEquals");
new_date_filter->getArguments().getNodes().push_back(
std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
return createFunctionNode("greaterOrEquals", column_node, std::make_shared<ConstantNode>(end_date_or_date_time));
}
else if (comparator == "lessOrEquals")
{
const auto new_date_filter = std::make_shared<FunctionNode>("less");
new_date_filter->getArguments().getNodes().push_back(
std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
return createFunctionNode("less", column_node, std::make_shared<ConstantNode>(end_date_or_date_time));
}
else if (comparator == "less" || comparator == "greaterOrEquals")
{
const auto new_date_filter = std::make_shared<FunctionNode>(comparator);
new_date_filter->getArguments().getNodes().push_back(
std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
return createFunctionNode(comparator, column_node, std::make_shared<ConstantNode>(start_date_or_date_time));
}
else [[unlikely]]
{
@ -224,10 +187,17 @@ private:
}
}
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
template <typename... Args>
QueryTreeNodePtr createFunctionNode(const String & function_name, Args &&... args) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
const auto function_node = std::make_shared<FunctionNode>(function_name);
auto & new_arguments = function_node->getArguments().getNodes();
new_arguments.reserve(sizeof...(args));
(new_arguments.push_back(std::forward<Args>(args)), ...);
function_node->resolveAsFunction(function->build(function_node->getArgumentColumns()));
return function_node;
}
};
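The refactoring above hinges on the variadic `createFunctionNode` helper: a fold expression appends each child argument in order, replacing several near-identical blocks of push_back-and-resolve boilerplate. A self-contained sketch of the same pattern (the `Node` type here is a stand-in for `FunctionNode`, not the real API):

```cpp
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct Node
{
    std::string name;
    std::vector<std::shared_ptr<Node>> children;
};

template <typename... Args>
std::shared_ptr<Node> makeNode(std::string name, Args &&... args)
{
    auto node = std::make_shared<Node>();
    node->name = std::move(name);
    node->children.reserve(sizeof...(args));
    /// The fold expression pushes the arguments left to right.
    (node->children.push_back(std::forward<Args>(args)), ...);
    return node;
}

int main()
{
    auto filter = makeNode("and", makeNode("greaterOrEquals"), makeNode("less"));
    return filter->children.size() == 2 ? 0 : 1;
}
```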

View File

@ -3,6 +3,7 @@
#include <Common/checkStackSize.h>
#include <Common/NamePrompter.h>
#include <Common/ProfileEvents.h>
#include <Analyzer/FunctionSecretArgumentsFinderTreeNode.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
@ -706,7 +707,10 @@ struct IdentifierResolveScope
{
subquery_depth = parent_scope->subquery_depth;
context = parent_scope->context;
projection_mask_map = parent_scope->projection_mask_map;
}
else
projection_mask_map = std::make_shared<std::map<IQueryTreeNode::Hash, size_t>>();
if (auto * union_node = scope_node->as<UnionNode>())
{
@ -718,6 +722,11 @@ struct IdentifierResolveScope
group_by_use_nulls = context->getSettingsRef().group_by_use_nulls &&
(query_node->isGroupByWithGroupingSets() || query_node->isGroupByWithRollup() || query_node->isGroupByWithCube());
}
if (context)
join_use_nulls = context->getSettingsRef().join_use_nulls;
else if (parent_scope)
join_use_nulls = parent_scope->join_use_nulls;
}
QueryTreeNodePtr scope_node;
@ -772,6 +781,8 @@ struct IdentifierResolveScope
/// Apply nullability to aggregation keys
bool group_by_use_nulls = false;
    /// Join returns NULLs instead of default values
bool join_use_nulls = false;
/// JOINs count
size_t joins_count = 0;
@ -784,6 +795,9 @@ struct IdentifierResolveScope
*/
QueryTreeNodePtr expression_join_tree_node;
/// Node hash to mask id map
std::shared_ptr<std::map<IQueryTreeNode::Hash, size_t>> projection_mask_map;
[[maybe_unused]] const IdentifierResolveScope * getNearestQueryScope() const
{
const IdentifierResolveScope * scope_to_check = this;
@ -3286,7 +3300,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
QueryTreeNodePtr resolved_identifier;
JoinKind join_kind = from_join_node.getKind();
bool join_use_nulls = scope.context->getSettingsRef().join_use_nulls;
/// If columns from left or right table were missed Object(Nullable('json')) subcolumns, they will be replaced
/// to ConstantNode(NULL), which can't be cast to ColumnNode, so we resolve it here.
@ -3451,7 +3464,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
if (join_node_in_resolve_process || !resolved_identifier)
return resolved_identifier;
if (join_use_nulls)
if (scope.join_use_nulls)
{
resolved_identifier = resolved_identifier->clone();
convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side);
@ -4439,7 +4452,7 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
else
matched_expression_nodes_with_names = resolveUnqualifiedMatcher(matcher_node, scope);
if (scope.context->getSettingsRef().join_use_nulls)
if (scope.join_use_nulls)
{
/** If we are resolving matcher came from the result of JOIN and `join_use_nulls` is set,
* we need to convert joined column type to Nullable.
@ -5124,22 +5137,31 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
}
/// Resolve function arguments
bool allow_table_expressions = is_special_function_in;
auto arguments_projection_names = resolveExpressionNodeList(function_node_ptr->getArgumentsNode(),
scope,
true /*allow_lambda_expression*/,
allow_table_expressions /*allow_table_expression*/);
if (function_node_ptr->toAST()->hasSecretParts())
/// Mask arguments if needed
if (!scope.context->getSettingsRef().format_display_secrets_in_show_and_select)
{
for (auto & argument : arguments_projection_names)
if (FunctionSecretArgumentsFinder::Result secret_arguments = FunctionSecretArgumentsFinderTreeNode(*function_node_ptr).getResult(); secret_arguments.count)
{
SipHash hash;
hash.update(argument);
argument = getHexUIntLowercase(hash.get128());
auto & argument_nodes = function_node_ptr->getArgumentsNode()->as<ListNode &>().getNodes();
for (size_t n = secret_arguments.start; n < secret_arguments.start + secret_arguments.count; ++n)
{
if (auto * constant = argument_nodes[n]->as<ConstantNode>())
{
auto mask = scope.projection_mask_map->insert({constant->getTreeHash(), scope.projection_mask_map->size() + 1}).first->second;
constant->setMaskId(mask);
arguments_projection_names[n] = "[HIDDEN id: " + std::to_string(mask) + "]";
}
}
}
}
auto & function_node = *function_node_ptr;
/// Replace right IN function argument if it is table or table function with subquery that read ordinary columns
@ -6651,7 +6673,6 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
if (column_default && column_default->kind == ColumnDefaultKind::Alias)
{
auto alias_expression = buildQueryTree(column_default->expression, scope.context);
alias_expression = buildCastFunction(alias_expression, column_name_and_type.type, scope.context, false /*resolve*/);
auto column_node = std::make_shared<ColumnNode>(column_name_and_type, std::move(alias_expression), table_expression_node);
column_name_to_column_node.emplace(column_name_and_type.name, column_node);
alias_columns_to_resolve.emplace_back(column_name_and_type.name, column_node);
@ -6684,7 +6705,9 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
alias_column_resolve_scope,
false /*allow_lambda_expression*/,
false /*allow_table_expression*/);
auto & resolved_expression = alias_column_to_resolve->getExpression();
if (!resolved_expression->getResultType()->equals(*alias_column_to_resolve->getResultType()))
resolved_expression = buildCastFunction(resolved_expression, alias_column_to_resolve->getResultType(), scope.context, true);
column_name_to_column_node = std::move(alias_column_resolve_scope.column_name_to_column_node);
column_name_to_column_node[alias_column_to_resolve_name] = alias_column_to_resolve;
}
@ -7558,8 +7581,22 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
}
if (query_node_typed.getPrewhere())
{
/** Expression in PREWHERE with JOIN should not be modified by join_use_nulls.
* Example: SELECT * FROM t1 JOIN t2 USING (id) PREWHERE a = 1
          * Column `a` should be resolved from the table and should not change its type to Nullable.
*/
bool join_use_nulls = scope.join_use_nulls;
bool use_identifier_lookup_to_result_cache = scope.use_identifier_lookup_to_result_cache;
scope.join_use_nulls = false;
scope.use_identifier_lookup_to_result_cache = false;
resolveExpressionNode(query_node_typed.getPrewhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
scope.join_use_nulls = join_use_nulls;
scope.use_identifier_lookup_to_result_cache = use_identifier_lookup_to_result_cache;
}
if (query_node_typed.getWhere())
resolveExpressionNode(query_node_typed.getWhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);

View File

@ -15,8 +15,6 @@
namespace DB
{
namespace fs = std::filesystem;
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
@ -65,13 +63,13 @@ void registerBackupEngineS3(BackupFactory & factory)
secret_access_key = config.getString(config_prefix + ".secret_access_key", "");
if (config.has(config_prefix + ".filename"))
s3_uri = fs::path(s3_uri) / config.getString(config_prefix + ".filename");
s3_uri = std::filesystem::path(s3_uri) / config.getString(config_prefix + ".filename");
if (args.size() > 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup S3 requires 1 or 2 arguments: named_collection, [filename]");
if (args.size() == 1)
s3_uri = fs::path(s3_uri) / args[0].safeGet<String>();
s3_uri = std::filesystem::path(s3_uri) / args[0].safeGet<String>();
}
else
{

View File

@ -174,6 +174,8 @@ endif ()
add_library(clickhouse_common_io ${clickhouse_common_io_headers} ${clickhouse_common_io_sources})
set_source_files_properties(Common/ThreadFuzzer.cpp PROPERTIES COMPILE_FLAGS "-fomit-frame-pointer -momit-leaf-frame-pointer")
add_library (clickhouse_malloc OBJECT Common/malloc.cpp)
set_source_files_properties(Common/malloc.cpp PROPERTIES COMPILE_FLAGS "-fno-builtin")

View File

@ -115,7 +115,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
/// At the same time, I want clickhouse-local to always work, regardless.
/// TODO: get rid of glibc, or replace getaddrinfo to c-ares.
compression = config.getBool("compression", host != "localhost" && !isLocalAddress(DNSResolver::instance().resolveHost(host)))
compression = config.getBool("compression", host != "localhost" && !isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(host).front()))
? Protocol::Compression::Enable : Protocol::Compression::Disable;
timeouts = ConnectionTimeouts()

View File

@ -0,0 +1,25 @@
#include <Columns/ColumnUnique.h>
namespace DB
{
/// Explicit template instantiations.
template class ColumnUnique<ColumnInt8>;
template class ColumnUnique<ColumnUInt8>;
template class ColumnUnique<ColumnInt16>;
template class ColumnUnique<ColumnUInt16>;
template class ColumnUnique<ColumnInt32>;
template class ColumnUnique<ColumnUInt32>;
template class ColumnUnique<ColumnInt64>;
template class ColumnUnique<ColumnUInt64>;
template class ColumnUnique<ColumnInt128>;
template class ColumnUnique<ColumnUInt128>;
template class ColumnUnique<ColumnInt256>;
template class ColumnUnique<ColumnUInt256>;
template class ColumnUnique<ColumnFloat32>;
template class ColumnUnique<ColumnFloat64>;
template class ColumnUnique<ColumnString>;
template class ColumnUnique<ColumnFixedString>;
template class ColumnUnique<ColumnDateTime64>;
}

View File

@ -15,6 +15,8 @@
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Common/FieldVisitors.h>
#include "Columns/ColumnsDateTime.h"
#include "Columns/ColumnsNumber.h"
#include <base/range.h>
#include <base/unaligned.h>
@ -736,4 +738,23 @@ UInt128 ColumnUnique<ColumnType>::IncrementalHash::getHash(const ColumnType & co
return cur_hash;
}
extern template class ColumnUnique<ColumnInt8>;
extern template class ColumnUnique<ColumnUInt8>;
extern template class ColumnUnique<ColumnInt16>;
extern template class ColumnUnique<ColumnUInt16>;
extern template class ColumnUnique<ColumnInt32>;
extern template class ColumnUnique<ColumnUInt32>;
extern template class ColumnUnique<ColumnInt64>;
extern template class ColumnUnique<ColumnUInt64>;
extern template class ColumnUnique<ColumnInt128>;
extern template class ColumnUnique<ColumnUInt128>;
extern template class ColumnUnique<ColumnInt256>;
extern template class ColumnUnique<ColumnUInt256>;
extern template class ColumnUnique<ColumnFloat32>;
extern template class ColumnUnique<ColumnFloat64>;
extern template class ColumnUnique<ColumnString>;
extern template class ColumnUnique<ColumnFixedString>;
extern template class ColumnUnique<ColumnDateTime64>;
}

View File

@ -202,10 +202,10 @@ DNSResolver::DNSResolver() : impl(std::make_unique<DNSResolver::Impl>()), log(ge
Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host)
{
return pickAddress(resolveHostAll(host));
return pickAddress(resolveHostAll(host)); // random order -> random pick
}
DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host)
DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host)
{
if (impl->disable_cache)
return resolveIPAddressImpl(host);
@ -214,6 +214,13 @@ DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host)
return resolveIPAddressWithCache(impl->cache_host, host);
}
DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host)
{
auto addresses = resolveHostAllInOriginOrder(host);
std::shuffle(addresses.begin(), addresses.end(), thread_local_rng);
return addresses;
}
Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_and_port)
{
if (impl->disable_cache)

View File

@ -34,6 +34,9 @@ public:
Poco::Net::IPAddress resolveHost(const std::string & host);
/// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves all its IPs
    /// resolveHostAllInOriginOrder returns addresses in the same order as the system call returns them
IPAddresses resolveHostAllInOriginOrder(const std::string & host);
/// resolveHostAll returns addresses in random order
IPAddresses resolveHostAll(const std::string & host);
/// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolves its IP and port
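The split matters because `resolveHostAll` now shuffles: callers that probe for a local address (see the ClusterCopier and ConnectionParameters hunks above) must take `.front()` of the origin-ordered list to keep the old deterministic behavior. A standalone sketch of the two flavors, with a stubbed address list in place of the real resolver:

```cpp
#include <algorithm>
#include <random>
#include <string>
#include <vector>

using IPAddresses = std::vector<std::string>;

/// Preserves the order returned by the system resolver (stubbed here).
IPAddresses resolveHostAllInOriginOrder(const std::string &)
{
    return {"10.0.0.1", "10.0.0.2", "10.0.0.3"};
}

/// Random order -> callers that pick the first element get load balancing.
IPAddresses resolveHostAll(const std::string & host)
{
    auto addresses = resolveHostAllInOriginOrder(host);
    static thread_local std::mt19937 rng{std::random_device{}()};
    std::shuffle(addresses.begin(), addresses.end(), rng);
    return addresses;
}

int main()
{
    auto deterministic = resolveHostAllInOriginOrder("example.com").front();
    auto randomized = resolveHostAll("example.com").front();
    return deterministic.empty() || randomized.empty();
}
```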

View File

@ -0,0 +1,23 @@
#include <Common/FieldVisitorConvertToNumber.h>
#include "base/Decimal.h"
namespace DB
{
/// Explicit template instantiations.
template class FieldVisitorConvertToNumber<Int8>;
template class FieldVisitorConvertToNumber<UInt8>;
template class FieldVisitorConvertToNumber<Int16>;
template class FieldVisitorConvertToNumber<UInt16>;
template class FieldVisitorConvertToNumber<Int32>;
template class FieldVisitorConvertToNumber<UInt32>;
template class FieldVisitorConvertToNumber<Int64>;
template class FieldVisitorConvertToNumber<UInt64>;
template class FieldVisitorConvertToNumber<Int128>;
template class FieldVisitorConvertToNumber<UInt128>;
template class FieldVisitorConvertToNumber<Int256>;
template class FieldVisitorConvertToNumber<UInt256>;
template class FieldVisitorConvertToNumber<Float32>;
template class FieldVisitorConvertToNumber<Float64>;
}

View File

@ -117,4 +117,19 @@ public:
T operator() (const bool & x) const { return T(x); }
};
extern template class FieldVisitorConvertToNumber<Int8>;
extern template class FieldVisitorConvertToNumber<UInt8>;
extern template class FieldVisitorConvertToNumber<Int16>;
extern template class FieldVisitorConvertToNumber<UInt16>;
extern template class FieldVisitorConvertToNumber<Int32>;
extern template class FieldVisitorConvertToNumber<UInt32>;
extern template class FieldVisitorConvertToNumber<Int64>;
extern template class FieldVisitorConvertToNumber<UInt64>;
extern template class FieldVisitorConvertToNumber<Int128>;
extern template class FieldVisitorConvertToNumber<UInt128>;
extern template class FieldVisitorConvertToNumber<Int256>;
extern template class FieldVisitorConvertToNumber<UInt256>;
extern template class FieldVisitorConvertToNumber<Float32>;
extern template class FieldVisitorConvertToNumber<Float64>;
}

View File

@ -191,7 +191,7 @@ size_t PageCache::maxChunks() const { return chunks_per_mmap_target * max_mmaps;
size_t PageCache::getPinnedSize() const
{
std::unique_lock lock(global_mutex);
std::lock_guard lock(global_mutex);
return (total_chunks - lru.size()) * bytes_per_page * pages_per_chunk;
}
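The recurring change in this file is `std::unique_lock` → `std::lock_guard` wherever the lock is only acquired at construction and released at scope exit: `lock_guard` cannot be unlocked early or moved, which both documents the intent and avoids the extra state `unique_lock` carries. A minimal illustration of when each applies:

```cpp
#include <mutex>

std::mutex m;
int counter = 0;

void simpleCriticalSection()
{
    std::lock_guard<std::mutex> lock(m);  /// Locks now, unlocks at scope exit; nothing else.
    ++counter;
}

void flexibleCriticalSection()
{
    std::unique_lock<std::mutex> lock(m); /// Needed only to unlock early, defer, or move the lock.
    ++counter;
    lock.unlock();                        /// e.g. release before slow work.
}

int main()
{
    simpleCriticalSection();
    flexibleCriticalSection();
    return counter == 2 ? 0 : 1;
}
```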
@ -202,8 +202,11 @@ PageCache::MemoryStats PageCache::getResidentSetSize() const
if (use_madv_free)
{
std::unordered_set<UInt64> cache_mmap_addrs;
for (const auto & m : mmaps)
cache_mmap_addrs.insert(reinterpret_cast<UInt64>(m.ptr));
{
std::lock_guard lock(global_mutex);
for (const auto & m : mmaps)
cache_mmap_addrs.insert(reinterpret_cast<UInt64>(m.ptr));
}
ReadBufferFromFile in("/proc/self/smaps");
@ -283,6 +286,7 @@ PageCache::MemoryStats PageCache::getResidentSetSize() const
}
#endif
std::lock_guard lock(global_mutex);
stats.page_cache_rss = bytes_per_page * pages_per_chunk * total_chunks;
return stats;
}
@ -294,12 +298,12 @@ PinnedPageChunk PageCache::getOrSet(PageCacheKey key, bool detached_if_missing,
bool incremented_profile_events = false;
{
std::unique_lock lock(global_mutex);
std::lock_guard lock(global_mutex);
auto * it = chunk_by_key.find(key);
if (it == chunk_by_key.end())
{
chunk = getFreeChunk(lock);
chunk = getFreeChunk();
chassert(!chunk->key.has_value());
if (!detached_if_missing)
@ -331,14 +335,14 @@ PinnedPageChunk PageCache::getOrSet(PageCacheKey key, bool detached_if_missing,
/// otherwise we may detach a chunk pinned by someone else, which may be unexpected
/// for that someone else. Or maybe the latter is fine, dropCache() already does it.)
if (chunk->pages_populated.get(0) && reinterpret_cast<volatile std::atomic<char>*>(chunk->data)->load(std::memory_order_relaxed) == 0)
evictChunk(chunk, lock);
evictChunk(chunk);
}
if (inject_eviction && chunk->key.has_value() && rng() % 10 == 0)
{
/// Simulate eviction of the chunk or some of its pages.
if (rng() % 2 == 0)
evictChunk(chunk, lock);
evictChunk(chunk);
else
for (size_t i = 0; i < 20; ++i)
chunk->pages_populated.unset(rng() % (chunk->size / chunk->page_size));
@ -353,7 +357,7 @@ PinnedPageChunk PageCache::getOrSet(PageCacheKey key, bool detached_if_missing,
}
{
std::unique_lock chunk_lock(chunk->chunk_mutex);
std::lock_guard chunk_lock(chunk->chunk_mutex);
if (chunk->pages_state == PageChunkState::Limbo)
{
@ -383,7 +387,7 @@ void PageCache::removeRef(PageChunk * chunk) noexcept
return;
{
std::unique_lock lock(global_mutex);
std::lock_guard lock(global_mutex);
prev_pin_count = chunk->pin_count.fetch_sub(1);
if (prev_pin_count > 1)
@ -398,7 +402,7 @@ void PageCache::removeRef(PageChunk * chunk) noexcept
}
{
std::unique_lock chunk_lock(chunk->chunk_mutex);
std::lock_guard chunk_lock(chunk->chunk_mutex);
/// Need to be extra careful here because we unlocked global_mutex above, so other
/// getOrSet()/removeRef() calls could have happened during this brief period.
@ -421,7 +425,7 @@ static void logUnexpectedSyscallError(std::string name)
#endif
}
void PageCache::sendChunkToLimbo(PageChunk * chunk [[maybe_unused]], std::unique_lock<std::mutex> & /* chunk_mutex */) const noexcept
void PageCache::sendChunkToLimbo(PageChunk * chunk [[maybe_unused]], std::lock_guard<std::mutex> & /* chunk_mutex */) const noexcept
{
#ifdef MADV_FREE // if we're not on a very old version of Linux
chassert(chunk->size == bytes_per_page * pages_per_chunk);
@ -454,7 +458,7 @@ void PageCache::sendChunkToLimbo(PageChunk * chunk [[maybe_unused]], std::unique
#endif
}
std::pair<size_t, size_t> PageCache::restoreChunkFromLimbo(PageChunk * chunk, std::unique_lock<std::mutex> & /* chunk_mutex */) const noexcept
std::pair<size_t, size_t> PageCache::restoreChunkFromLimbo(PageChunk * chunk, std::lock_guard<std::mutex> & /* chunk_mutex */) const noexcept
{
static_assert(sizeof(std::atomic<char>) == 1, "char is not atomic?");
// Make sure our strategic memory reads/writes are not reordered or optimized out.
@ -505,10 +509,10 @@ std::pair<size_t, size_t> PageCache::restoreChunkFromLimbo(PageChunk * chunk, st
return {pages_restored, pages_evicted};
}
PageChunk * PageCache::getFreeChunk(std::unique_lock<std::mutex> & lock /* global_mutex */)
PageChunk * PageCache::getFreeChunk()
{
if (lru.empty() || (mmaps.size() < max_mmaps && lru.front().key.has_value()))
addMmap(lock);
addMmap();
if (lru.empty())
throw Exception(ErrorCodes::MEMORY_LIMIT_EXCEEDED, "All chunks in the entire page cache ({:.3} GiB) are pinned.",
bytes_per_page * pages_per_chunk * total_chunks * 1. / (1l << 30));
@ -519,12 +523,12 @@ PageChunk * PageCache::getFreeChunk(std::unique_lock<std::mutex> & lock /* globa
size_t prev_pin_count = chunk->pin_count.fetch_add(1);
chassert(prev_pin_count == 0);
evictChunk(chunk, lock);
evictChunk(chunk);
return chunk;
}
void PageCache::evictChunk(PageChunk * chunk, std::unique_lock<std::mutex> & /* global_mutex */)
void PageCache::evictChunk(PageChunk * chunk)
{
if (chunk->key.has_value())
{
@ -548,7 +552,7 @@ void PageCache::evictChunk(PageChunk * chunk, std::unique_lock<std::mutex> & /*
chunk->pages_populated.unsetAll();
}
void PageCache::addMmap(std::unique_lock<std::mutex> & /* global_mutex */)
void PageCache::addMmap()
{
/// ASLR by hand.
void * address_hint = reinterpret_cast<void *>(std::uniform_int_distribution<size_t>(0x100000000000UL, 0x700000000000UL)(rng));
@ -564,13 +568,13 @@ void PageCache::addMmap(std::unique_lock<std::mutex> & /* global_mutex */)
void PageCache::dropCache()
{
std::unique_lock lock(global_mutex);
std::lock_guard lock(global_mutex);
/// Detach and free unpinned chunks.
bool logged_error = false;
for (PageChunk & chunk : lru)
{
evictChunk(&chunk, lock);
evictChunk(&chunk);
if (use_madv_free)
{

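The getResidentSetSize() change above narrows the critical section: the shared state is copied under the mutex inside a nested block, and the slow /proc/self/smaps scan then runs without holding it. A minimal sketch of the pattern, with hypothetical names:

#include <mutex>
#include <unordered_set>
#include <vector>

std::mutex global_mutex;
std::vector<void *> mmap_ptrs;   /// guarded by global_mutex

size_t countMappings()
{
    std::unordered_set<void *> addrs;
    {
        std::lock_guard lock(global_mutex);   /// held only while copying
        addrs.insert(mmap_ptrs.begin(), mmap_ptrs.end());
    }
    /// ... slow, lock-free work (e.g. reading /proc/self/smaps) goes here ...
    return addrs.size();
}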
View File

@ -75,7 +75,7 @@ struct FileChunkAddress
/// E.g. "s3:<bucket>/<path>"
std::string path;
/// Optional string with ETag, or file modification time, or anything else.
std::string file_version;
std::string file_version{};
size_t offset = 0;
PageCacheKey hash() const;
@ -270,28 +270,28 @@ private:
mutable std::mutex global_mutex;
pcg64 rng;
pcg64 rng TSA_GUARDED_BY(global_mutex);
std::vector<Mmap> mmaps;
size_t total_chunks = 0;
std::vector<Mmap> mmaps TSA_GUARDED_BY(global_mutex);
size_t total_chunks TSA_GUARDED_BY(global_mutex) = 0;
/// All non-pinned chunks, including ones not assigned to any file. Least recently used is begin().
boost::intrusive::list<PageChunk, boost::intrusive::base_hook<PageChunkLRUListHook>, boost::intrusive::constant_time_size<true>> lru;
boost::intrusive::list<PageChunk, boost::intrusive::base_hook<PageChunkLRUListHook>, boost::intrusive::constant_time_size<true>> lru TSA_GUARDED_BY(global_mutex);
HashMap<PageCacheKey, PageChunk *> chunk_by_key;
HashMap<PageCacheKey, PageChunk *> chunk_by_key TSA_GUARDED_BY(global_mutex);
/// Get a usable chunk, doing eviction or allocation if needed.
/// Caller is responsible for clearing pages_populated.
PageChunk * getFreeChunk(std::unique_lock<std::mutex> & /* global_mutex */);
void addMmap(std::unique_lock<std::mutex> & /* global_mutex */);
void evictChunk(PageChunk * chunk, std::unique_lock<std::mutex> & /* global_mutex */);
PageChunk * getFreeChunk() TSA_REQUIRES(global_mutex);
void addMmap() TSA_REQUIRES(global_mutex);
void evictChunk(PageChunk * chunk) TSA_REQUIRES(global_mutex);
void removeRef(PageChunk * chunk) noexcept;
/// These may run in parallel with getFreeChunk(), so be very careful about which fields of the PageChunk we touch here.
void sendChunkToLimbo(PageChunk * chunk, std::unique_lock<std::mutex> & /* chunk_mutex */) const noexcept;
void sendChunkToLimbo(PageChunk * chunk, std::lock_guard<std::mutex> & /* chunk_mutex */) const noexcept;
/// Returns {pages_restored, pages_evicted}.
std::pair<size_t, size_t> restoreChunkFromLimbo(PageChunk * chunk, std::unique_lock<std::mutex> & /* chunk_mutex */) const noexcept;
std::pair<size_t, size_t> restoreChunkFromLimbo(PageChunk * chunk, std::lock_guard<std::mutex> & /* chunk_mutex */) const noexcept;
};
using PageCachePtr = std::shared_ptr<PageCache>;
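The TSA_GUARDED_BY / TSA_REQUIRES annotations above feed Clang's thread-safety analysis, and the switch from std::unique_lock to std::lock_guard pairs with it: libc++ annotates std::lock_guard (but not std::unique_lock) as a scoped capability when thread-safety annotations are enabled, which is presumably why the diff replaces one with the other. A minimal sketch using the raw Clang attributes these macros are assumed to wrap, compiled with clang -Wthread-safety:

#include <mutex>

class __attribute__((capability("mutex"))) Mutex
{
public:
    void lock() __attribute__((acquire_capability())) { m.lock(); }
    void unlock() __attribute__((release_capability())) { m.unlock(); }
private:
    std::mutex m;
};

class Cache
{
public:
    void set(int v)
    {
        mutex.lock();
        setLocked(v);        /// OK: the analysis sees the capability is held
        mutex.unlock();
    }
private:
    /// Compile-time warning if called without holding `mutex`.
    void setLocked(int v) __attribute__((requires_capability(mutex))) { value = v; }

    Mutex mutex;
    int value __attribute__((guarded_by(mutex))) = 0;
};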

View File

@ -51,7 +51,11 @@ ThreadFuzzer::ThreadFuzzer()
{
initConfiguration();
if (!isEffective())
{
/// It has no effect - disable it
stop();
return;
}
setup();
}
@ -172,6 +176,8 @@ void ThreadFuzzer::stop()
void ThreadFuzzer::start()
{
if (!instance().isEffective())
return;
started.store(true, std::memory_order_relaxed);
}
@ -180,11 +186,11 @@ bool ThreadFuzzer::isStarted()
return started.load(std::memory_order_relaxed);
}
static void injection(
static void injectionImpl(
double yield_probability,
double migrate_probability,
double sleep_probability,
double sleep_time_us [[maybe_unused]])
double sleep_time_us)
{
DENY_ALLOCATIONS_IN_SCOPE;
if (!ThreadFuzzer::isStarted())
@ -222,6 +228,19 @@ static void injection(
}
}
static ALWAYS_INLINE void injection(
double yield_probability,
double migrate_probability,
double sleep_probability,
double sleep_time_us)
{
DENY_ALLOCATIONS_IN_SCOPE;
if (!ThreadFuzzer::isStarted())
return;
injectionImpl(yield_probability, migrate_probability, sleep_probability, sleep_time_us);
}
void ThreadFuzzer::maybeInjectSleep()
{
auto & fuzzer = ThreadFuzzer::instance();
@ -286,13 +305,13 @@ void ThreadFuzzer::setup() const
#if THREAD_FUZZER_WRAP_PTHREAD
#define INJECTION_BEFORE(NAME) \
injection( \
injectionImpl( \
NAME##_before_yield_probability.load(std::memory_order_relaxed), \
NAME##_before_migrate_probability.load(std::memory_order_relaxed), \
NAME##_before_sleep_probability.load(std::memory_order_relaxed), \
NAME##_before_sleep_time_us.load(std::memory_order_relaxed));
#define INJECTION_AFTER(NAME) \
injection( \
injectionImpl( \
NAME##_after_yield_probability.load(std::memory_order_relaxed), \
NAME##_after_migrate_probability.load(std::memory_order_relaxed), \
NAME##_after_sleep_probability.load(std::memory_order_relaxed), \
@ -383,13 +402,16 @@ static void * getFunctionAddress(const char * name)
static constinit RET(*real_##NAME)(__VA_ARGS__) = nullptr; \
extern "C" RET NAME(__VA_ARGS__) \
{ \
INJECTION_BEFORE(NAME); \
bool thread_fuzzer_enabled = ThreadFuzzer::isStarted(); \
if (thread_fuzzer_enabled) \
INJECTION_BEFORE(NAME); \
if (unlikely(!real_##NAME)) { \
real_##NAME = \
reinterpret_cast<RET(*)(__VA_ARGS__)>(getFunctionAddress(#NAME)); \
} \
auto && ret{real_##NAME(arg)}; \
INJECTION_AFTER(NAME); \
if (thread_fuzzer_enabled) \
INJECTION_AFTER(NAME); \
return ret; \
}
FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER_USING_DLSYM)
@ -399,10 +421,17 @@ FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER_USING_DLSYM)
extern "C" RET __##NAME(__VA_ARGS__); \
extern "C" RET NAME(__VA_ARGS__) \
{ \
INJECTION_BEFORE(NAME); \
auto && ret{__##NAME(arg)}; \
INJECTION_AFTER(NAME); \
return ret; \
if (!ThreadFuzzer::isStarted()) \
{ \
return __##NAME(arg); \
} \
else \
{ \
INJECTION_BEFORE(NAME); \
auto && ret{__##NAME(arg)}; \
INJECTION_AFTER(NAME); \
return ret; \
} \
}
FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER_USING_INTERNAL_SYMBOLS)
#undef MAKE_WRAPPER_USING_INTERNAL_SYMBOLS
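The rewritten wrappers above hoist the ThreadFuzzer::isStarted() check so that, with fuzzing disabled, an intercepted call pays only one relaxed atomic load before reaching the real function. A sketch of that pattern with hypothetical names:

#include <atomic>
#include <pthread.h>

static std::atomic<bool> fuzzing_started{false};

static void injectBefore() { /* yield/migrate/sleep with configured probability */ }
static void injectAfter() { /* likewise */ }

extern "C" int wrapped_mutex_lock(pthread_mutex_t * arg)
{
    /// Fast path: one relaxed load, then straight to the real call.
    if (!fuzzing_started.load(std::memory_order_relaxed))
        return pthread_mutex_lock(arg);

    injectBefore();
    int ret = pthread_mutex_lock(arg);
    injectAfter();
    return ret;
}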

View File

@ -1,6 +1,7 @@
#pragma once
#include <cstdint>
#include <atomic>
#include <base/defines.h>
namespace DB
{
@ -56,7 +57,7 @@ public:
static void stop();
static void start();
static bool isStarted();
static bool ALWAYS_INLINE isStarted();
static void maybeInjectSleep();
static void maybeInjectMemoryLimitException();

View File

@ -5,8 +5,6 @@
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/logger_useful.h>
#include <memory>
namespace DB
{
@ -220,8 +218,8 @@ private:
return false;
}
if (process_list_element && !process_list_element->checkTimeLimitSoft())
return false;
if (process_list_element)
process_list_element->checkTimeLimit();
/// retries
logLastError("will retry due to error");

View File

@ -0,0 +1,36 @@
#include <atomic>
#include <iostream>
#include <mutex>
#include <optional>
#include <thread>
#include <vector>
#include <gtest/gtest.h>
#include <Common/ThreadFuzzer.h>
#include <Common/Stopwatch.h>
TEST(ThreadFuzzer, mutex)
{
/// Initialize ThreadFuzzer::started
DB::ThreadFuzzer::instance().isEffective();
std::mutex mutex;
std::atomic<size_t> elapsed_ns = 0;
auto func = [&]()
{
Stopwatch watch;
for (size_t i = 0; i < 1e6; ++i)
{
mutex.lock();
mutex.unlock();
}
elapsed_ns += watch.elapsedNanoseconds();
};
std::vector<std::optional<std::thread>> threads(10);
for (auto & thread : threads)
thread.emplace(func);
for (auto & thread : threads)
thread->join();
std::cout << "elapsed: " << elapsed_ns/1e9 << "\n";
}

View File

@ -114,6 +114,8 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const
write_int(static_cast<uint64_t>(coordination_settings->election_timeout_lower_bound_ms));
writeText("election_timeout_upper_bound_ms=", buf);
write_int(static_cast<uint64_t>(coordination_settings->election_timeout_upper_bound_ms));
writeText("leadership_expiry_ms=", buf);
write_int(static_cast<uint64_t>(coordination_settings->leadership_expiry_ms));
writeText("reserved_log_items=", buf);
write_int(coordination_settings->reserved_log_items);

View File

@ -26,6 +26,7 @@ struct Settings;
M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, leadership_expiry_ms, 0, "Duration after which a leader will expire if it fails to receive responses from peers. Set it lower than or equal to election_timeout_lower_bound_ms to avoid multiple leaders.", 0) \
M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \

View File

@ -316,6 +316,18 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
}
}
params.leadership_expiry_ = getValueOrMaxInt32AndLogWarning(
coordination_settings->leadership_expiry_ms.totalMilliseconds(), "leadership_expiry_ms", log);
if (params.leadership_expiry_ > 0 && params.leadership_expiry_ <= params.election_timeout_lower_bound_)
{
LOG_INFO(
log,
"leadership_expiry_ is smaller than or equal to election_timeout_lower_bound_ms, which can avoid multiple leaders. "
"Notice that too small leadership_expiry_ may make Raft group sensitive to network status. "
);
}
params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings->reserved_log_items, "reserved_log_items", log);
params.snapshot_distance_ = getValueOrMaxInt32AndLogWarning(coordination_settings->snapshot_distance, "snapshot_distance", log);

View File

@ -30,7 +30,7 @@ bool isLocalhost(const std::string & hostname)
{
try
{
return isLocalAddress(DNSResolver::instance().resolveHost(hostname));
return isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(hostname).front());
}
catch (...)
{

View File

@ -186,6 +186,7 @@ class IColumn;
\
M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \
M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \
M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \
M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \
M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
@ -597,6 +598,7 @@ class IColumn;
M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \
M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \
@ -1022,6 +1024,7 @@ class IColumn;
M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \
M(Bool, input_format_json_read_arrays_as_strings, true, "Allow to parse JSON arrays as strings in JSON input formats", 0) \
M(Bool, input_format_json_try_infer_named_tuples_from_objects, true, "Try to infer named tuples from JSON objects in JSON input formats", 0) \
M(Bool, input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects, false, "Use String type instead of an exception in case of ambiguous paths in JSON objects during named tuples inference", 0) \
M(Bool, input_format_json_infer_incomplete_types_as_strings, true, "Use type String for keys that contains only Nulls or empty objects/arrays during schema inference in JSON input formats", 0) \
M(Bool, input_format_json_named_tuples_as_objects, true, "Deserialize named tuple columns as JSON objects", 0) \
M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, true, "Ignore unknown keys in json object for named tuples", 0) \
@ -1029,7 +1032,7 @@ class IColumn;
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_exponent_floats, false, "Try to infer floats in exponential notation while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_exponent_floats, false, "Try to infer floats in exponential notation while schema inference in text formats (except JSON, where exponent numbers are always inferred)", 0) \
M(Bool, output_format_markdown_escape_special_characters, false, "Escape special characters in Markdown", 0) \
M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \
M(Bool, output_format_protobuf_nullables_with_google_wrappers, false, "When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized", 0) \

View File

@ -89,6 +89,9 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"},
{"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"},
{"page_cache_inject_eviction", false, false, "Added userspace page cache"},
{"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"},
{"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."},
{"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"},
}},
{"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"},
{"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"},

View File

@ -207,4 +207,10 @@ inline DataTypePtr createDecimal(UInt64 precision_value, UInt64 scale_value)
return std::make_shared<DecimalType<Decimal256>>(precision_value, scale_value);
}
extern template class DataTypeDecimalBase<Decimal32>;
extern template class DataTypeDecimalBase<Decimal64>;
extern template class DataTypeDecimalBase<Decimal128>;
extern template class DataTypeDecimalBase<Decimal256>;
extern template class DataTypeDecimalBase<DateTime64>;
}

View File

@ -112,6 +112,256 @@ static DataTypePtr createExact(const ASTPtr & arguments)
return createDecimal<DataTypeDecimal>(precision, scale);
}
template <typename FromDataType, typename ToDataType, typename ReturnType>
requires (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
ReturnType convertDecimalsImpl(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result)
{
using FromFieldType = typename FromDataType::FieldType;
using ToFieldType = typename ToDataType::FieldType;
using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) > sizeof(ToFieldType)), FromFieldType, ToFieldType>;
using MaxNativeType = typename MaxFieldType::NativeType;
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
MaxNativeType converted_value;
if (scale_to > scale_from)
{
converted_value = DecimalUtils::scaleMultiplier<MaxNativeType>(scale_to - scale_from);
if (common::mulOverflow(static_cast<MaxNativeType>(value.value), converted_value, converted_value))
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow while multiplying {} by scale {}",
std::string(ToDataType::family_name), toString(value.value), toString(converted_value));
else
return ReturnType(false);
}
}
else if (scale_to == scale_from)
{
converted_value = value.value;
}
else
{
converted_value = value.value / DecimalUtils::scaleMultiplier<MaxNativeType>(scale_from - scale_to);
}
if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType))
{
if (converted_value < std::numeric_limits<typename ToFieldType::NativeType>::min() ||
converted_value > std::numeric_limits<typename ToFieldType::NativeType>::max())
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow: {} is not in range ({}, {})",
std::string(ToDataType::family_name), toString(converted_value),
toString(std::numeric_limits<typename ToFieldType::NativeType>::min()),
toString(std::numeric_limits<typename ToFieldType::NativeType>::max()));
else
return ReturnType(false);
}
}
result = static_cast<typename ToFieldType::NativeType>(converted_value);
return ReturnType(true);
}
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
template void convertDecimalsImpl<FROM_DATA_TYPE, TO_DATA_TYPE, void>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); \
template bool convertDecimalsImpl<FROM_DATA_TYPE, TO_DATA_TYPE, bool>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef DISPATCH
template <typename FromDataType, typename ToDataType>
requires (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
typename ToDataType::FieldType convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to)
{
using ToFieldType = typename ToDataType::FieldType;
ToFieldType result;
convertDecimalsImpl<FromDataType, ToDataType, void>(value, scale_from, scale_to, result);
return result;
}
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
template typename TO_DATA_TYPE::FieldType convertDecimals<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef DISPATCH
template <typename FromDataType, typename ToDataType>
requires (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
bool tryConvertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result)
{
return convertDecimalsImpl<FromDataType, ToDataType, bool>(value, scale_from, scale_to, result);
}
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
template bool tryConvertDecimals<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef DISPATCH
template <typename FromDataType, typename ToDataType, typename ReturnType>
requires (IsDataTypeDecimal<FromDataType> && is_arithmetic_v<typename ToDataType::FieldType>)
ReturnType convertFromDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType & result)
{
using FromFieldType = typename FromDataType::FieldType;
using ToFieldType = typename ToDataType::FieldType;
return DecimalUtils::convertToImpl<ToFieldType, FromFieldType, ReturnType>(value, scale, result);
}
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
template void convertFromDecimalImpl<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \
template bool convertFromDecimalImpl<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_ARITHMETIC_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename FromDataType, typename ToDataType>
requires (IsDataTypeDecimal<FromDataType> && is_arithmetic_v<typename ToDataType::FieldType>)
inline typename ToDataType::FieldType convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale)
{
typename ToDataType::FieldType result;
convertFromDecimalImpl<FromDataType, ToDataType, void>(value, scale, result);
return result;
}
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
template typename TO_DATA_TYPE::FieldType convertFromDecimal<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_ARITHMETIC_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename FromDataType, typename ToDataType>
requires (IsDataTypeDecimal<FromDataType> && is_arithmetic_v<typename ToDataType::FieldType>)
inline bool tryConvertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
{
return convertFromDecimalImpl<FromDataType, ToDataType, bool>(value, scale, result);
}
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
template bool tryConvertFromDecimal<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_ARITHMETIC_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename FromDataType, typename ToDataType, typename ReturnType>
requires (is_arithmetic_v<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>)
ReturnType convertToDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType & result)
{
using FromFieldType = typename FromDataType::FieldType;
using ToFieldType = typename ToDataType::FieldType;
using ToNativeType = typename ToFieldType::NativeType;
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if constexpr (std::is_floating_point_v<FromFieldType>)
{
if (!std::isfinite(value))
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Cannot convert infinity or NaN to decimal", ToDataType::family_name);
else
return ReturnType(false);
}
auto out = value * static_cast<FromFieldType>(DecimalUtils::scaleMultiplier<ToNativeType>(scale));
if (out <= static_cast<FromFieldType>(std::numeric_limits<ToNativeType>::min()) ||
out >= static_cast<FromFieldType>(std::numeric_limits<ToNativeType>::max()))
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Float is out of Decimal range", ToDataType::family_name);
else
return ReturnType(false);
}
result = static_cast<ToNativeType>(out);
return ReturnType(true);
}
else
{
if constexpr (is_big_int_v<FromFieldType>)
return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal256>, ToDataType, ReturnType>(static_cast<Int256>(value), 0, scale, result));
else if constexpr (std::is_same_v<FromFieldType, UInt64>)
return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal128>, ToDataType, ReturnType>(static_cast<Int128>(value), 0, scale, result));
else
return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal64>, ToDataType, ReturnType>(static_cast<Int64>(value), 0, scale, result));
}
}
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
template void convertToDecimalImpl<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \
template bool convertToDecimalImpl<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result);
#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename FromDataType, typename ToDataType>
requires (is_arithmetic_v<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>)
inline typename ToDataType::FieldType convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale)
{
typename ToDataType::FieldType result;
convertToDecimalImpl<FromDataType, ToDataType, void>(value, scale, result);
return result;
}
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
template typename TO_DATA_TYPE::FieldType convertToDecimal<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale);
#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename FromDataType, typename ToDataType>
requires (is_arithmetic_v<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>)
inline bool tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
{
return convertToDecimalImpl<FromDataType, ToDataType, bool>(value, scale, result);
}
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
template bool tryConvertToDecimal<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result);
#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename T>
DataTypePtr createDecimalMaxPrecision(UInt64 scale)
{
return std::make_shared<DataTypeDecimal<T>>(DecimalUtils::max_precision<T>, scale);
}
template DataTypePtr createDecimalMaxPrecision<Decimal32>(UInt64 scale);
template DataTypePtr createDecimalMaxPrecision<Decimal64>(UInt64 scale);
template DataTypePtr createDecimalMaxPrecision<Decimal128>(UInt64 scale);
template DataTypePtr createDecimalMaxPrecision<Decimal256>(UInt64 scale);
/// Explicit template instantiations.
template class DataTypeDecimal<Decimal32>;
template class DataTypeDecimal<Decimal64>;
template class DataTypeDecimal<Decimal128>;
template class DataTypeDecimal<Decimal256>;
void registerDataTypeDecimal(DataTypeFactory & factory)
{
factory.registerDataType("Decimal32", createExact<Decimal32>, DataTypeFactory::CaseInsensitive);
@ -125,10 +375,4 @@ void registerDataTypeDecimal(DataTypeFactory & factory)
factory.registerAlias("FIXED", "Decimal", DataTypeFactory::CaseInsensitive);
}
/// Explicit template instantiations.
template class DataTypeDecimal<Decimal32>;
template class DataTypeDecimal<Decimal64>;
template class DataTypeDecimal<Decimal128>;
template class DataTypeDecimal<Decimal256>;
}
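As a worked illustration of what convertDecimalsImpl() does above: rescaling a decimal multiplies or divides its raw integer representation by 10^(scale difference), with an overflow check needed only on the way up. A standalone sketch using plain __int128 in place of the Decimal types:

#include <stdexcept>

/// E.g. 12345 at scale 2 (i.e. 123.45) rescaled to scale 4 becomes 1234500;
/// rescaling down truncates toward zero, as integer division does.
static __int128 rescale(__int128 value, unsigned scale_from, unsigned scale_to)
{
    auto pow10 = [](unsigned n)
    {
        __int128 r = 1;
        while (n--)
            r *= 10;
        return r;
    };

    if (scale_to > scale_from)
    {
        __int128 out;
        if (__builtin_mul_overflow(value, pow10(scale_to - scale_from), &out))
            throw std::overflow_error("decimal convert overflow");
        return out;
    }
    if (scale_to == scale_from)
        return value;
    return value / pow10(scale_from - scale_to);
}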

View File

@ -3,7 +3,11 @@
#include <base/arithmeticOverflow.h>
#include <base/extended_types.h>
#include <Common/typeid_cast.h>
#include <base/Decimal.h>
#include <base/Decimal_fwd.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDecimalBase.h>
#include <DataTypes/DataTypeDateTime64.h>
@ -13,7 +17,6 @@ namespace DB
namespace ErrorCodes
{
extern const int DECIMAL_OVERFLOW;
extern const int LOGICAL_ERROR;
}
@ -99,171 +102,145 @@ inline UInt32 getDecimalScale(const DataTypeDecimal<T> & data_type)
return data_type.getScale();
}
#define FOR_EACH_DECIMAL_TYPE(M) \
M(DataTypeDecimal<DateTime64>) \
M(DataTypeDateTime64) \
M(DataTypeDecimal32) \
M(DataTypeDecimal64) \
M(DataTypeDecimal128) \
M(DataTypeDecimal256)
#define FOR_EACH_DECIMAL_TYPE_PASS(M, X) \
M(DataTypeDecimal<DateTime64>, X) \
M(DataTypeDateTime64, X) \
M(DataTypeDecimal32, X) \
M(DataTypeDecimal64, X) \
M(DataTypeDecimal128, X) \
M(DataTypeDecimal256, X)
template <typename FromDataType, typename ToDataType, typename ReturnType = void>
requires (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
inline ReturnType convertDecimalsImpl(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result)
{
using FromFieldType = typename FromDataType::FieldType;
using ToFieldType = typename ToDataType::FieldType;
using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) > sizeof(ToFieldType)), FromFieldType, ToFieldType>;
using MaxNativeType = typename MaxFieldType::NativeType;
ReturnType convertDecimalsImpl(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result);
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
extern template void convertDecimalsImpl<FROM_DATA_TYPE, TO_DATA_TYPE, void>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); \
extern template bool convertDecimalsImpl<FROM_DATA_TYPE, TO_DATA_TYPE, bool>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
MaxNativeType converted_value;
if (scale_to > scale_from)
{
converted_value = DecimalUtils::scaleMultiplier<MaxNativeType>(scale_to - scale_from);
if (common::mulOverflow(static_cast<MaxNativeType>(value.value), converted_value, converted_value))
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow while multiplying {} by scale {}",
std::string(ToDataType::family_name), toString(value.value), toString(converted_value));
else
return ReturnType(false);
}
}
else if (scale_to == scale_from)
{
converted_value = value.value;
}
else
{
converted_value = value.value / DecimalUtils::scaleMultiplier<MaxNativeType>(scale_from - scale_to);
}
if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType))
{
if (converted_value < std::numeric_limits<typename ToFieldType::NativeType>::min() ||
converted_value > std::numeric_limits<typename ToFieldType::NativeType>::max())
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow: {} is not in range ({}, {})",
std::string(ToDataType::family_name), toString(converted_value),
toString(std::numeric_limits<typename ToFieldType::NativeType>::min()),
toString(std::numeric_limits<typename ToFieldType::NativeType>::max()));
else
return ReturnType(false);
}
}
result = static_cast<typename ToFieldType::NativeType>(converted_value);
return ReturnType(true);
}
template <typename FromDataType, typename ToDataType>
requires (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
inline typename ToDataType::FieldType convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to)
{
using ToFieldType = typename ToDataType::FieldType;
ToFieldType result;
typename ToDataType::FieldType convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to);
convertDecimalsImpl<FromDataType, ToDataType, void>(value, scale_from, scale_to, result);
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
extern template typename TO_DATA_TYPE::FieldType convertDecimals<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
return result;
}
template <typename FromDataType, typename ToDataType>
requires (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
inline bool tryConvertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result)
{
return convertDecimalsImpl<FromDataType, ToDataType, bool>(value, scale_from, scale_to, result);
}
bool tryConvertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result);
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
extern template bool tryConvertDecimals<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename FromDataType, typename ToDataType, typename ReturnType>
requires (IsDataTypeDecimal<FromDataType> && is_arithmetic_v<typename ToDataType::FieldType>)
inline ReturnType convertFromDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
{
using FromFieldType = typename FromDataType::FieldType;
using ToFieldType = typename ToDataType::FieldType;
ReturnType convertFromDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType & result);
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
extern template void convertFromDecimalImpl<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \
extern template bool convertFromDecimalImpl<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_ARITHMETIC_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
return DecimalUtils::convertToImpl<ToFieldType, FromFieldType, ReturnType>(value, scale, result);
}
template <typename FromDataType, typename ToDataType>
requires (IsDataTypeDecimal<FromDataType> && is_arithmetic_v<typename ToDataType::FieldType>)
inline typename ToDataType::FieldType convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale)
{
typename ToDataType::FieldType result;
typename ToDataType::FieldType convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale);
convertFromDecimalImpl<FromDataType, ToDataType, void>(value, scale, result);
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
extern template typename TO_DATA_TYPE::FieldType convertFromDecimal<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_ARITHMETIC_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
return result;
}
template <typename FromDataType, typename ToDataType>
requires (IsDataTypeDecimal<FromDataType> && is_arithmetic_v<typename ToDataType::FieldType>)
inline bool tryConvertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
{
return convertFromDecimalImpl<FromDataType, ToDataType, bool>(value, scale, result);
}
bool tryConvertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result);
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
extern template bool tryConvertFromDecimal<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result);
#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X)
FOR_EACH_ARITHMETIC_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename FromDataType, typename ToDataType, typename ReturnType>
requires (is_arithmetic_v<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>)
inline ReturnType convertToDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
{
using FromFieldType = typename FromDataType::FieldType;
using ToFieldType = typename ToDataType::FieldType;
using ToNativeType = typename ToFieldType::NativeType;
ReturnType convertToDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result);
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
extern template void convertToDecimalImpl<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \
extern template bool convertToDecimalImpl<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result);
#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
if constexpr (std::is_floating_point_v<FromFieldType>)
{
if (!std::isfinite(value))
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Cannot convert infinity or NaN to decimal", ToDataType::family_name);
else
return ReturnType(false);
}
auto out = value * static_cast<FromFieldType>(DecimalUtils::scaleMultiplier<ToNativeType>(scale));
if (out <= static_cast<FromFieldType>(std::numeric_limits<ToNativeType>::min()) ||
out >= static_cast<FromFieldType>(std::numeric_limits<ToNativeType>::max()))
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Float is out of Decimal range", ToDataType::family_name);
else
return ReturnType(false);
}
result = static_cast<ToNativeType>(out);
return ReturnType(true);
}
else
{
if constexpr (is_big_int_v<FromFieldType>)
return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal256>, ToDataType, ReturnType>(static_cast<Int256>(value), 0, scale, result));
else if constexpr (std::is_same_v<FromFieldType, UInt64>)
return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal128>, ToDataType, ReturnType>(static_cast<Int128>(value), 0, scale, result));
else
return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal64>, ToDataType, ReturnType>(static_cast<Int64>(value), 0, scale, result));
}
}
template <typename FromDataType, typename ToDataType>
requires (is_arithmetic_v<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>)
inline typename ToDataType::FieldType convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale)
{
typename ToDataType::FieldType result;
convertToDecimalImpl<FromDataType, ToDataType, void>(value, scale, result);
return result;
}
typename ToDataType::FieldType convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale);
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
extern template typename TO_DATA_TYPE::FieldType convertToDecimal<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale);
#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename FromDataType, typename ToDataType>
requires (is_arithmetic_v<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>)
inline bool tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
{
return convertToDecimalImpl<FromDataType, ToDataType, bool>(value, scale, result);
}
bool tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result);
#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \
extern template bool tryConvertToDecimal<FROM_DATA_TYPE, TO_DATA_TYPE>(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result);
#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X)
FOR_EACH_DECIMAL_TYPE(INVOKE);
#undef INVOKE
#undef DISPATCH
template <typename T>
inline DataTypePtr createDecimalMaxPrecision(UInt64 scale)
{
return std::make_shared<DataTypeDecimal<T>>(DecimalUtils::max_precision<T>, scale);
}
DataTypePtr createDecimalMaxPrecision(UInt64 scale);
extern template DataTypePtr createDecimalMaxPrecision<Decimal32>(UInt64 scale);
extern template DataTypePtr createDecimalMaxPrecision<Decimal64>(UInt64 scale);
extern template DataTypePtr createDecimalMaxPrecision<Decimal128>(UInt64 scale);
extern template DataTypePtr createDecimalMaxPrecision<Decimal256>(UInt64 scale);
extern template class DataTypeDecimal<Decimal32>;
extern template class DataTypeDecimal<Decimal64>;
extern template class DataTypeDecimal<Decimal128>;
extern template class DataTypeDecimal<Decimal256>;
}

View File

@ -102,4 +102,21 @@ void registerDataTypeNumbers(DataTypeFactory & factory)
factory.registerAlias("DOUBLE PRECISION", "Float64", DataTypeFactory::CaseInsensitive);
}
/// Explicit template instantiations.
template class DataTypeNumber<UInt8>;
template class DataTypeNumber<UInt16>;
template class DataTypeNumber<UInt32>;
template class DataTypeNumber<UInt64>;
template class DataTypeNumber<Int8>;
template class DataTypeNumber<Int16>;
template class DataTypeNumber<Int32>;
template class DataTypeNumber<Int64>;
template class DataTypeNumber<Float32>;
template class DataTypeNumber<Float64>;
template class DataTypeNumber<UInt128>;
template class DataTypeNumber<Int128>;
template class DataTypeNumber<UInt256>;
template class DataTypeNumber<Int256>;
}

View File

@ -55,6 +55,22 @@ private:
bool unsigned_can_be_signed = false;
};
extern template class DataTypeNumber<UInt8>;
extern template class DataTypeNumber<UInt16>;
extern template class DataTypeNumber<UInt32>;
extern template class DataTypeNumber<UInt64>;
extern template class DataTypeNumber<Int8>;
extern template class DataTypeNumber<Int16>;
extern template class DataTypeNumber<Int32>;
extern template class DataTypeNumber<Int64>;
extern template class DataTypeNumber<Float32>;
extern template class DataTypeNumber<Float64>;
extern template class DataTypeNumber<UInt128>;
extern template class DataTypeNumber<Int128>;
extern template class DataTypeNumber<UInt256>;
extern template class DataTypeNumber<Int256>;
using DataTypeUInt8 = DataTypeNumber<UInt8>;
using DataTypeUInt16 = DataTypeNumber<UInt16>;
using DataTypeUInt32 = DataTypeNumber<UInt32>;

View File

@ -267,4 +267,91 @@ SerializationPtr IDataType::getSerialization(const NameAndTypePair & column)
return column.type->getDefaultSerialization();
}
#define FOR_TYPES_OF_TYPE(M) \
M(TypeIndex) \
M(const IDataType &) \
M(const DataTypePtr &) \
M(WhichDataType)
#define DISPATCH(TYPE) \
bool isUInt8(TYPE data_type) { return WhichDataType(data_type).isUInt8(); } \
bool isUInt16(TYPE data_type) { return WhichDataType(data_type).isUInt16(); } \
bool isUInt32(TYPE data_type) { return WhichDataType(data_type).isUInt32(); } \
bool isUInt64(TYPE data_type) { return WhichDataType(data_type).isUInt64(); } \
bool isNativeUInt(TYPE data_type) { return WhichDataType(data_type).isNativeUInt(); } \
bool isUInt(TYPE data_type) { return WhichDataType(data_type).isUInt(); } \
\
bool isInt8(TYPE data_type) { return WhichDataType(data_type).isInt8(); } \
bool isInt16(TYPE data_type) { return WhichDataType(data_type).isInt16(); } \
bool isInt32(TYPE data_type) { return WhichDataType(data_type).isInt32(); } \
bool isInt64(TYPE data_type) { return WhichDataType(data_type).isInt64(); } \
bool isNativeInt(TYPE data_type) { return WhichDataType(data_type).isNativeInt(); } \
bool isInt(TYPE data_type) { return WhichDataType(data_type).isInt(); } \
\
bool isInteger(TYPE data_type) { return WhichDataType(data_type).isInteger(); } \
bool isNativeInteger(TYPE data_type) { return WhichDataType(data_type).isNativeInteger(); } \
\
bool isDecimal(TYPE data_type) { return WhichDataType(data_type).isDecimal(); } \
\
bool isFloat(TYPE data_type) { return WhichDataType(data_type).isFloat(); } \
\
bool isNativeNumber(TYPE data_type) { return WhichDataType(data_type).isNativeNumber(); } \
bool isNumber(TYPE data_type) { return WhichDataType(data_type).isNumber(); } \
\
bool isEnum8(TYPE data_type) { return WhichDataType(data_type).isEnum8(); } \
bool isEnum16(TYPE data_type) { return WhichDataType(data_type).isEnum16(); } \
bool isEnum(TYPE data_type) { return WhichDataType(data_type).isEnum(); } \
\
bool isDate(TYPE data_type) { return WhichDataType(data_type).isDate(); } \
bool isDate32(TYPE data_type) { return WhichDataType(data_type).isDate32(); } \
bool isDateOrDate32(TYPE data_type) { return WhichDataType(data_type).isDateOrDate32(); } \
bool isDateTime(TYPE data_type) { return WhichDataType(data_type).isDateTime(); } \
bool isDateTime64(TYPE data_type) { return WhichDataType(data_type).isDateTime64(); } \
bool isDateTimeOrDateTime64(TYPE data_type) { return WhichDataType(data_type).isDateTimeOrDateTime64(); } \
bool isDateOrDate32OrDateTimeOrDateTime64(TYPE data_type) { return WhichDataType(data_type).isDateOrDate32OrDateTimeOrDateTime64(); } \
\
bool isString(TYPE data_type) { return WhichDataType(data_type).isString(); } \
bool isFixedString(TYPE data_type) { return WhichDataType(data_type).isFixedString(); } \
bool isStringOrFixedString(TYPE data_type) { return WhichDataType(data_type).isStringOrFixedString(); } \
\
bool isUUID(TYPE data_type) { return WhichDataType(data_type).isUUID(); } \
bool isIPv4(TYPE data_type) { return WhichDataType(data_type).isIPv4(); } \
bool isIPv6(TYPE data_type) { return WhichDataType(data_type).isIPv6(); } \
bool isArray(TYPE data_type) { return WhichDataType(data_type).isArray(); } \
bool isTuple(TYPE data_type) { return WhichDataType(data_type).isTuple(); } \
bool isMap(TYPE data_type) {return WhichDataType(data_type).isMap(); } \
bool isInterval(TYPE data_type) {return WhichDataType(data_type).isInterval(); } \
bool isObject(TYPE data_type) { return WhichDataType(data_type).isObject(); } \
bool isVariant(TYPE data_type) { return WhichDataType(data_type).isVariant(); } \
bool isNothing(TYPE data_type) { return WhichDataType(data_type).isNothing(); } \
\
bool isColumnedAsNumber(TYPE data_type) \
{ \
WhichDataType which(data_type); \
return which.isInteger() || which.isFloat() || which.isDateOrDate32OrDateTimeOrDateTime64() || which.isUUID() || which.isIPv4() || which.isIPv6(); \
} \
\
bool isColumnedAsDecimal(TYPE data_type) \
{ \
WhichDataType which(data_type); \
return which.isDecimal() || which.isDateTime64(); \
} \
\
bool isNotCreatable(TYPE data_type) \
{ \
WhichDataType which(data_type); \
return which.isNothing() || which.isFunction() || which.isSet(); \
} \
\
bool isNotDecimalButComparableToDecimal(TYPE data_type) \
{ \
WhichDataType which(data_type); \
return which.isInt() || which.isUInt() || which.isFloat(); \
} \
FOR_TYPES_OF_TYPE(DISPATCH)
#undef DISPATCH
#undef FOR_TYPES_OF_TYPE
}
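The DISPATCH / FOR_TYPES_OF_TYPE pair above is a classic X-macro: the type list is written once, and each expansion site generates one overload per entry, replacing per-call-site template instantiation with plain functions defined in the .cpp. A minimal self-contained sketch of the technique, with illustrative names:

#include <cstdio>

#define FOR_EACH_ARG_TYPE(M) \
    M(int) \
    M(double) \
    M(const char *)

/// Generates one describe() overload per listed type.
#define DISPATCH(TYPE) void describe(TYPE) { std::printf("got " #TYPE "\n"); }
FOR_EACH_ARG_TYPE(DISPATCH)
#undef DISPATCH

int main()
{
    describe(1);
    describe(2.5);
    describe("x");
}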

View File

@ -424,71 +424,76 @@ struct WhichDataType
/// IDataType helpers (alternative for IDataType virtual methods with single point of truth)
template <typename T> inline bool isUInt8(const T & data_type) { return WhichDataType(data_type).isUInt8(); }
template <typename T> inline bool isUInt16(const T & data_type) { return WhichDataType(data_type).isUInt16(); }
template <typename T> inline bool isUInt32(const T & data_type) { return WhichDataType(data_type).isUInt32(); }
template <typename T> inline bool isUInt64(const T & data_type) { return WhichDataType(data_type).isUInt64(); }
template <typename T> inline bool isNativeUInt(const T & data_type) { return WhichDataType(data_type).isNativeUInt(); }
template <typename T> inline bool isUInt(const T & data_type) { return WhichDataType(data_type).isUInt(); }
#define FOR_TYPES_OF_TYPE(M) \
M(TypeIndex) \
M(const IDataType &) \
M(const DataTypePtr &) \
M(WhichDataType)
template <typename T> inline bool isInt8(const T & data_type) { return WhichDataType(data_type).isInt8(); }
template <typename T> inline bool isInt16(const T & data_type) { return WhichDataType(data_type).isInt16(); }
template <typename T> inline bool isInt32(const T & data_type) { return WhichDataType(data_type).isInt32(); }
template <typename T> inline bool isInt64(const T & data_type) { return WhichDataType(data_type).isInt64(); }
template <typename T> inline bool isNativeInt(const T & data_type) { return WhichDataType(data_type).isNativeInt(); }
template <typename T> inline bool isInt(const T & data_type) { return WhichDataType(data_type).isInt(); }
#define DISPATCH(TYPE) \
bool isUInt8(TYPE data_type); \
bool isUInt16(TYPE data_type); \
bool isUInt32(TYPE data_type); \
bool isUInt64(TYPE data_type); \
bool isNativeUInt(TYPE data_type); \
bool isUInt(TYPE data_type); \
\
bool isInt8(TYPE data_type); \
bool isInt16(TYPE data_type); \
bool isInt32(TYPE data_type); \
bool isInt64(TYPE data_type); \
bool isNativeInt(TYPE data_type); \
bool isInt(TYPE data_type); \
\
bool isInteger(TYPE data_type); \
bool isNativeInteger(TYPE data_type); \
\
bool isDecimal(TYPE data_type); \
\
bool isFloat(TYPE data_type); \
\
bool isNativeNumber(TYPE data_type); \
bool isNumber(TYPE data_type); \
\
bool isEnum8(TYPE data_type); \
bool isEnum16(TYPE data_type); \
bool isEnum(TYPE data_type); \
\
bool isDate(TYPE data_type); \
bool isDate32(TYPE data_type); \
bool isDateOrDate32(TYPE data_type); \
bool isDateTime(TYPE data_type); \
bool isDateTime64(TYPE data_type); \
bool isDateTimeOrDateTime64(TYPE data_type); \
bool isDateOrDate32OrDateTimeOrDateTime64(TYPE data_type); \
\
bool isString(TYPE data_type); \
bool isFixedString(TYPE data_type); \
bool isStringOrFixedString(TYPE data_type); \
\
bool isUUID(TYPE data_type); \
bool isIPv4(TYPE data_type); \
bool isIPv6(TYPE data_type); \
bool isArray(TYPE data_type); \
bool isTuple(TYPE data_type); \
bool isMap(TYPE data_type); \
bool isInterval(TYPE data_type); \
bool isObject(TYPE data_type); \
bool isVariant(TYPE data_type); \
bool isNothing(TYPE data_type); \
\
bool isColumnedAsNumber(TYPE data_type); \
\
bool isColumnedAsDecimal(TYPE data_type); \
\
bool isNotCreatable(TYPE data_type); \
\
bool isNotDecimalButComparableToDecimal(TYPE data_type); \
template <typename T> inline bool isInteger(const T & data_type) { return WhichDataType(data_type).isInteger(); }
template <typename T> inline bool isNativeInteger(const T & data_type) { return WhichDataType(data_type).isNativeInteger(); }
FOR_TYPES_OF_TYPE(DISPATCH)
template <typename T> inline bool isDecimal(const T & data_type) { return WhichDataType(data_type).isDecimal(); }
template <typename T> inline bool isFloat(const T & data_type) { return WhichDataType(data_type).isFloat(); }
template <typename T> inline bool isNativeNumber(const T & data_type) { return WhichDataType(data_type).isNativeNumber(); }
template <typename T> inline bool isNumber(const T & data_type) { return WhichDataType(data_type).isNumber(); }
template <typename T> inline bool isEnum8(const T & data_type) { return WhichDataType(data_type).isEnum8(); }
template <typename T> inline bool isEnum16(const T & data_type) { return WhichDataType(data_type).isEnum16(); }
template <typename T> inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); }
template <typename T> inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); }
template <typename T> inline bool isDate32(const T & data_type) { return WhichDataType(data_type).isDate32(); }
template <typename T> inline bool isDateOrDate32(const T & data_type) { return WhichDataType(data_type).isDateOrDate32(); }
template <typename T> inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).isDateTime(); }
template <typename T> inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); }
template <typename T> inline bool isDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTimeOrDateTime64(); }
template <typename T> inline bool isDateOrDate32OrDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateOrDate32OrDateTimeOrDateTime64(); }
template <typename T> inline bool isString(const T & data_type) { return WhichDataType(data_type).isString(); }
template <typename T> inline bool isFixedString(const T & data_type) { return WhichDataType(data_type).isFixedString(); }
template <typename T> inline bool isStringOrFixedString(const T & data_type) { return WhichDataType(data_type).isStringOrFixedString(); }
template <typename T> inline bool isUUID(const T & data_type) { return WhichDataType(data_type).isUUID(); }
template <typename T> inline bool isIPv4(const T & data_type) { return WhichDataType(data_type).isIPv4(); }
template <typename T> inline bool isIPv6(const T & data_type) { return WhichDataType(data_type).isIPv6(); }
template <typename T> inline bool isArray(const T & data_type) { return WhichDataType(data_type).isArray(); }
template <typename T> inline bool isTuple(const T & data_type) { return WhichDataType(data_type).isTuple(); }
template <typename T> inline bool isMap(const T & data_type) { return WhichDataType(data_type).isMap(); }
template <typename T> inline bool isInterval(const T & data_type) { return WhichDataType(data_type).isInterval(); }
template <typename T> inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject(); }
template <typename T> inline bool isVariant(const T & data_type) { return WhichDataType(data_type).isVariant(); }
template <typename T> inline bool isNothing(const T & data_type) { return WhichDataType(data_type).isNothing(); }
template <typename T>
inline bool isColumnedAsNumber(const T & data_type)
{
WhichDataType which(data_type);
return which.isInteger() || which.isFloat() || which.isDateOrDate32OrDateTimeOrDateTime64() || which.isUUID() || which.isIPv4() || which.isIPv6();
}
template <typename T>
inline bool isColumnedAsDecimal(const T & data_type)
{
WhichDataType which(data_type);
return which.isDecimal() || which.isDateTime64();
}
#undef DISPATCH
#undef FOR_TYPES_OF_TYPE
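/// Editorial sketch (an assumption, not part of this diff): FOR_TYPES_OF_TYPE is
/// expected to apply DISPATCH to several argument types (e.g. TypeIndex,
/// const IDataType &, const DataTypePtr &), so each templated helper above also
/// gains non-template overloads along the lines of:
///     bool isUInt8(TypeIndex data_type);
///     bool isUInt8(const IDataType & data_type);
///     bool isUInt8(const DataTypePtr & data_type);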
/// Same as isColumnedAsDecimal but also checks the value type of the underlying column.
template <typename T, typename DataType>
@ -498,19 +503,6 @@ inline bool isColumnedAsDecimalT(const DataType & data_type)
return (which.isDecimal() || which.isDateTime64()) && which.idx == TypeToTypeIndex<T>;
}
template <typename T>
inline bool isNotCreatable(const T & data_type)
{
WhichDataType which(data_type);
return which.isNothing() || which.isFunction() || which.isSet();
}
inline bool isNotDecimalButComparableToDecimal(const DataTypePtr & data_type)
{
WhichDataType which(data_type);
return which.isInt() || which.isUInt() || which.isFloat();
}
inline bool isBool(const DataTypePtr & data_type)
{
return data_type->getName() == "Bool";

View File

@ -29,4 +29,10 @@ public:
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
};
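/// Editorial note: the `extern template class` declarations below suppress
/// implicit instantiation in every translation unit that includes this header;
/// the listed specializations are presumably instantiated once in the
/// corresponding .cpp file.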
extern template class SerializationDecimalBase<Decimal32>;
extern template class SerializationDecimalBase<Decimal64>;
extern template class SerializationDecimalBase<Decimal128>;
extern template class SerializationDecimalBase<Decimal256>;
extern template class SerializationDecimalBase<DateTime64>;
}

View File

@ -146,20 +146,7 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont
if (!checkTableFilePath(table_path, context_, throw_on_error))
return {};
String format;
if (throw_on_error)
{
format = FormatFactory::instance().getFormatFromFileName(table_path);
}
else
{
auto format_maybe = FormatFactory::instance().tryGetFormatFromFileName(table_path);
if (!format_maybe)
return {};
format = *format_maybe;
}
auto ast_function_ptr = makeASTFunction("file", std::make_shared<ASTLiteral>(table_path), std::make_shared<ASTLiteral>(format));
auto ast_function_ptr = makeASTFunction("file", std::make_shared<ASTLiteral>(table_path));
auto table_function = TableFunctionFactory::instance().get(ast_function_ptr, context_);
if (!table_function)

View File

@ -1078,7 +1078,7 @@ void HashedArrayDictionary<dictionary_key_type, sharded>::calculateBytesAllocate
bytes_allocated += container.allocated_bytes();
}
bucket_count = container.capacity();
bucket_count += container.capacity();
}
};
@ -1089,6 +1089,13 @@ void HashedArrayDictionary<dictionary_key_type, sharded>::calculateBytesAllocate
bytes_allocated += container.size();
}
/// `bucket_count` should be a sum over all shards, but not over all attributes,
/// since it is used to calculate the load factor as `element_count / bucket_count`,
/// and `element_count` is a sum over all shards, not over all attributes.
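/// Illustrative example (editorial): with 2 attributes whose containers each have
/// capacity 1024 in a single shard, the loop above accumulates bucket_count = 2048;
/// dividing by attributes.size() brings it back to 1024, the capacity seen by a
/// single attribute, so load_factor = element_count / bucket_count stays meaningful.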
if (attributes.size())
bucket_count /= attributes.size();
if (update_field_loaded_block)
bytes_allocated += update_field_loaded_block->allocatedBytes();
@ -1167,17 +1174,24 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory)
if (shards <= 0 || 128 < shards)
throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARDS parameter should be within [1, 128]", full_name);
HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime, static_cast<size_t>(shards)};
Int64 shard_load_queue_backlog = config.getInt(config_prefix + dictionary_layout_prefix + ".shard_load_queue_backlog", 10000);
if (shard_load_queue_backlog <= 0)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: SHARD_LOAD_QUEUE_BACKLOG parameter should be greater then zero", full_name);
if (source_ptr->hasUpdateField() && shards > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: SHARDS parameter does not supports for updatable source (UPDATE_FIELD)", full_name);
HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime, static_cast<size_t>(shards), static_cast<UInt64>(shard_load_queue_backlog)};
ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
const auto & settings = context->getSettingsRef();
const auto * clickhouse_source = dynamic_cast<const ClickHouseDictionarySource *>(source_ptr.get());
configuration.use_async_executor = clickhouse_source && clickhouse_source->isLocal() && settings.dictionary_use_async_executor;
if (settings.max_execution_time.totalSeconds() > 0)
configuration.load_timeout = std::chrono::seconds(settings.max_execution_time.totalSeconds());
if (dictionary_key_type == DictionaryKeyType::Simple)
{
if (shards > 1)

View File

@ -29,6 +29,7 @@ struct HashedArrayDictionaryStorageConfiguration
size_t shards = 1;
size_t shard_load_queue_backlog = 10000;
bool use_async_executor = false;
std::chrono::seconds load_timeout{0};
};
template <DictionaryKeyType dictionary_key_type, bool sharded>

View File

@ -67,6 +67,7 @@ struct HashedDictionaryConfiguration
const bool require_nonempty;
const DictionaryLifetime lifetime;
bool use_async_executor = false;
const std::chrono::seconds load_timeout{0};
};
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>

View File

@ -31,6 +31,7 @@ template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded> clas
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int TIMEOUT_EXCEEDED;
}
}
@ -50,9 +51,10 @@ public:
, shards(dictionary.configuration.shards)
, pool(CurrentMetrics::HashedDictionaryThreads, CurrentMetrics::HashedDictionaryThreadsActive, CurrentMetrics::HashedDictionaryThreadsScheduled, shards)
, shards_queues(shards)
, loading_timeout(dictionary.configuration.load_timeout)
{
UInt64 backlog = dictionary.configuration.shard_load_queue_backlog;
LOG_TRACE(dictionary.log, "Will load the {} dictionary using {} threads (with {} backlog)", dictionary_name, shards, backlog);
LOG_TRACE(dictionary.log, "Will load the {} dictionary using {} threads (with {} backlog and timeout {} sec)", dictionary_name, shards, backlog, loading_timeout.count());
shards_slots.resize(shards);
iota(shards_slots.data(), shards_slots.size(), UInt64(0));
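/// Editorial note: iota fills shards_slots with 0, 1, ..., shards - 1, i.e. an
/// identity slot-to-shard mapping for the block splitting below.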
@ -62,7 +64,11 @@ public:
shards_queues[shard].emplace(backlog);
pool.scheduleOrThrowOnError([this, shard, thread_group = CurrentThread::getGroup()]
{
WorkerStatistic statistic;
SCOPE_EXIT_SAFE(
LOG_TRACE(dictionary.log, "Finished worker for dictionary {} shard {}, processed {} blocks, {} rows, total time {}ms",
dictionary_name, shard, statistic.total_blocks, statistic.total_rows, statistic.total_elapsed_ms);
if (thread_group)
CurrentThread::detachFromGroupIfNotDetached();
);
@ -74,7 +80,9 @@ public:
CurrentThread::attachToGroupIfDetached(thread_group);
setThreadName("HashedDictLoad");
threadWorker(shard);
LOG_TRACE(dictionary.log, "Starting worker for dictionary {}, shard {}", dictionary_name, shard);
threadWorker(shard, statistic);
});
}
}
@ -87,8 +95,28 @@ public:
for (size_t shard = 0; shard < shards; ++shard)
{
if (!shards_queues[shard]->push(std::move(shards_blocks[shard])))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to shards queue #{}", shard);
const auto & current_block = shards_blocks[shard];
while (!shards_queues[shard]->tryPush(current_block, /* milliseconds= */ 100))
{
if (shards_queues[shard]->isFinished())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to finished shards queue #{}, dictionary {}", shard, dictionary_name);
/// We need to check if some workers failed
if (pool.active() != shards)
{
LOG_DEBUG(dictionary.log, "Some workers for dictionary {} failed, stopping all workers", dictionary_name);
stop_all_workers = true;
pool.wait(); /// We expect exception to be thrown from the failed worker thread
throw Exception(ErrorCodes::LOGICAL_ERROR, "Worker threads for dictionary {} are not active", dictionary_name);
}
if (loading_timeout.count() && std::chrono::milliseconds(total_loading_time.elapsedMilliseconds()) > loading_timeout)
{
stop_all_workers = true;
pool.wait();
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout of {} sec expired while loading dictionary {}", loading_timeout.count(), dictionary_name);
}
}
}
}
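/// Editorial note: the 100 ms granularity of tryPush above (and tryPop in the
/// workers below) is what allows the loader to periodically re-check worker
/// liveness and the loading timeout instead of blocking indefinitely on a
/// bounded queue.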
@ -124,27 +152,49 @@ private:
String dictionary_name;
const size_t shards;
ThreadPool pool;
std::atomic_bool stop_all_workers{false};
std::vector<std::optional<ConcurrentBoundedQueue<Block>>> shards_queues;
std::chrono::seconds loading_timeout;
Stopwatch total_loading_time;
std::vector<UInt64> shards_slots;
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
void threadWorker(size_t shard)
struct WorkerStatistic
{
UInt64 total_elapsed_ms = 0;
UInt64 total_blocks = 0;
UInt64 total_rows = 0;
};
void threadWorker(size_t shard, WorkerStatistic & statistic)
{
Block block;
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder_;
auto & shard_queue = *shards_queues[shard];
while (shard_queue.pop(block))
while (true)
{
if (!shard_queue.tryPop(block, /* milliseconds= */ 100))
{
/// Check if we need to stop
if (stop_all_workers || shard_queue.isFinished())
break;
/// Timeout expired, but the queue is not finished yet, try again
continue;
}
Stopwatch watch;
dictionary.blockToAttributes(block, arena_holder_, shard);
UInt64 elapsed_ms = watch.elapsedMilliseconds();
if (elapsed_ms > 1'000)
LOG_TRACE(dictionary.log, "Block processing for shard #{} is slow {}ms (rows {}).", shard, elapsed_ms, block.rows());
}
if (!shard_queue.isFinished())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not pull non finished shards queue #{}", shard);
statistic.total_elapsed_ms += elapsed_ms;
statistic.total_blocks += 1;
statistic.total_rows += block.rows();
if (elapsed_ms > 1'000)
LOG_TRACE(dictionary.log, "Block processing for shard #{} is slow {}ms (rows {})", shard, elapsed_ms, block.rows());
}
}
/// Split block to shards smaller block, using 'selector'.

View File

@ -77,6 +77,7 @@ void registerDictionaryHashed(DictionaryFactory & factory)
require_nonempty,
dict_lifetime,
use_async_executor,
std::chrono::seconds(settings.max_execution_time.totalSeconds()),
};
if (source_ptr->hasUpdateField() && shards > 1)

View File

@ -33,8 +33,6 @@ enum class MetadataStorageType
Memory,
};
String toString(DataSourceType data_source_type);
struct DataSourceDescription
{
DataSourceType type;

View File

@ -23,7 +23,6 @@ namespace ErrorCodes
extern const int CANNOT_OPEN_FILE;
extern const int FILE_DOESNT_EXIST;
extern const int BAD_FILE_TYPE;
extern const int FILE_ALREADY_EXISTS;
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int LOGICAL_ERROR;
}
@ -593,14 +592,8 @@ void DiskObjectStorageTransaction::moveDirectory(const std::string & from_path,
void DiskObjectStorageTransaction::moveFile(const String & from_path, const String & to_path)
{
operations_to_execute.emplace_back(
std::make_unique<PureMetadataObjectStorageOperation>(object_storage, metadata_storage, [from_path, to_path, this](MetadataTransactionPtr tx)
std::make_unique<PureMetadataObjectStorageOperation>(object_storage, metadata_storage, [from_path, to_path](MetadataTransactionPtr tx)
{
if (metadata_storage.exists(to_path))
throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "File already exists: {}", to_path);
if (!metadata_storage.exists(from_path))
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist, cannot move", from_path);
tx->moveFile(from_path, to_path);
}));
}

View File

@ -96,10 +96,10 @@ S3::URI getS3URI(const Poco::Util::AbstractConfiguration & config, const std::st
}
void checkS3Capabilities(
S3ObjectStorage & storage, const S3Capabilities s3_capabilities, const String & name, const String & key_with_trailing_slash)
S3ObjectStorage & storage, const S3Capabilities s3_capabilities, const String & name)
{
/// If `support_batch_delete` is turned on (default), check and possibly switch it off.
if (s3_capabilities.support_batch_delete && !checkBatchRemove(storage, key_with_trailing_slash))
if (s3_capabilities.support_batch_delete && !checkBatchRemove(storage))
{
LOG_WARNING(
getLogger("S3ObjectStorage"),
@ -134,7 +134,7 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory)
/// NOTE: should we still perform this check for clickhouse-disks?
if (!skip_access_check)
checkS3Capabilities(*object_storage, s3_capabilities, name, uri.key);
checkS3Capabilities(*object_storage, s3_capabilities, name);
return object_storage;
});
@ -170,7 +170,7 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory)
/// NOTE: should we still perform this check for clickhouse-disks?
if (!skip_access_check)
checkS3Capabilities(*object_storage, s3_capabilities, name, uri.key);
checkS3Capabilities(*object_storage, s3_capabilities, name);
return object_storage;
});
@ -206,7 +206,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory)
#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD)
void registerAzureObjectStorage(ObjectStorageFactory & factory)
{
factory.registerObjectStorageType("azure_blob_storage", [](
auto creator = [](
const std::string & name,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
@ -220,7 +220,9 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory)
getAzureBlobStorageSettings(config, config_prefix, context),
endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix);
});
};
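/// Editorial note: registering the same creator under both names keeps the
/// legacy "azure_blob_storage" spelling working while adding the shorter
/// "azure" alias; registerLocalObjectStorage below follows the same pattern.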
factory.registerObjectStorageType("azure_blob_storage", creator);
factory.registerObjectStorageType("azure", creator);
}
#endif
@ -254,7 +256,7 @@ void registerWebObjectStorage(ObjectStorageFactory & factory)
void registerLocalObjectStorage(ObjectStorageFactory & factory)
{
factory.registerObjectStorageType("local_blob_storage", [](
auto creator = [](
const std::string & name,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
@ -267,7 +269,10 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory)
/// Keys are mapped to the filesystem; object_key_prefix is also a directory.
fs::create_directories(object_key_prefix);
return std::make_shared<LocalObjectStorage>(object_key_prefix);
});
};
factory.registerObjectStorageType("local_blob_storage", creator);
factory.registerObjectStorageType("local", creator);
}
#endif

View File

@ -79,13 +79,14 @@ static String getServerUUID()
return toString(server_uuid);
}
bool checkBatchRemove(S3ObjectStorage & storage, const String & key_with_trailing_slash)
bool checkBatchRemove(S3ObjectStorage & storage)
{
/// NOTE: key_with_trailing_slash is the disk prefix, it is required
/// because access is done via S3ObjectStorage not via IDisk interface
/// (since we don't have disk yet).
const String path = fmt::format("{}clickhouse_remove_objects_capability_{}", key_with_trailing_slash, getServerUUID());
StoredObject object(path);
/// NOTE: Here we are going to write and later drop some key.
/// We are using generateObjectKeyForPath(), which returns a random object key.
/// That generated key is placed in the right directory, where we should have write access.
const String path = fmt::format("clickhouse_remove_objects_capability_{}", getServerUUID());
const auto key = storage.generateObjectKeyForPath(path);
StoredObject object(key.serialize(), path);
try
{
auto file = storage.writeObject(object, WriteMode::Rewrite);

View File

@ -18,7 +18,7 @@ ObjectStorageKeysGeneratorPtr getKeyGenerator(
const String & config_prefix);
class S3ObjectStorage;
bool checkBatchRemove(S3ObjectStorage & storage, const std::string & key_with_trailing_slash);
bool checkBatchRemove(S3ObjectStorage & storage);
}

View File

@ -450,8 +450,10 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo
break;
case FormatSettings::EscapingRule::JSON:
result += fmt::format(
", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_bools_as_strings={}, read_objects_as_strings={}, read_numbers_as_strings={}, "
"read_arrays_as_strings={}, try_infer_objects_as_tuples={}, infer_incomplete_types_as_strings={}, try_infer_objects={}",
", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_bools_as_strings={}, read_objects_as_strings={}, "
"read_numbers_as_strings={}, "
"read_arrays_as_strings={}, try_infer_objects_as_tuples={}, infer_incomplete_types_as_strings={}, try_infer_objects={}, "
"use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects={}",
settings.json.try_infer_numbers_from_strings,
settings.json.read_bools_as_numbers,
settings.json.read_bools_as_strings,
@ -460,7 +462,8 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo
settings.json.read_arrays_as_strings,
settings.json.try_infer_objects_as_tuples,
settings.json.infer_incomplete_types_as_strings,
settings.json.allow_object_type);
settings.json.allow_object_type,
settings.json.use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects);
break;
default:
break;

View File

@ -122,6 +122,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.json.write_named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects;
format_settings.json.skip_null_value_in_named_tuples = settings.output_format_json_skip_null_value_in_named_tuples;
format_settings.json.read_named_tuples_as_objects = settings.input_format_json_named_tuples_as_objects;
format_settings.json.use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = settings.input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects;
format_settings.json.defaults_for_missing_elements_in_named_tuple = settings.input_format_json_defaults_for_missing_elements_in_named_tuple;
format_settings.json.ignore_unknown_keys_in_named_tuple = settings.input_format_json_ignore_unknown_keys_in_named_tuple;
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;

View File

@ -202,6 +202,7 @@ struct FormatSettings
bool quote_decimals = false;
bool escape_forward_slashes = true;
bool read_named_tuples_as_objects = false;
bool use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = false;
bool write_named_tuples_as_objects = false;
bool skip_null_value_in_named_tuples = false;
bool defaults_for_missing_elements_in_named_tuple = false;

View File

@ -136,7 +136,7 @@ namespace
bool empty() const { return paths.empty(); }
DataTypePtr finalize() const
DataTypePtr finalize(bool use_string_type_for_ambiguous_paths = false) const
{
if (paths.empty())
throw Exception(ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, "Cannot infer named Tuple from JSON object because object is empty");
@ -167,7 +167,7 @@ namespace
current_node->leaf_type = type;
}
return root_node.getType();
return root_node.getType(use_string_type_for_ambiguous_paths);
}
private:
@ -180,19 +180,8 @@ namespace
/// Store the path to this node for a better exception message in case of ambiguous paths.
String path;
DataTypePtr getType() const
DataTypePtr getType(bool use_string_type_for_ambiguous_paths) const
{
/// Check if we have ambiguous paths.
/// For example:
/// 'a.b.c' : Int32 and 'a.b' : String
/// Also check if leaf type is Nothing, because the next situation is possible:
/// {"a" : {"b" : null}} -> 'a.b' : Nullable(Nothing)
/// {"a" : {"b" : {"c" : 42}}} -> 'a.b.c' : Int32
/// And after merge we will have ambiguous paths 'a.b.c' : Int32 and 'a.b' : Nullable(Nothing),
/// but it's a valid case and we should ignore path 'a.b'.
if (leaf_type && !isNothing(removeNullable(leaf_type)) && !nodes.empty())
throw Exception(ErrorCodes::INCORRECT_DATA, "JSON objects have ambiguous paths: '{}' with type {} and '{}'", path, leaf_type->getName(), nodes.begin()->second.path);
if (nodes.empty())
return leaf_type;
@ -203,10 +192,33 @@ namespace
for (const auto & [name, node] : nodes)
{
node_names.push_back(name);
node_types.push_back(node.getType());
node_types.push_back(node.getType(use_string_type_for_ambiguous_paths));
}
return std::make_shared<DataTypeTuple>(std::move(node_types), std::move(node_names));
auto tuple_type = std::make_shared<DataTypeTuple>(std::move(node_types), std::move(node_names));
/// Check if we have ambiguous paths.
/// For example:
/// 'a.b.c' : Int32 and 'a.b' : String
/// Also check if leaf type is Nothing, because the next situation is possible:
/// {"a" : {"b" : null}} -> 'a.b' : Nullable(Nothing)
/// {"a" : {"b" : {"c" : 42}}} -> 'a.b.c' : Int32
/// And after merge we will have ambiguous paths 'a.b.c' : Int32 and 'a.b' : Nullable(Nothing),
/// but it's a valid case and we should ignore path 'a.b'.
if (leaf_type && !isNothing(removeNullable(leaf_type)) && !nodes.empty())
{
if (use_string_type_for_ambiguous_paths)
return std::make_shared<DataTypeString>();
throw Exception(
ErrorCodes::INCORRECT_DATA,
"JSON objects have ambiguous data: in some objects path '{}' has type '{}' and in some - '{}'. You can enable setting "
"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects to use String type "
"for path '{}'",
path, leaf_type->getName(), tuple_type->getName(), path);
}
return tuple_type;
}
};
@ -866,13 +878,15 @@ namespace
return std::make_shared<DataTypeTuple>(nested_types);
}
template <bool is_json>
bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings)
{
if (settings.try_infer_exponent_floats)
if (is_json || settings.try_infer_exponent_floats)
return tryReadFloatText(value, buf);
return tryReadFloatTextNoExponent(value, buf);
}
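/// Editorial note: the JSON grammar always permits exponent notation in numbers,
/// so for is_json the try_infer_exponent_floats setting is deliberately bypassed
/// here (an inference from this change, not stated in the diff).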
template <bool is_json>
DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings)
{
if (buf.eof())
@ -911,7 +925,7 @@ namespace
buf.position() = number_start;
}
if (tryReadFloat(tmp_float, buf, settings))
if (tryReadFloat<is_json>(tmp_float, buf, settings))
{
if (read_int && buf.position() == int_end)
return std::make_shared<DataTypeInt64>();
@ -945,7 +959,7 @@ namespace
peekable_buf.rollbackToCheckpoint(true);
}
if (tryReadFloat(tmp_float, peekable_buf, settings))
if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings))
{
/// Float parsing reads no fewer bytes than integer parsing,
/// so the position of the buffer is either the same or further along.
@ -957,7 +971,7 @@ namespace
return std::make_shared<DataTypeFloat64>();
}
}
else if (tryReadFloat(tmp_float, buf, settings))
else if (tryReadFloat<is_json>(tmp_float, buf, settings))
{
return std::make_shared<DataTypeFloat64>();
}
@ -966,6 +980,36 @@ namespace
return nullptr;
}
template <bool is_json>
DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings)
{
ReadBufferFromString buf(field);
if (settings.try_infer_integers)
{
Int64 tmp_int;
if (tryReadIntText(tmp_int, buf) && buf.eof())
return std::make_shared<DataTypeInt64>();
/// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof.
buf.position() = buf.buffer().begin();
/// In case of Int64 overflow, try to infer UInt64
UInt64 tmp_uint;
if (tryReadIntText(tmp_uint, buf) && buf.eof())
return std::make_shared<DataTypeUInt64>();
}
/// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof.
buf.position() = buf.buffer().begin();
Float64 tmp;
if (tryReadFloat<is_json>(tmp, buf, settings) && buf.eof())
return std::make_shared<DataTypeFloat64>();
return nullptr;
}
template <bool is_json>
DataTypePtr tryInferString(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
@ -995,7 +1039,7 @@ namespace
{
if (settings.json.try_infer_numbers_from_strings)
{
if (auto number_type = tryInferNumberFromString(field, settings))
if (auto number_type = tryInferNumberFromStringImpl<true>(field, settings))
{
json_info->numbers_parsed_from_json_strings.insert(number_type.get());
return number_type;
@ -1238,7 +1282,7 @@ namespace
}
/// Number
return tryInferNumber(buf, settings);
return tryInferNumber<is_json>(buf, settings);
}
}
@ -1294,7 +1338,7 @@ void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const F
return;
}
data_type = json_paths->finalize();
data_type = json_paths->finalize(settings.json.use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects);
transformFinalInferredJSONTypeIfNeededImpl(data_type, settings, json_info, remain_nothing_types);
return;
}
@ -1377,31 +1421,7 @@ void transformFinalInferredJSONTypeIfNeeded(DataTypePtr & data_type, const Forma
DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings)
{
ReadBufferFromString buf(field);
if (settings.try_infer_integers)
{
Int64 tmp_int;
if (tryReadIntText(tmp_int, buf) && buf.eof())
return std::make_shared<DataTypeInt64>();
/// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof.
buf.position() = buf.buffer().begin();
/// In case of Int64 overflow, try to infer UInt64
UInt64 tmp_uint;
if (tryReadIntText(tmp_uint, buf) && buf.eof())
return std::make_shared<DataTypeUInt64>();
}
/// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof.
buf.position() = buf.buffer().begin();
Float64 tmp;
if (tryReadFloat(tmp, buf, settings) && buf.eof())
return std::make_shared<DataTypeFloat64>();
return nullptr;
return tryInferNumberFromStringImpl<false>(field, settings);
}
DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings)

View File

@ -538,7 +538,7 @@ struct ToStartOfInterval<IntervalKind::Kind::Microsecond>
{
Int64 scale_diff = scale_multiplier / static_cast<Int64>(1000000);
if (t >= 0) [[likely]] /// When we divide the `t` value we should round the result
return (t / microseconds + scale_diff / 2) / scale_diff * microseconds;
return (t + scale_diff / 2) / (microseconds * scale_diff) * microseconds;
else
return ((t + 1) / microseconds / scale_diff - 1) * microseconds;
}
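/// Editorial worked check of the fix above: at scale 9, scale_diff = 1000; for
/// t = 1500 ticks (1.5 us) and microseconds = 3, the old expression yields
/// (1500 / 3 + 500) / 1000 * 3 = 3, the wrong bucket, while the new one yields
/// (1500 + 500) / (3 * 1000) * 3 = 0, the start of the interval [0, 3).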
@ -581,7 +581,7 @@ struct ToStartOfInterval<IntervalKind::Kind::Millisecond>
{
Int64 scale_diff = scale_multiplier / static_cast<Int64>(1000);
if (t >= 0) [[likely]] /// When we divide the `t` value we should round the result
return (t / milliseconds + scale_diff / 2) / scale_diff * milliseconds;
return (t + scale_diff / 2) / (milliseconds * scale_diff) * milliseconds;
else
return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds;
}

View File

@ -100,7 +100,7 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors mandatory_arguments{
{"value", &isStringOrFixedString<IDataType>, nullptr, "String or FixedString"}
{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_arguments);

View File

@ -170,7 +170,8 @@ public:
/// DateTime, but if both operands are Dates, their type must be the same (e.g. Date - DateTime is invalid).
using ResultDataType = Switch<
/// Result must be Integer
Case<IsOperation<Operation>::div_int || IsOperation<Operation>::div_int_or_zero, DataTypeFromFieldType<typename Op::ResultType>>,
Case<IsOperation<Operation>::int_div || IsOperation<Operation>::int_div_or_zero,
std::conditional_t<IsDataTypeDecimalOrNumber<LeftDataType> && IsDataTypeDecimalOrNumber<RightDataType>, DataTypeFromFieldType<typename Op::ResultType>, InvalidType>>,
/// Decimal cases
Case<IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>, DecimalResultDataType>,
Case<
@ -672,8 +673,8 @@ private:
IsOperation<Operation>::minus;
static constexpr bool is_multiply = IsOperation<Operation>::multiply;
static constexpr bool is_float_division = IsOperation<Operation>::div_floating;
static constexpr bool is_int_division = IsOperation<Operation>::div_int ||
IsOperation<Operation>::div_int_or_zero;
static constexpr bool is_int_division = IsOperation<Operation>::int_div ||
IsOperation<Operation>::int_div_or_zero;
static constexpr bool is_division = is_float_division || is_int_division;
static constexpr bool is_compare = IsOperation<Operation>::least ||
IsOperation<Operation>::greatest;
@ -781,8 +782,8 @@ class FunctionBinaryArithmetic : public IFunction
static constexpr bool is_division = IsOperation<Op>::division;
static constexpr bool is_bit_hamming_distance = IsOperation<Op>::bit_hamming_distance;
static constexpr bool is_modulo = IsOperation<Op>::modulo;
static constexpr bool is_div_int = IsOperation<Op>::div_int;
static constexpr bool is_div_int_or_zero = IsOperation<Op>::div_int_or_zero;
static constexpr bool is_int_div = IsOperation<Op>::int_div;
static constexpr bool is_int_div_or_zero = IsOperation<Op>::int_div_or_zero;
ContextPtr context;
bool check_decimal_overflow = true;
@ -1007,11 +1008,11 @@ class FunctionBinaryArithmetic : public IFunction
{
function_name = "tupleModuloByNumber";
}
else if constexpr (is_div_int)
else if constexpr (is_int_div)
{
function_name = "tupleIntDivByNumber";
}
else if constexpr (is_div_int_or_zero)
else if constexpr (is_int_div_or_zero)
{
function_name = "tupleIntDivOrZeroByNumber";
}
@ -1466,7 +1467,7 @@ public:
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override
{
return ((IsOperation<Op>::div_int || IsOperation<Op>::modulo || IsOperation<Op>::positive_modulo) && !arguments[1].is_const)
return ((IsOperation<Op>::int_div || IsOperation<Op>::modulo || IsOperation<Op>::positive_modulo) && !arguments[1].is_const)
|| (IsOperation<Op>::div_floating
&& (isDecimalOrNullableDecimal(arguments[0].type) || isDecimalOrNullableDecimal(arguments[1].type)));
}
@ -1690,7 +1691,7 @@ public:
if constexpr (!std::is_same_v<ResultDataType, InvalidType>)
{
if constexpr (is_div_int || is_div_int_or_zero)
if constexpr (is_int_div || is_int_div_or_zero)
type_res = std::make_shared<ResultDataType>();
else if constexpr (IsDataTypeDecimal<LeftDataType> && IsDataTypeDecimal<RightDataType>)
{
@ -2086,7 +2087,7 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A
right_nullmap);
}
/// Here we check if we have `intDiv` or `intDivOrZero` with at least one decimal argument: in this case the intermediate result is decimal, so we need to convert it to an integer after the calculation
else if constexpr (!decimal_with_float && (is_div_int || is_div_int_or_zero) && (IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>))
else if constexpr (!decimal_with_float && (is_int_div || is_int_div_or_zero) && (IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>))
{
if constexpr (!std::is_same_v<DecimalResultType, InvalidType>)
@ -2624,7 +2625,7 @@ public:
/// Check the case when the operation is divide, intDiv or modulo and the denominator is Nullable(Something).
/// For the divide operation we should check only Nullable(Decimal), because only this case can throw a division-by-zero error.
bool division_by_nullable = !arguments[0].type->onlyNull() && !arguments[1].type->onlyNull() && arguments[1].type->isNullable()
&& (IsOperation<Op>::div_int || IsOperation<Op>::modulo || IsOperation<Op>::positive_modulo
&& (IsOperation<Op>::int_div || IsOperation<Op>::modulo || IsOperation<Op>::positive_modulo
|| (IsOperation<Op>::div_floating
&& (isDecimalOrNullableDecimal(arguments[0].type) || isDecimalOrNullableDecimal(arguments[1].type))));

View File

@ -108,8 +108,10 @@ struct FunctionArgumentDescriptor
{
const char * argument_name;
std::function<bool (const IDataType &)> type_validator_func;
std::function<bool (const IColumn &)> column_validator_func;
using TypeValidator = bool (*)(const IDataType &);
TypeValidator type_validator_func;
using ColumnValidator = bool (*)(const IColumn &);
ColumnValidator column_validator_func;
const char * expected_type_description;
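/// Editorial note: swapping std::function for plain function-pointer aliases is
/// what forces call sites in this diff to spell validators as
/// static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString); the cast
/// selects the one concrete instantiation of the templated is*() helpers that
/// matches bool (*)(const IDataType &).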

View File

@ -35,9 +35,9 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors args{
{"haystack", &isStringOrFixedString<IDataType>, nullptr, "String or FixedString"},
{"pattern", &isString<IDataType>, nullptr, "String"},
{"replacement", &isString<IDataType>, nullptr, "String"}
{"haystack", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"},
{"pattern", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
{"replacement", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
};
validateFunctionArgumentTypes(*this, arguments, args);

View File

@ -184,12 +184,12 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings(
const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"separator", &isString<IDataType>, isColumnConst, "const String"},
{"s", &isString<IDataType>, nullptr, "String"}
{"separator", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"},
{"s", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
};
FunctionArgumentDescriptors optional_args{
{"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
{"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
@ -198,11 +198,11 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings(
static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"s", &isString<IDataType>, nullptr, "String"},
{"s", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
FunctionArgumentDescriptors optional_args{
{"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
{"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);

View File

@ -45,7 +45,7 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors args{
{"value", &isDateTime64<IDataType>, nullptr, "DateTime64"}
{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateTime64), nullptr, "DateTime64"}
};
validateFunctionArgumentTypes(*this, arguments, args);

View File

@ -154,21 +154,21 @@ private:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
auto optional_args = FunctionArgumentDescriptors{
{"IV", &isStringOrFixedString<IDataType>, nullptr, "Initialization vector binary string"},
{"IV", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "Initialization vector binary string"},
};
if constexpr (compatibility_mode == OpenSSLDetails::CompatibilityMode::OpenSSL)
{
optional_args.emplace_back(FunctionArgumentDescriptor{
"AAD", &isStringOrFixedString<IDataType>, nullptr, "Additional authenticated data binary string for GCM mode"
"AAD", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "Additional authenticated data binary string for GCM mode"
});
}
validateFunctionArgumentTypes(*this, arguments,
FunctionArgumentDescriptors{
{"mode", &isStringOrFixedString<IDataType>, isColumnConst, "encryption mode string"},
{"input", &isStringOrFixedString<IDataType>, {}, "plaintext"},
{"key", &isStringOrFixedString<IDataType>, {}, "encryption key binary string"},
{"mode", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "encryption mode string"},
{"input", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "plaintext"},
{"key", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "encryption key binary string"},
},
optional_args
);
@ -425,21 +425,21 @@ private:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
auto optional_args = FunctionArgumentDescriptors{
{"IV", &isStringOrFixedString<IDataType>, nullptr, "Initialization vector binary string"},
{"IV", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "Initialization vector binary string"},
};
if constexpr (compatibility_mode == OpenSSLDetails::CompatibilityMode::OpenSSL)
{
optional_args.emplace_back(FunctionArgumentDescriptor{
"AAD", &isStringOrFixedString<IDataType>, nullptr, "Additional authenticated data binary string for GCM mode"
"AAD", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "Additional authenticated data binary string for GCM mode"
});
}
validateFunctionArgumentTypes(*this, arguments,
FunctionArgumentDescriptors{
{"mode", &isStringOrFixedString<IDataType>, isColumnConst, "decryption mode string"},
{"input", &isStringOrFixedString<IDataType>, {}, "ciphertext"},
{"key", &isStringOrFixedString<IDataType>, {}, "decryption key binary string"},
{"mode", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "decryption mode string"},
{"input", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "ciphertext"},
{"key", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "decryption key binary string"},
},
optional_args
);

View File

@ -2129,12 +2129,12 @@ public:
if constexpr (to_decimal)
{
mandatory_args.push_back({"scale", &isNativeInteger<IDataType>, &isColumnConst, "const Integer"});
mandatory_args.push_back({"scale", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), &isColumnConst, "const Integer"});
}
if (!to_decimal && isDateTime64<Name, ToDataType>(arguments))
{
mandatory_args.push_back({"scale", &isNativeInteger<IDataType>, &isColumnConst, "const Integer"});
mandatory_args.push_back({"scale", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), &isColumnConst, "const Integer"});
}
// toString(DateTime or DateTime64, [timezone: String])
@ -2150,7 +2150,7 @@ public:
// toDateTime64(value, scale : Integer[, timezone: String])
|| std::is_same_v<ToDataType, DataTypeDateTime64>)
{
optional_args.push_back({"timezone", &isString<IDataType>, nullptr, "String"});
optional_args.push_back({"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"});
}
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
@ -2498,11 +2498,11 @@ public:
if (isDateTime64<Name, ToDataType>(arguments))
{
validateFunctionArgumentTypes(*this, arguments,
FunctionArgumentDescriptors{{"string", &isStringOrFixedString<IDataType>, nullptr, "String or FixedString"}},
FunctionArgumentDescriptors{{"string", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}},
// optional
FunctionArgumentDescriptors{
{"precision", &isUInt8<IDataType>, isColumnConst, "const UInt8"},
{"timezone", &isStringOrFixedString<IDataType>, isColumnConst, "const String or FixedString"},
{"precision", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isUInt8), isColumnConst, "const UInt8"},
{"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "const String or FixedString"},
});
UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0;

View File

@ -51,8 +51,8 @@ struct IsOperation
static constexpr bool minus = IsSameOperation<Op, MinusImpl>::value;
static constexpr bool multiply = IsSameOperation<Op, MultiplyImpl>::value;
static constexpr bool div_floating = IsSameOperation<Op, DivideFloatingImpl>::value;
static constexpr bool div_int = IsSameOperation<Op, DivideIntegralImpl>::value;
static constexpr bool div_int_or_zero = IsSameOperation<Op, DivideIntegralOrZeroImpl>::value;
static constexpr bool int_div = IsSameOperation<Op, DivideIntegralImpl>::value;
static constexpr bool int_div_or_zero = IsSameOperation<Op, DivideIntegralOrZeroImpl>::value;
static constexpr bool modulo = IsSameOperation<Op, ModuloImpl>::value;
static constexpr bool positive_modulo = IsSameOperation<Op, PositiveModuloImpl>::value;
static constexpr bool least = IsSameOperation<Op, LeastBaseImpl>::value;
@ -60,7 +60,7 @@ struct IsOperation
static constexpr bool bit_hamming_distance = IsSameOperation<Op, BitHammingDistanceImpl>::value;
static constexpr bool division = div_floating || div_int || div_int_or_zero || modulo;
static constexpr bool division = div_floating || int_div || int_div_or_zero || modulo;
// NOTE: allow_decimal should not fully contain `division` because of divInt
static constexpr bool allow_decimal = plus || minus || multiply || division || least || greatest;
};

View File

@ -45,7 +45,7 @@ namespace
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
auto args = FunctionArgumentDescriptors{
{"json", &isString<IDataType>, nullptr, "String"},
{"json", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(*this, arguments, args);

View File

@ -27,7 +27,7 @@ public:
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"URL", &isString<IDataType>, nullptr, "String"},
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);

View File

@ -25,7 +25,7 @@ public:
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"URL", &isString<IDataType>, nullptr, "String"},
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);

View File

@ -25,7 +25,7 @@ public:
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"URL", &isString<IDataType>, nullptr, "String"},
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);

View File

@ -26,7 +26,7 @@ public:
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"URL", &isString<IDataType>, nullptr, "String"},
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);

View File

@ -84,8 +84,8 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors args{
{"array_1", &isArray<IDataType>, nullptr, "Array"},
{"array_2", &isArray<IDataType>, nullptr, "Array"},
{"array_1", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"array_2", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
};
validateFunctionArgumentTypes(*this, arguments, args);
return std::make_shared<DataTypeNumber<ResultType>>();

View File

@ -36,8 +36,8 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors args{
{"array", &isArray<IDataType>, nullptr, "Array"},
{"samples", &isUInt<IDataType>, isColumnConst, "const UInt*"},
{"array", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"samples", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isUInt), isColumnConst, "const UInt*"},
};
validateFunctionArgumentTypes(*this, arguments, args);

View File

@ -28,8 +28,8 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors args{
{"array", &isArray<IDataType>, nullptr, "Array"},
{"length", &isInteger<IDataType>, nullptr, "Integer"}
{"array", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"length", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isInteger), nullptr, "Integer"}
};
validateFunctionArgumentTypes(*this, arguments, args);

Some files were not shown because too many files have changed in this diff.