mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-13 09:52:38 +00:00
Merge remote-tracking branch 'upstream/master' into cache-better-locks
This commit is contained in:
commit
a2977e373a
78
.clang-tidy
78
.clang-tidy
@ -23,9 +23,12 @@ Checks: '*,
|
||||
-bugprone-implicit-widening-of-multiplication-result,
|
||||
-bugprone-narrowing-conversions,
|
||||
-bugprone-not-null-terminated-result,
|
||||
-bugprone-reserved-identifier,
|
||||
-bugprone-unchecked-optional-access,
|
||||
|
||||
-cert-dcl16-c,
|
||||
-cert-dcl37-c,
|
||||
-cert-dcl51-cpp,
|
||||
-cert-err58-cpp,
|
||||
-cert-msc32-c,
|
||||
-cert-msc51-cpp,
|
||||
@ -129,6 +132,7 @@ Checks: '*,
|
||||
-readability-function-cognitive-complexity,
|
||||
-readability-function-size,
|
||||
-readability-identifier-length,
|
||||
-readability-identifier-naming,
|
||||
-readability-implicit-bool-conversion,
|
||||
-readability-isolate-declaration,
|
||||
-readability-magic-numbers,
|
||||
@ -158,54 +162,28 @@ Checks: '*,
|
||||
|
||||
WarningsAsErrors: '*'
|
||||
|
||||
# TODO: use dictionary syntax for CheckOptions when minimum clang-tidy level rose to 15
|
||||
# some-check.SomeOption: 'some value'
|
||||
# instead of
|
||||
# - key: some-check.SomeOption
|
||||
# value: 'some value'
|
||||
CheckOptions:
|
||||
- key: readability-identifier-naming.ClassCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.EnumCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.LocalVariableCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.StaticConstantCase
|
||||
value: aNy_CasE
|
||||
- key: readability-identifier-naming.MemberCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.PrivateMemberPrefix
|
||||
value: ''
|
||||
- key: readability-identifier-naming.ProtectedMemberPrefix
|
||||
value: ''
|
||||
- key: readability-identifier-naming.PublicMemberCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.MethodCase
|
||||
value: camelBack
|
||||
- key: readability-identifier-naming.PrivateMethodPrefix
|
||||
value: ''
|
||||
- key: readability-identifier-naming.ProtectedMethodPrefix
|
||||
value: ''
|
||||
- key: readability-identifier-naming.ParameterPackCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.StructCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.TemplateTemplateParameterCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.TemplateUsingCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.TypeTemplateParameterCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.TypedefCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.UnionCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.UsingCase
|
||||
value: CamelCase
|
||||
- key: modernize-loop-convert.UseCxx20ReverseRanges
|
||||
value: false
|
||||
- key: performance-move-const-arg.CheckTriviallyCopyableMove
|
||||
value: false
|
||||
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
|
||||
- key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
|
||||
value: expr-type
|
||||
readability-identifier-naming.ClassCase: CamelCase
|
||||
readability-identifier-naming.EnumCase: CamelCase
|
||||
readability-identifier-naming.LocalVariableCase: lower_case
|
||||
readability-identifier-naming.StaticConstantCase: aNy_CasE
|
||||
readability-identifier-naming.MemberCase: lower_case
|
||||
readability-identifier-naming.PrivateMemberPrefix: ''
|
||||
readability-identifier-naming.ProtectedMemberPrefix: ''
|
||||
readability-identifier-naming.PublicMemberCase: lower_case
|
||||
readability-identifier-naming.MethodCase: camelBack
|
||||
readability-identifier-naming.PrivateMethodPrefix: ''
|
||||
readability-identifier-naming.ProtectedMethodPrefix: ''
|
||||
readability-identifier-naming.ParameterPackCase: lower_case
|
||||
readability-identifier-naming.StructCase: CamelCase
|
||||
readability-identifier-naming.TemplateTemplateParameterCase: CamelCase
|
||||
readability-identifier-naming.TemplateUsingCase: lower_case
|
||||
readability-identifier-naming.TypeTemplateParameterCase: CamelCase
|
||||
readability-identifier-naming.TypedefCase: CamelCase
|
||||
readability-identifier-naming.UnionCase: CamelCase
|
||||
readability-identifier-naming.UsingCase: CamelCase
|
||||
modernize-loop-convert.UseCxx20ReverseRanges: false
|
||||
performance-move-const-arg.CheckTriviallyCopyableMove: false
|
||||
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
|
||||
readability-identifier-naming.TypeTemplateParameterIgnoredRegexp: expr-type
|
||||
cppcoreguidelines-avoid-do-while.IgnoreMacros: true
|
||||
|
43
.github/workflows/backport_branches.yml
vendored
43
.github/workflows/backport_branches.yml
vendored
@ -79,7 +79,7 @@ jobs:
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ runner.temp }}/changed_images.json
|
||||
CompatibilityCheck:
|
||||
CompatibilityCheckX86:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
@ -98,12 +98,43 @@ jobs:
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheck
|
||||
- name: CompatibilityCheckX86
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [BuilderDebAarch64]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/compatibility_check
|
||||
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheckAarch64
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -421,7 +452,8 @@ jobs:
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push \
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
@ -741,7 +773,8 @@ jobs:
|
||||
- FunctionalStatefulTestDebug
|
||||
- StressTestTsan
|
||||
- IntegrationTestsRelease
|
||||
- CompatibilityCheck
|
||||
- CompatibilityCheckX86
|
||||
- CompatibilityCheckAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
|
1
.github/workflows/cherry_pick.yml
vendored
1
.github/workflows/cherry_pick.yml
vendored
@ -35,7 +35,6 @@ jobs:
|
||||
fetch-depth: 0
|
||||
- name: Cherry pick
|
||||
run: |
|
||||
sudo pip install GitPython
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 cherry_pick.py
|
||||
- name: Cleanup
|
||||
|
43
.github/workflows/master.yml
vendored
43
.github/workflows/master.yml
vendored
@ -110,7 +110,7 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
CompatibilityCheck:
|
||||
CompatibilityCheckX86:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
@ -129,12 +129,43 @@ jobs:
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheck
|
||||
- name: CompatibilityCheckX86
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [BuilderDebAarch64]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/compatibility_check
|
||||
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheckAarch64
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -829,7 +860,8 @@ jobs:
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head
|
||||
python3 docker_server.py --release-type head \
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server
|
||||
python3 docker_server.py --release-type head --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
@ -3124,7 +3156,8 @@ jobs:
|
||||
- PerformanceComparisonX86-1
|
||||
- PerformanceComparisonX86-2
|
||||
- PerformanceComparisonX86-3
|
||||
- CompatibilityCheck
|
||||
- CompatibilityCheckX86
|
||||
- CompatibilityCheckAarch64
|
||||
- ASTFuzzerTestDebug
|
||||
- ASTFuzzerTestAsan
|
||||
- ASTFuzzerTestTsan
|
||||
|
184
.github/workflows/pull_request.yml
vendored
184
.github/workflows/pull_request.yml
vendored
@ -37,7 +37,6 @@ jobs:
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 run_check.py
|
||||
PythonUnitTests:
|
||||
needs: CheckLabels
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
@ -174,7 +173,7 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
CompatibilityCheck:
|
||||
CompatibilityCheckX86:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
@ -193,12 +192,43 @@ jobs:
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheck
|
||||
- name: CompatibilityCheckX86
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [BuilderDebAarch64]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/compatibility_check
|
||||
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheckAarch64
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -886,7 +916,8 @@ jobs:
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push \
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
@ -3105,10 +3136,10 @@ jobs:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/stress_thread
|
||||
TEMP_PATH=${{runner.temp}}/stress_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Stress test (asan)
|
||||
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
|
||||
REPO_COPY=${{runner.temp}}/stress_asan/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
@ -3267,6 +3298,142 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
######################################### UPGRADE CHECK ######################################
|
||||
##############################################################################################
|
||||
UpgradeCheckAsan:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/upgrade_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Upgrade check (asan)
|
||||
REPO_COPY=${{runner.temp}}/upgrade_asan/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Upgrade check
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 upgrade_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UpgradeCheckTsan:
|
||||
needs: [BuilderDebTsan]
|
||||
# same as for stress test with tsan
|
||||
runs-on: [self-hosted, func-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/upgrade_thread
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Upgrade check (tsan)
|
||||
REPO_COPY=${{runner.temp}}/upgrade_thread/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Upgrade check
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 upgrade_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UpgradeCheckMsan:
|
||||
needs: [BuilderDebMsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/upgrade_memory
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Upgrade check (msan)
|
||||
REPO_COPY=${{runner.temp}}/upgrade_memory/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Upgrade check
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 upgrade_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UpgradeCheckDebug:
|
||||
needs: [BuilderDebDebug]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/upgrade_debug
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Upgrade check (debug)
|
||||
REPO_COPY=${{runner.temp}}/upgrade_debug/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Upgrade check
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 upgrade_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
##################################### AST FUZZERS ############################################
|
||||
##############################################################################################
|
||||
@ -4656,7 +4823,8 @@ jobs:
|
||||
- UnitTestsMsan
|
||||
- UnitTestsUBsan
|
||||
- UnitTestsReleaseClang
|
||||
- CompatibilityCheck
|
||||
- CompatibilityCheckX86
|
||||
- CompatibilityCheckAarch64
|
||||
- IntegrationTestsFlakyCheck
|
||||
- SQLancerTestRelease
|
||||
- SQLancerTestDebug
|
||||
|
31
.github/workflows/release.yml
vendored
31
.github/workflows/release.yml
vendored
@ -7,15 +7,28 @@ on: # yamllint disable-line rule:truthy
|
||||
release:
|
||||
types:
|
||||
- published
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: 'Release tag'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
ReleasePublish:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set tag from input
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
|
||||
- name: Set tag from REF
|
||||
if: github.event_name == 'release'
|
||||
run: |
|
||||
echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
|
||||
- name: Deploy packages and assets
|
||||
run: |
|
||||
GITHUB_TAG="${GITHUB_REF#refs/tags/}"
|
||||
curl --silent --data '' \
|
||||
curl --silent --data '' --no-buffer \
|
||||
'${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true'
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
@ -23,16 +36,26 @@ jobs:
|
||||
DockerServerImages:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set tag from input
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
|
||||
- name: Set tag from REF
|
||||
if: github.event_name == 'release'
|
||||
run: |
|
||||
echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
fetch-depth: 0 # otherwise we will have no version info
|
||||
ref: ${{ env.GITHUB_TAG }}
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type auto --version "${{ github.ref }}"
|
||||
python3 docker_server.py --release-type auto --version "${{ github.ref }}" --no-ubuntu \
|
||||
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" \
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server
|
||||
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
|
43
.github/workflows/release_branches.yml
vendored
43
.github/workflows/release_branches.yml
vendored
@ -71,7 +71,7 @@ jobs:
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ runner.temp }}/changed_images.json
|
||||
CompatibilityCheck:
|
||||
CompatibilityCheckX86:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
@ -90,12 +90,43 @@ jobs:
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheck
|
||||
- name: CompatibilityCheckX86
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [BuilderDebAarch64]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/compatibility_check
|
||||
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheckAarch64
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -494,7 +525,8 @@ jobs:
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push \
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
@ -1947,7 +1979,8 @@ jobs:
|
||||
- IntegrationTestsTsan1
|
||||
- IntegrationTestsTsan2
|
||||
- IntegrationTestsTsan3
|
||||
- CompatibilityCheck
|
||||
- CompatibilityCheckX86
|
||||
- CompatibilityCheckAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
|
@ -301,12 +301,12 @@ if (ENABLE_BUILD_PROFILING)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
set (CMAKE_CXX_STANDARD 20)
|
||||
set (CMAKE_CXX_EXTENSIONS ON) # Same as gnu++2a (ON) vs c++2a (OFF): https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html
|
||||
set (CMAKE_CXX_STANDARD 23)
|
||||
set (CMAKE_CXX_EXTENSIONS OFF)
|
||||
set (CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
set (CMAKE_C_STANDARD 11)
|
||||
set (CMAKE_C_EXTENSIONS ON)
|
||||
set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C
|
||||
set (CMAKE_C_STANDARD_REQUIRED ON)
|
||||
|
||||
if (COMPILER_GCC OR COMPILER_CLANG)
|
||||
@ -391,10 +391,12 @@ if (COMPILER_CLANG)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers")
|
||||
|
||||
# Set new experimental pass manager, it's a performance, build time and binary size win.
|
||||
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
|
||||
# Set new experimental pass manager, it's a performance, build time and binary size win.
|
||||
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
|
||||
endif ()
|
||||
|
||||
# We cannot afford to use LTO when compiling unit tests, and it's not enough
|
||||
# to only supply -fno-lto at the final linking stage. So we disable it
|
||||
|
@ -14,7 +14,7 @@ curl https://clickhouse.com/ | sh
|
||||
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
|
||||
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
|
||||
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
|
||||
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
|
||||
* [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
|
||||
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
|
||||
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
|
||||
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
|
||||
|
@ -2,6 +2,10 @@ if (USE_CLANG_TIDY)
|
||||
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
|
||||
endif ()
|
||||
|
||||
# TODO: Remove this. We like to compile with C++23 (set by top-level CMakeLists) but Clang crashes with our libcxx
|
||||
# when instantiated from JSON.cpp. Try again when libcxx(abi) and Clang are upgraded to 16.
|
||||
set (CMAKE_CXX_STANDARD 20)
|
||||
|
||||
set (SRCS
|
||||
argsToConfig.cpp
|
||||
coverage.cpp
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include "defines.h"
|
||||
#include "TypePair.h"
|
||||
|
||||
/// General-purpose typelist. Easy on compilation times as it does not use recursion.
|
||||
template <typename ...Args>
|
||||
@ -28,7 +27,7 @@ namespace TypeListUtils /// In some contexts it's more handy to use functions in
|
||||
constexpr Root<Args...> changeRoot(TypeList<Args...>) { return {}; }
|
||||
|
||||
template <typename F, typename ...Args>
|
||||
constexpr void forEach(TypeList<Args...>, F && f) { (std::forward<F>(f)(Id<Args>{}), ...); }
|
||||
constexpr void forEach(TypeList<Args...>, F && f) { (std::forward<F>(f)(TypeList<Args>{}), ...); }
|
||||
}
|
||||
|
||||
template <typename TypeListLeft, typename TypeListRight>
|
||||
|
@ -1,4 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
template <typename T, typename V> struct TypePair {};
|
||||
template <typename T> struct Id {};
|
@ -159,22 +159,22 @@ inline const char * find_first_symbols_sse42(const char * const begin, const cha
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
if ( (num_chars >= 1 && maybe_negate<positive>(*pos == c01))
|
||||
|| (num_chars >= 2 && maybe_negate<positive>(*pos == c02))
|
||||
|| (num_chars >= 3 && maybe_negate<positive>(*pos == c03))
|
||||
|| (num_chars >= 4 && maybe_negate<positive>(*pos == c04))
|
||||
|| (num_chars >= 5 && maybe_negate<positive>(*pos == c05))
|
||||
|| (num_chars >= 6 && maybe_negate<positive>(*pos == c06))
|
||||
|| (num_chars >= 7 && maybe_negate<positive>(*pos == c07))
|
||||
|| (num_chars >= 8 && maybe_negate<positive>(*pos == c08))
|
||||
|| (num_chars >= 9 && maybe_negate<positive>(*pos == c09))
|
||||
|| (num_chars >= 10 && maybe_negate<positive>(*pos == c10))
|
||||
|| (num_chars >= 11 && maybe_negate<positive>(*pos == c11))
|
||||
|| (num_chars >= 12 && maybe_negate<positive>(*pos == c12))
|
||||
|| (num_chars >= 13 && maybe_negate<positive>(*pos == c13))
|
||||
|| (num_chars >= 14 && maybe_negate<positive>(*pos == c14))
|
||||
|| (num_chars >= 15 && maybe_negate<positive>(*pos == c15))
|
||||
|| (num_chars >= 16 && maybe_negate<positive>(*pos == c16)))
|
||||
if ( (num_chars == 1 && maybe_negate<positive>(is_in<c01>(*pos)))
|
||||
|| (num_chars == 2 && maybe_negate<positive>(is_in<c01, c02>(*pos)))
|
||||
|| (num_chars == 3 && maybe_negate<positive>(is_in<c01, c02, c03>(*pos)))
|
||||
|| (num_chars == 4 && maybe_negate<positive>(is_in<c01, c02, c03, c04>(*pos)))
|
||||
|| (num_chars == 5 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05>(*pos)))
|
||||
|| (num_chars == 6 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06>(*pos)))
|
||||
|| (num_chars == 7 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07>(*pos)))
|
||||
|| (num_chars == 8 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08>(*pos)))
|
||||
|| (num_chars == 9 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09>(*pos)))
|
||||
|| (num_chars == 10 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10>(*pos)))
|
||||
|| (num_chars == 11 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11>(*pos)))
|
||||
|| (num_chars == 12 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12>(*pos)))
|
||||
|| (num_chars == 13 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13>(*pos)))
|
||||
|| (num_chars == 14 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14>(*pos)))
|
||||
|| (num_chars == 15 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15>(*pos)))
|
||||
|| (num_chars == 16 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16>(*pos))))
|
||||
return pos;
|
||||
return return_mode == ReturnMode::End ? end : nullptr;
|
||||
}
|
||||
|
214
base/base/hex.h
Normal file
214
base/base/hex.h
Normal file
@ -0,0 +1,214 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include "types.h"
|
||||
|
||||
/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.
|
||||
|
||||
/// Uppercase variant: the character at index i is the hex digit for the value i (16 entries).
constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF";
|
||||
/// Lowercase variant of the same 16-entry table.
constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef";
|
||||
|
||||
constexpr char hexDigitUppercase(unsigned char c)
|
||||
{
|
||||
return hex_digit_to_char_uppercase_table[c];
|
||||
}
|
||||
constexpr char hexDigitLowercase(unsigned char c)
|
||||
{
|
||||
return hex_digit_to_char_lowercase_table[c];
|
||||
}
|
||||
|
||||
/// Maps 0..255 to 00..FF or 00..ff correspondingly
|
||||
|
||||
/// 512 characters: the two uppercase hex digits of byte value b are at positions 2 * b and 2 * b + 1.
/// Each row below covers 16 consecutive byte values.
constexpr inline std::string_view hex_byte_to_char_uppercase_table = //
|
||||
"000102030405060708090A0B0C0D0E0F"
|
||||
"101112131415161718191A1B1C1D1E1F"
|
||||
"202122232425262728292A2B2C2D2E2F"
|
||||
"303132333435363738393A3B3C3D3E3F"
|
||||
"404142434445464748494A4B4C4D4E4F"
|
||||
"505152535455565758595A5B5C5D5E5F"
|
||||
"606162636465666768696A6B6C6D6E6F"
|
||||
"707172737475767778797A7B7C7D7E7F"
|
||||
"808182838485868788898A8B8C8D8E8F"
|
||||
"909192939495969798999A9B9C9D9E9F"
|
||||
"A0A1A2A3A4A5A6A7A8A9AAABACADAEAF"
|
||||
"B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF"
|
||||
"C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF"
|
||||
"D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF"
|
||||
"E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF"
|
||||
"F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF";
|
||||
|
||||
/// 512 characters: the two lowercase hex digits of byte value b are at positions 2 * b and 2 * b + 1.
/// Each row below covers 16 consecutive byte values.
constexpr inline std::string_view hex_byte_to_char_lowercase_table = //
|
||||
"000102030405060708090a0b0c0d0e0f"
|
||||
"101112131415161718191a1b1c1d1e1f"
|
||||
"202122232425262728292a2b2c2d2e2f"
|
||||
"303132333435363738393a3b3c3d3e3f"
|
||||
"404142434445464748494a4b4c4d4e4f"
|
||||
"505152535455565758595a5b5c5d5e5f"
|
||||
"606162636465666768696a6b6c6d6e6f"
|
||||
"707172737475767778797a7b7c7d7e7f"
|
||||
"808182838485868788898a8b8c8d8e8f"
|
||||
"909192939495969798999a9b9c9d9e9f"
|
||||
"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
|
||||
"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
|
||||
"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
|
||||
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
|
||||
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
|
||||
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
|
||||
|
||||
/// Writes the two-character uppercase hex representation of `byte` to `out`.
/// Exactly 2 bytes are written; no terminating zero is appended.
inline void writeHexByteUppercase(UInt8 byte, void * out)
{
    const size_t offset = static_cast<size_t>(byte) * 2;
    memcpy(out, &hex_byte_to_char_uppercase_table[offset], 2);
}
|
||||
|
||||
/// Writes the two-character lowercase hex representation of `byte` to `out`.
/// Exactly 2 bytes are written; no terminating zero is appended.
inline void writeHexByteLowercase(UInt8 byte, void * out)
{
    const size_t offset = static_cast<size_t>(byte) * 2;
    memcpy(out, &hex_byte_to_char_lowercase_table[offset], 2);
}
|
||||
|
||||
/// Maps 0..255 to 00000000..11111111 correspondingly.
/// 2048 characters: the 8 binary digits of byte value b start at position 8 * b (most significant bit first).
/// Each row below covers 8 consecutive byte values.
constexpr inline std::string_view bin_byte_to_char_table = //
|
||||
"0000000000000001000000100000001100000100000001010000011000000111"
|
||||
"0000100000001001000010100000101100001100000011010000111000001111"
|
||||
"0001000000010001000100100001001100010100000101010001011000010111"
|
||||
"0001100000011001000110100001101100011100000111010001111000011111"
|
||||
"0010000000100001001000100010001100100100001001010010011000100111"
|
||||
"0010100000101001001010100010101100101100001011010010111000101111"
|
||||
"0011000000110001001100100011001100110100001101010011011000110111"
|
||||
"0011100000111001001110100011101100111100001111010011111000111111"
|
||||
"0100000001000001010000100100001101000100010001010100011001000111"
|
||||
"0100100001001001010010100100101101001100010011010100111001001111"
|
||||
"0101000001010001010100100101001101010100010101010101011001010111"
|
||||
"0101100001011001010110100101101101011100010111010101111001011111"
|
||||
"0110000001100001011000100110001101100100011001010110011001100111"
|
||||
"0110100001101001011010100110101101101100011011010110111001101111"
|
||||
"0111000001110001011100100111001101110100011101010111011001110111"
|
||||
"0111100001111001011110100111101101111100011111010111111001111111"
|
||||
"1000000010000001100000101000001110000100100001011000011010000111"
|
||||
"1000100010001001100010101000101110001100100011011000111010001111"
|
||||
"1001000010010001100100101001001110010100100101011001011010010111"
|
||||
"1001100010011001100110101001101110011100100111011001111010011111"
|
||||
"1010000010100001101000101010001110100100101001011010011010100111"
|
||||
"1010100010101001101010101010101110101100101011011010111010101111"
|
||||
"1011000010110001101100101011001110110100101101011011011010110111"
|
||||
"1011100010111001101110101011101110111100101111011011111010111111"
|
||||
"1100000011000001110000101100001111000100110001011100011011000111"
|
||||
"1100100011001001110010101100101111001100110011011100111011001111"
|
||||
"1101000011010001110100101101001111010100110101011101011011010111"
|
||||
"1101100011011001110110101101101111011100110111011101111011011111"
|
||||
"1110000011100001111000101110001111100100111001011110011011100111"
|
||||
"1110100011101001111010101110101111101100111011011110111011101111"
|
||||
"1111000011110001111100101111001111110100111101011111011011110111"
|
||||
"1111100011111001111110101111101111111100111111011111111011111111";
|
||||
|
||||
/// Writes the eight-character binary representation of `byte` to `out`.
/// Exactly 8 bytes are written; no terminating zero is appended.
inline void writeBinByte(UInt8 byte, void * out)
{
    const size_t offset = static_cast<size_t>(byte) * 8;
    memcpy(out, &bin_byte_to_char_table[offset], 8);
}
|
||||
|
||||
/// Produces hex representation of an unsigned int with leading zeros (for checksums)
|
||||
template <typename TUInt>
|
||||
inline void writeHexUIntImpl(TUInt uint_, char * out, std::string_view table)
|
||||
{
|
||||
union
|
||||
{
|
||||
TUInt value;
|
||||
UInt8 uint8[sizeof(TUInt)];
|
||||
};
|
||||
|
||||
value = uint_;
|
||||
|
||||
for (size_t i = 0; i < sizeof(TUInt); ++i)
|
||||
{
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
|
||||
else
|
||||
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[i]) * 2], 2);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
inline void writeHexUIntUppercase(TUInt uint_, char * out)
|
||||
{
|
||||
writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table);
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
inline void writeHexUIntLowercase(TUInt uint_, char * out)
|
||||
{
|
||||
writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table);
|
||||
}
|
||||
|
||||
/// Returns the uppercase hex representation of `uint_` as a string of sizeof(TUInt) * 2 characters.
template <typename TUInt>
std::string getHexUIntUppercase(TUInt uint_)
{
    std::string hex;
    /// Pre-size the buffer so that the writer can fill it in place.
    hex.resize(sizeof(TUInt) * 2, '\0');
    writeHexUIntUppercase(uint_, hex.data());
    return hex;
}
|
||||
|
||||
/// Returns the lowercase hex representation of `uint_` as a string of sizeof(TUInt) * 2 characters.
template <typename TUInt>
std::string getHexUIntLowercase(TUInt uint_)
{
    std::string hex;
    /// Pre-size the buffer so that the writer can fill it in place.
    hex.resize(sizeof(TUInt) * 2, '\0');
    writeHexUIntLowercase(uint_, hex.data());
    return hex;
}
|
||||
|
||||
/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value.
|
||||
|
||||
/// 256 entries indexed by the unsigned value of a character; each row below covers 16 character codes.
/// Hex digits map to their value, every other entry in this table is 0xff.
constexpr inline std::string_view hex_char_to_digit_table
|
||||
= {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" // '0'..'9', then ':'..'?'
|
||||
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" // '@', 'A'..'F' (hex digits), 'G'..'O'
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" // '`', 'a'..'f' (hex digits), 'g'..'o'
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
|
||||
/// The length is passed explicitly because the literal contains an embedded "\x00" (the entry for '0').
256};
|
||||
|
||||
/// Looks up the table entry for the character `c`: 0..15 for hex digits in either case.
/// The char is converted to an unsigned byte first, so that it is a valid index into the 256-entry table.
constexpr UInt8 unhex(char c)
{
    const auto index = static_cast<UInt8>(c);
    return hex_char_to_digit_table[index];
}
|
||||
|
||||
/// Decodes two hex characters at `data` into one byte: the first character is the high nibble.
/// No validation is performed; exactly data[0] and data[1] are read.
constexpr UInt8 unhex2(const char * data)
{
    const UInt8 high = unhex(data[0]);
    const UInt8 low = unhex(data[1]);
    return high * 0x10 + low;
}
|
||||
|
||||
/// Decodes four hex characters at `data` into a 16-bit value, most significant digit first.
/// No validation is performed; exactly data[0]..data[3] are read.
constexpr UInt16 unhex4(const char * data)
{
    /// Accumulate in int (as the usual arithmetic promotions would) and narrow once at the end.
    int value = 0;
    for (size_t i = 0; i < 4; ++i)
        value = value * 0x10 + static_cast<UInt16>(unhex(data[i]));
    return static_cast<UInt16>(value);
}
|
||||
|
||||
template <typename TUInt>
|
||||
constexpr TUInt unhexUInt(const char * data)
|
||||
{
|
||||
TUInt res = 0;
|
||||
if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0))
|
||||
{
|
||||
for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data)
|
||||
{
|
||||
res <<= 4;
|
||||
res += unhex(*data);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16)
|
||||
{
|
||||
res <<= 64;
|
||||
res += unhexUInt<UInt64>(data);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
13
base/base/interpolate.h
Normal file
13
base/base/interpolate.h
Normal file
@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
|
||||
/** Linear interpolation in logarithmic coordinates: returns min * (max / min)^ratio.
  * Exponential interpolation relates to linear interpolation
  * in exactly the same way as the geometric mean relates to the arithmetic mean.
  * Expects min > 0 and ratio within [0, 1] (checked by assert only).
  */
constexpr double interpolateExponential(double min, double max, double ratio)
{
    assert(min > 0 && ratio >= 0 && ratio <= 1);
    const double growth = max / min;
    return min * std::pow(growth, ratio);
}
|
@ -195,7 +195,6 @@ long splice(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, un
|
||||
#include <sys/stat.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if !defined(__aarch64__)
|
||||
struct statx {
|
||||
uint32_t stx_mask;
|
||||
uint32_t stx_blksize;
|
||||
@ -226,7 +225,6 @@ int statx(int fd, const char *restrict path, int flag,
|
||||
{
|
||||
return syscall(SYS_statx, fd, path, flag, mask, statxbuf);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#include <syscall.h>
|
||||
|
@ -8,3 +8,8 @@ int fallocate(int fd, int mode, off_t base, off_t len)
|
||||
{
|
||||
return syscall(SYS_fallocate, fd, mode, base, len);
|
||||
}
|
||||
|
||||
/// Same as fallocate(): forwards all arguments unchanged and returns its result.
int fallocate64(int fd, int mode, off_t base, off_t len)
|
||||
{
|
||||
return fallocate(fd, mode, base, len);
|
||||
}
|
||||
|
@ -9,3 +9,8 @@ ssize_t pwritev(int fd, const struct iovec *iov, int count, off_t ofs)
|
||||
/// There was cancellable syscall (syscall_cp), but I don't care.
|
||||
return syscall(SYS_pwritev, fd, iov, count, (long)(ofs), (long)(ofs>>32));
|
||||
}
|
||||
|
||||
/// Same as pwritev(): forwards all arguments unchanged and returns its result.
ssize_t pwritev64(int fd, const struct iovec *iov, int count, off_t ofs)
|
||||
{
|
||||
return pwritev(fd, iov, count, ofs);
|
||||
}
|
||||
|
@ -466,7 +466,7 @@ namespace Data
|
||||
bool extractManualImpl(std::size_t pos, T & val, SQLSMALLINT cType)
|
||||
{
|
||||
SQLRETURN rc = 0;
|
||||
T value = (T)0;
|
||||
T value;
|
||||
|
||||
resizeLengths(pos);
|
||||
|
||||
|
@ -105,6 +105,8 @@ public:
|
||||
const std::string & getText() const;
|
||||
/// Returns the text of the message.
|
||||
|
||||
void appendText(const std::string & text);
/// Appends the given text to the text of the message.
|
||||
|
||||
void setPriority(Priority prio);
|
||||
/// Sets the priority of the message.
|
||||
|
||||
|
@ -67,19 +67,7 @@ public:
|
||||
void swap(Timespan & timespan);
|
||||
/// Swaps the Timespan with another one.
|
||||
|
||||
bool operator==(const Timespan & ts) const;
|
||||
bool operator!=(const Timespan & ts) const;
|
||||
bool operator>(const Timespan & ts) const;
|
||||
bool operator>=(const Timespan & ts) const;
|
||||
bool operator<(const Timespan & ts) const;
|
||||
bool operator<=(const Timespan & ts) const;
|
||||
|
||||
bool operator==(TimeDiff microSeconds) const;
|
||||
bool operator!=(TimeDiff microSeconds) const;
|
||||
bool operator>(TimeDiff microSeconds) const;
|
||||
bool operator>=(TimeDiff microSeconds) const;
|
||||
bool operator<(TimeDiff microSeconds) const;
|
||||
bool operator<=(TimeDiff microSeconds) const;
|
||||
auto operator<=>(const Timespan & ts) const = default;
|
||||
|
||||
Timespan operator+(const Timespan & d) const;
|
||||
Timespan operator-(const Timespan & d) const;
|
||||
@ -215,78 +203,6 @@ inline Timespan::TimeDiff Timespan::totalMicroseconds() const
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator==(const Timespan & ts) const
|
||||
{
|
||||
return _span == ts._span;
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator!=(const Timespan & ts) const
|
||||
{
|
||||
return _span != ts._span;
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator>(const Timespan & ts) const
|
||||
{
|
||||
return _span > ts._span;
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator>=(const Timespan & ts) const
|
||||
{
|
||||
return _span >= ts._span;
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator<(const Timespan & ts) const
|
||||
{
|
||||
return _span < ts._span;
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator<=(const Timespan & ts) const
|
||||
{
|
||||
return _span <= ts._span;
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator==(TimeDiff microSeconds) const
|
||||
{
|
||||
return _span == microSeconds;
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator!=(TimeDiff microSeconds) const
|
||||
{
|
||||
return _span != microSeconds;
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator>(TimeDiff microSeconds) const
|
||||
{
|
||||
return _span > microSeconds;
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator>=(TimeDiff microSeconds) const
|
||||
{
|
||||
return _span >= microSeconds;
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator<(TimeDiff microSeconds) const
|
||||
{
|
||||
return _span < microSeconds;
|
||||
}
|
||||
|
||||
|
||||
inline bool Timespan::operator<=(TimeDiff microSeconds) const
|
||||
{
|
||||
return _span <= microSeconds;
|
||||
}
|
||||
|
||||
|
||||
inline void swap(Timespan & s1, Timespan & s2)
|
||||
{
|
||||
s1.swap(s2);
|
||||
|
@ -27,8 +27,7 @@ Message::Message():
|
||||
_tid(0),
|
||||
_file(0),
|
||||
_line(0),
|
||||
_pMap(0),
|
||||
_fmt_str(0)
|
||||
_pMap(0)
|
||||
{
|
||||
init();
|
||||
}
|
||||
@ -157,6 +156,12 @@ void Message::setText(const std::string& text)
|
||||
}
|
||||
|
||||
|
||||
/// Appends `text` to the end of the stored message text (_text).
void Message::appendText(const std::string & text)
|
||||
{
|
||||
_text.append(text);
|
||||
}
|
||||
|
||||
|
||||
void Message::setPriority(Priority prio)
|
||||
{
|
||||
_prio = prio;
|
||||
|
@ -90,6 +90,9 @@ namespace MongoDB
|
||||
|
||||
Poco::Net::SocketAddress address() const;
|
||||
/// Returns the address of the MongoDB server.
|
||||
|
||||
const std::string & uri() const;
|
||||
/// Returns the uri on which the connection was made.
|
||||
|
||||
void connect(const std::string & hostAndPort);
|
||||
/// Connects to the given MongoDB server.
|
||||
@ -148,6 +151,7 @@ namespace MongoDB
|
||||
private:
|
||||
Poco::Net::SocketAddress _address;
|
||||
Poco::Net::StreamSocket _socket;
|
||||
std::string _uri;
|
||||
};
|
||||
|
||||
|
||||
@ -158,6 +162,10 @@ namespace MongoDB
|
||||
{
|
||||
return _address;
|
||||
}
|
||||
/// Returns a reference to the stored connection URI (_uri).
inline const std::string & Connection::uri() const
|
||||
{
|
||||
return _uri;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -145,68 +145,155 @@ void Connection::connect(const Poco::Net::StreamSocket& socket)
|
||||
|
||||
void Connection::connect(const std::string& uri, SocketFactory& socketFactory)
|
||||
{
|
||||
Poco::URI theURI(uri);
|
||||
if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri);
|
||||
std::vector<std::string> strAddresses;
|
||||
std::string newURI;
|
||||
|
||||
std::string userInfo = theURI.getUserInfo();
|
||||
std::string host = theURI.getHost();
|
||||
Poco::UInt16 port = theURI.getPort();
|
||||
if (port == 0) port = 27017;
|
||||
if (uri.find(',') != std::string::npos)
|
||||
{
|
||||
size_t pos;
|
||||
size_t head = 0;
|
||||
if ((pos = uri.find("@")) != std::string::npos)
|
||||
{
|
||||
head = pos + 1;
|
||||
}
|
||||
else if ((pos = uri.find("://")) != std::string::npos)
|
||||
{
|
||||
head = pos + 3;
|
||||
}
|
||||
|
||||
std::string databaseName = theURI.getPath();
|
||||
if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1);
|
||||
if (databaseName.empty()) databaseName = "admin";
|
||||
std::string tempstr;
|
||||
std::string::const_iterator it = uri.begin();
|
||||
it += head;
|
||||
size_t tail = head;
|
||||
for (;it != uri.end() && *it != '?' && *it != '/'; ++it)
|
||||
{
|
||||
tempstr += *it;
|
||||
tail++;
|
||||
}
|
||||
|
||||
bool ssl = false;
|
||||
Poco::Timespan connectTimeout;
|
||||
Poco::Timespan socketTimeout;
|
||||
std::string authMechanism = Database::AUTH_SCRAM_SHA1;
|
||||
it = tempstr.begin();
|
||||
std::string token;
|
||||
for (;it != tempstr.end(); ++it)
|
||||
{
|
||||
if (*it == ',')
|
||||
{
|
||||
newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
|
||||
strAddresses.push_back(newURI);
|
||||
token = "";
|
||||
}
|
||||
else
|
||||
{
|
||||
token += *it;
|
||||
}
|
||||
}
|
||||
newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
|
||||
strAddresses.push_back(newURI);
|
||||
}
|
||||
else
|
||||
{
|
||||
strAddresses.push_back(uri);
|
||||
}
|
||||
|
||||
Poco::URI::QueryParameters params = theURI.getQueryParameters();
|
||||
for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
|
||||
{
|
||||
if (it->first == "ssl")
|
||||
{
|
||||
ssl = (it->second == "true");
|
||||
}
|
||||
else if (it->first == "connectTimeoutMS")
|
||||
{
|
||||
connectTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
|
||||
}
|
||||
else if (it->first == "socketTimeoutMS")
|
||||
{
|
||||
socketTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
|
||||
}
|
||||
else if (it->first == "authMechanism")
|
||||
{
|
||||
authMechanism = it->second;
|
||||
}
|
||||
}
|
||||
newURI = strAddresses.front();
|
||||
Poco::URI theURI(newURI);
|
||||
if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri);
|
||||
|
||||
connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
|
||||
std::string userInfo = theURI.getUserInfo();
|
||||
std::string databaseName = theURI.getPath();
|
||||
if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1);
|
||||
if (databaseName.empty()) databaseName = "admin";
|
||||
|
||||
if (socketTimeout > 0)
|
||||
{
|
||||
_socket.setSendTimeout(socketTimeout);
|
||||
_socket.setReceiveTimeout(socketTimeout);
|
||||
}
|
||||
bool ssl = false;
|
||||
Poco::Timespan connectTimeout;
|
||||
Poco::Timespan socketTimeout;
|
||||
std::string authMechanism = Database::AUTH_SCRAM_SHA1;
|
||||
std::string readPreference="primary";
|
||||
|
||||
if (!userInfo.empty())
|
||||
{
|
||||
std::string username;
|
||||
std::string password;
|
||||
std::string::size_type pos = userInfo.find(':');
|
||||
if (pos != std::string::npos)
|
||||
{
|
||||
username.assign(userInfo, 0, pos++);
|
||||
password.assign(userInfo, pos, userInfo.size() - pos);
|
||||
}
|
||||
else username = userInfo;
|
||||
Poco::URI::QueryParameters params = theURI.getQueryParameters();
|
||||
for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
|
||||
{
|
||||
if (it->first == "ssl")
|
||||
{
|
||||
ssl = (it->second == "true");
|
||||
}
|
||||
else if (it->first == "connectTimeoutMS")
|
||||
{
|
||||
connectTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
|
||||
}
|
||||
else if (it->first == "socketTimeoutMS")
|
||||
{
|
||||
socketTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
|
||||
}
|
||||
else if (it->first == "authMechanism")
|
||||
{
|
||||
authMechanism = it->second;
|
||||
}
|
||||
else if (it->first == "readPreference")
|
||||
{
|
||||
readPreference= it->second;
|
||||
}
|
||||
}
|
||||
|
||||
Database database(databaseName);
|
||||
if (!database.authenticate(*this, username, password, authMechanism))
|
||||
throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username));
|
||||
}
|
||||
for (std::vector<std::string>::const_iterator it = strAddresses.cbegin();it != strAddresses.cend(); ++it)
|
||||
{
|
||||
newURI = *it;
|
||||
theURI = Poco::URI(newURI);
|
||||
|
||||
std::string host = theURI.getHost();
|
||||
Poco::UInt16 port = theURI.getPort();
|
||||
if (port == 0) port = 27017;
|
||||
|
||||
connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
|
||||
_uri = newURI;
|
||||
if (socketTimeout > 0)
|
||||
{
|
||||
_socket.setSendTimeout(socketTimeout);
|
||||
_socket.setReceiveTimeout(socketTimeout);
|
||||
}
|
||||
if (strAddresses.size() > 1)
|
||||
{
|
||||
Poco::MongoDB::QueryRequest request("admin.$cmd");
|
||||
request.setNumberToReturn(1);
|
||||
request.selector().add("isMaster", 1);
|
||||
Poco::MongoDB::ResponseMessage response;
|
||||
|
||||
sendRequest(request, response);
|
||||
_uri = newURI;
|
||||
if (!response.documents().empty())
|
||||
{
|
||||
Poco::MongoDB::Document::Ptr doc = response.documents()[0];
|
||||
if (doc->get<bool>("ismaster") && readPreference == "primary")
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if (!doc->get<bool>("ismaster") && readPreference == "secondary")
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if (it + 1 == strAddresses.cend())
|
||||
{
|
||||
throw Poco::URISyntaxException(uri);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!userInfo.empty())
|
||||
{
|
||||
std::string username;
|
||||
std::string password;
|
||||
std::string::size_type pos = userInfo.find(':');
|
||||
if (pos != std::string::npos)
|
||||
{
|
||||
username.assign(userInfo, 0, pos++);
|
||||
password.assign(userInfo, pos, userInfo.size() - pos);
|
||||
}
|
||||
else username = userInfo;
|
||||
|
||||
Database database(databaseName);
|
||||
|
||||
if (!database.authenticate(*this, username, password, authMechanism))
|
||||
throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -30,7 +30,7 @@ elseif (ARCH_AARCH64)
|
||||
# support it.
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8+crc")
|
||||
else ()
|
||||
# ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1]. In particular, it
|
||||
# ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1, 10]. In particular, it
|
||||
# includes LSE (made mandatory with ARMv8.1) which provides nice speedups without having to fall back to compat flag
|
||||
# "-moutline-atomics" for v8.0 [2, 3, 4] that requires a recent glibc with runtime dispatch helper, limiting our ability to run on
|
||||
# old OSs.
|
||||
@ -45,19 +45,20 @@ elseif (ARCH_AARCH64)
|
||||
# dotprod: Scalar vector product (SDOT and UDOT instructions). Probably the most obscure extra flag with doubtful performance benefits
|
||||
# but it has been activated since always, so why not enable it. It's not 100% clear in which revision this flag was
|
||||
# introduced as optional, either in v8.2 [7] or in v8.4 [8].
|
||||
# ldapr: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code.
|
||||
# Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton 2+, Azure and GCP instances. Generated from clang 15.
|
||||
# rcpc: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code.
|
||||
# Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton >=2, Azure and GCP instances.
|
||||
#
|
||||
# [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md
|
||||
# [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
|
||||
# [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/
|
||||
# [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci
|
||||
# [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support
|
||||
# [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en
|
||||
# [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
|
||||
# [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-
|
||||
# [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument")
|
||||
# [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md
|
||||
# [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
|
||||
# [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/
|
||||
# [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci
|
||||
# [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support
|
||||
# [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en
|
||||
# [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
|
||||
# [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-
|
||||
# [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en
|
||||
# [10] https://github.com/aws/aws-graviton-getting-started/blob/main/README.md
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs+rcpc")
|
||||
endif ()
|
||||
|
||||
# Best-effort check: The build generates and executes intermediate binaries, e.g. protoc and llvm-tablegen. If we build on ARM for ARM
|
||||
|
@ -50,15 +50,18 @@ endif ()
|
||||
string (REGEX MATCHALL "[0-9]+" COMPILER_VERSION_LIST ${CMAKE_CXX_COMPILER_VERSION})
|
||||
list (GET COMPILER_VERSION_LIST 0 COMPILER_VERSION_MAJOR)
|
||||
|
||||
# Example values: `lld-10`, `gold`.
|
||||
# Example values: `lld-10`
|
||||
option (LINKER_NAME "Linker name or full path")
|
||||
|
||||
if (LINKER_NAME MATCHES "gold")
|
||||
message (FATAL_ERROR "Linking with gold is unsupported. Please use lld.")
|
||||
endif ()
|
||||
|
||||
# s390x doesn't support lld
|
||||
if (NOT ARCH_S390X)
|
||||
if (NOT LINKER_NAME)
|
||||
if (COMPILER_GCC)
|
||||
find_program (LLD_PATH NAMES "ld.lld")
|
||||
find_program (GOLD_PATH NAMES "ld.gold")
|
||||
elseif (COMPILER_CLANG)
|
||||
# llvm lld is a generic driver.
|
||||
# Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld (WebAssembly) instead
|
||||
@ -67,13 +70,11 @@ if (NOT ARCH_S390X)
|
||||
elseif (OS_DARWIN)
|
||||
find_program (LLD_PATH NAMES "ld64.lld-${COMPILER_VERSION_MAJOR}" "ld64.lld")
|
||||
endif ()
|
||||
find_program (GOLD_PATH NAMES "ld.gold" "gold")
|
||||
endif ()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if ((OS_LINUX OR OS_DARWIN) AND NOT LINKER_NAME)
|
||||
# prefer lld linker over gold or ld on linux and macos
|
||||
if (LLD_PATH)
|
||||
if (COMPILER_GCC)
|
||||
# GCC driver requires one of supported linker names like "lld".
|
||||
@ -83,17 +84,6 @@ if ((OS_LINUX OR OS_DARWIN) AND NOT LINKER_NAME)
|
||||
set (LINKER_NAME ${LLD_PATH})
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT LINKER_NAME)
|
||||
if (GOLD_PATH)
|
||||
message (FATAL_ERROR "Linking with gold is unsupported. Please use lld.")
|
||||
if (COMPILER_GCC)
|
||||
set (LINKER_NAME "gold")
|
||||
else ()
|
||||
set (LINKER_NAME ${GOLD_PATH})
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
# TODO: allow different linker on != OS_LINUX
|
||||
|
||||
|
@ -45,6 +45,7 @@ if (COMPILER_CLANG)
|
||||
no_warning(weak-vtables)
|
||||
no_warning(thread-safety-negative) # experimental flag, too many false positives
|
||||
no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16
|
||||
no_warning(unsafe-buffer-usage) # too aggressive
|
||||
# TODO Enable conversion, sign-conversion, double-promotion warnings.
|
||||
elseif (COMPILER_GCC)
|
||||
# Add compiler options only to c++ compiler
|
||||
|
@ -115,6 +115,13 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A
|
||||
|
||||
# ARROW_ORC + adapters/orc/CMakefiles
|
||||
set(ORC_SRCS
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/Literal.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/TruthValue.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Exceptions.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/OrcFile.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Reader.cc"
|
||||
@ -129,13 +136,20 @@ set(ORC_SRCS
|
||||
"${ORC_SOURCE_SRC_DIR}/MemoryPool.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RLE.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RLEv1.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RLEv2.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RleDecoderV2.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RleEncoderV2.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RLEV2Util.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Statistics.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/StripeStream.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Timezone.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/TypeImpl.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Vector.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Writer.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Adaptor.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/BloomFilter.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Murmur3.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/BlockBuffer.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/wrap/orc-proto-wrapper.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/io/InputStream.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/io/OutputStream.cc"
|
||||
"${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc"
|
||||
@ -358,6 +372,9 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})
|
||||
add_definitions(-DARROW_WITH_ZSTD)
|
||||
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
|
||||
|
||||
add_definitions(-DARROW_WITH_BROTLI)
|
||||
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS})
|
||||
|
||||
|
||||
add_library(_arrow ${ARROW_SRCS})
|
||||
|
||||
@ -372,6 +389,7 @@ target_link_libraries(_arrow PRIVATE
|
||||
ch_contrib::snappy
|
||||
ch_contrib::zlib
|
||||
ch_contrib::zstd
|
||||
ch_contrib::brotli
|
||||
)
|
||||
target_link_libraries(_arrow PUBLIC _orc)
|
||||
|
||||
|
2
contrib/capnproto
vendored
2
contrib/capnproto
vendored
@ -1 +1 @@
|
||||
Subproject commit e19cd661e49dd9022d3f920b69d843333b896451
|
||||
Subproject commit dc8b50b999777bcb23c89bb5907c785c3f654441
|
@ -48,6 +48,9 @@ set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
# We don't want to build C# extensions.
|
||||
set(gRPC_BUILD_CSHARP_EXT OFF)
|
||||
|
||||
# TODO: Remove this. We generally like to compile with C++23 but grpc isn't ready yet.
|
||||
set (CMAKE_CXX_STANDARD 20)
|
||||
|
||||
set(_gRPC_CARES_LIBRARIES ch_contrib::c-ares)
|
||||
set(gRPC_CARES_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}")
|
||||
|
2
contrib/krb5
vendored
2
contrib/krb5
vendored
@ -1 +1 @@
|
||||
Subproject commit f8262a1b548eb29d97e059260042036255d07f8d
|
||||
Subproject commit 9453aec0d50e5aff9b189051611b321b40935d02
|
@ -160,6 +160,8 @@ set(ALL_SRCS
|
||||
|
||||
# "${KRB5_SOURCE_DIR}/lib/gssapi/spnego/negoex_trace.c"
|
||||
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/kdf.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/cmac.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/prng.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/enc_dk_cmac.c"
|
||||
# "${KRB5_SOURCE_DIR}/lib/crypto/krb/crc32.c"
|
||||
@ -183,7 +185,6 @@ set(ALL_SRCS
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/block_size.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/string_to_key.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/verify_checksum.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/crypto_libinit.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/derive.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/random_to_key.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/verify_checksum_iov.c"
|
||||
@ -217,9 +218,7 @@ set(ALL_SRCS
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/s2k_rc4.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/valid_cksumtype.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/nfold.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/prng_fortuna.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/encrypt_length.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/cmac.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/keyblocks.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/prf_rc4.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/s2k_pbkdf2.c"
|
||||
@ -228,11 +227,11 @@ set(ALL_SRCS
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/rc4.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des3.c"
|
||||
#"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/cmac.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/sha256.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/hmac.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/kdf.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/pbkdf2.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/init.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/stubs.c"
|
||||
# "${KRB5_SOURCE_DIR}/lib/crypto/openssl/hash_provider/hash_crc32.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/hash_provider/hash_evp.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/des/des_keys.c"
|
||||
@ -312,7 +311,6 @@ set(ALL_SRCS
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/allow_weak.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/mk_rep.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/mk_priv.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/s4u_authdata.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/preauth_otp.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/init_keyblock.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/ser_addr.c"
|
||||
@ -688,6 +686,7 @@ target_include_directories(_krb5 PRIVATE
|
||||
|
||||
target_compile_definitions(_krb5 PRIVATE
|
||||
KRB5_PRIVATE
|
||||
CRYPTO_OPENSSL
|
||||
_GSS_STATIC_LINK=1
|
||||
KRB5_DEPRECATED=1
|
||||
LOCALEDIR="/usr/local/share/locale"
|
||||
|
@ -6,6 +6,10 @@ if (MSVC)
|
||||
target_compile_definitions (_farmhash PRIVATE FARMHASH_NO_BUILTIN_EXPECT=1)
|
||||
endif ()
|
||||
|
||||
if (ARCH_S390X)
|
||||
add_compile_definitions(WORDS_BIGENDIAN)
|
||||
endif ()
|
||||
|
||||
target_include_directories (_farmhash BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
add_library(ch_contrib::farmhash ALIAS _farmhash)
|
||||
|
@ -98,6 +98,16 @@ set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "")
|
||||
|
||||
set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm")
|
||||
set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm")
|
||||
|
||||
# Since we always use toolchain files to generate hermetic builds, cmake will
|
||||
# think it's a cross compilation, and LLVM will try to configure NATIVE LLVM
|
||||
# targets with all tests enabled, which will slow down cmake configuration and
|
||||
# compilation (You'll see Building native llvm-tblgen...). Let's disable the
|
||||
# cross compiling indicator for now.
|
||||
#
|
||||
# TODO We should let cmake know whether it's indeed a cross compilation in the
|
||||
# first place.
|
||||
set (CMAKE_CROSSCOMPILING 0)
|
||||
add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}")
|
||||
|
||||
set_directory_properties (PROPERTIES
|
||||
|
2
contrib/orc
vendored
2
contrib/orc
vendored
@ -1 +1 @@
|
||||
Subproject commit f9a393ed2433a60034795284f82d093b348f2102
|
||||
Subproject commit c5d7755ba0b9a95631c8daea4d094101f26ec761
|
@ -43,7 +43,8 @@
|
||||
"docker/test/stateful": {
|
||||
"name": "clickhouse/stateful-test",
|
||||
"dependent": [
|
||||
"docker/test/stress"
|
||||
"docker/test/stress",
|
||||
"docker/test/upgrade"
|
||||
]
|
||||
},
|
||||
"docker/test/unit": {
|
||||
@ -54,6 +55,10 @@
|
||||
"name": "clickhouse/stress-test",
|
||||
"dependent": []
|
||||
},
|
||||
"docker/test/upgrade": {
|
||||
"name": "clickhouse/upgrade-check",
|
||||
"dependent": []
|
||||
},
|
||||
"docker/test/codebrowser": {
|
||||
"name": "clickhouse/codebrowser",
|
||||
"dependent": []
|
||||
|
@ -29,7 +29,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
esac
|
||||
|
||||
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
|
||||
ARG VERSION="23.2.1.2537"
|
||||
ARG VERSION="23.2.4.12"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="23.2.1.2537"
|
||||
ARG VERSION="23.2.4.12"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM ubuntu:20.04
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
@ -9,19 +9,20 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
&& groupadd -r clickhouse --gid=101 \
|
||||
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
|
||||
&& apt-get update \
|
||||
&& apt-get upgrade -yq \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
apt-transport-https \
|
||||
ca-certificates \
|
||||
dirmngr \
|
||||
gnupg \
|
||||
locales \
|
||||
gnupg2 \
|
||||
wget \
|
||||
locales \
|
||||
tzdata \
|
||||
&& apt-get clean
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="23.2.1.2537"
|
||||
ARG VERSION="23.2.4.12"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
@ -80,15 +81,8 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
|
||||
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
|
||||
|
||||
# Remove as much of Ubuntu as possible.
|
||||
# ClickHouse does not need Ubuntu. It can run on top of Linux kernel without any OS distribution.
|
||||
# ClickHouse does not need Docker at all. ClickHouse is above all that.
|
||||
# It does not care about Ubuntu, Docker, or other cruft and you should neither.
|
||||
# The fact that this Docker image is based on Ubuntu is just a misconception.
|
||||
# Some vulnerability scanners are arguing about Ubuntu, which is not relevant to ClickHouse at all.
|
||||
# ClickHouse does not care when you report false vulnerabilities by running some Docker scanners.
|
||||
|
||||
RUN apt-get remove --purge -y libksba8 && apt-get autoremove -y
|
||||
RUN apt-get autoremove --purge -yq libksba8 && \
|
||||
apt-get autoremove -yq
|
||||
|
||||
# we need to allow "others" access to clickhouse folder, because docker container
|
||||
# can be started with arbitrary uid (openshift usecase)
|
||||
|
@ -1,4 +1,4 @@
|
||||
# rebuild in #33610
|
||||
# rebuild in #47031
|
||||
# docker build -t clickhouse/stateful-test .
|
||||
ARG FROM_TAG=latest
|
||||
FROM clickhouse/stateless-test:$FROM_TAG
|
||||
|
@ -21,10 +21,9 @@ RUN apt-get update -y \
|
||||
openssl \
|
||||
netcat-openbsd \
|
||||
telnet \
|
||||
llvm-9 \
|
||||
brotli
|
||||
brotli \
|
||||
&& apt-get clean
|
||||
|
||||
COPY ./stress /stress
|
||||
COPY run.sh /
|
||||
|
||||
ENV DATASETS="hits visits"
|
||||
|
@ -8,229 +8,13 @@ dmesg --clear
|
||||
|
||||
set -x
|
||||
|
||||
# core.COMM.PID-TID
|
||||
sysctl kernel.core_pattern='core.%e.%p-%P'
|
||||
# we mount tests folder from repo to /usr/share
|
||||
ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress
|
||||
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
|
||||
|
||||
OK="\tOK\t\\N\t"
|
||||
FAIL="\tFAIL\t\\N\t"
|
||||
|
||||
FAILURE_CONTEXT_LINES=50
|
||||
FAILURE_CONTEXT_MAX_LINE_WIDTH=400
|
||||
|
||||
function escaped()
|
||||
{
|
||||
# That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language.
|
||||
# Also limit lines width just in case (too long lines are not really useful usually)
|
||||
clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH)
|
||||
from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'"
|
||||
}
|
||||
function head_escaped()
|
||||
{
|
||||
head -n $FAILURE_CONTEXT_LINES $1 | escaped
|
||||
}
|
||||
function unts()
|
||||
{
|
||||
grep -Po "[0-9][0-9]:[0-9][0-9] \K.*"
|
||||
}
|
||||
function trim_server_logs()
|
||||
{
|
||||
head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped
|
||||
}
|
||||
|
||||
function install_packages()
|
||||
{
|
||||
dpkg -i $1/clickhouse-common-static_*.deb
|
||||
dpkg -i $1/clickhouse-common-static-dbg_*.deb
|
||||
dpkg -i $1/clickhouse-server_*.deb
|
||||
dpkg -i $1/clickhouse-client_*.deb
|
||||
}
|
||||
|
||||
function configure()
|
||||
{
|
||||
# install test configs
|
||||
export USE_DATABASE_ORDINARY=1
|
||||
export EXPORT_S3_STORAGE_POLICIES=1
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
# we mount tests folder from repo to /usr/share
|
||||
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
|
||||
ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
|
||||
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
|
||||
|
||||
# avoid too slow startup
|
||||
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
|
||||
| sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" \
|
||||
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
|
||||
# for clickhouse-server (via service)
|
||||
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
|
||||
# for clickhouse-client
|
||||
export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
|
||||
|
||||
# since we run clickhouse from root
|
||||
sudo chown root: /var/lib/clickhouse
|
||||
|
||||
# Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
|
||||
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
|
||||
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
|
||||
|
||||
local total_mem
|
||||
total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
|
||||
total_mem=$(( total_mem*1024 )) # bytes
|
||||
|
||||
# Set maximum memory usage as half of total memory (less chance of OOM).
|
||||
#
|
||||
# But not via max_server_memory_usage but via max_memory_usage_for_user,
|
||||
# so that we can override this setting and execute service queries, like:
|
||||
# - hung check
|
||||
# - show/drop database
|
||||
# - ...
|
||||
#
|
||||
# So max_memory_usage_for_user will be a soft limit, and
|
||||
# max_server_memory_usage will be hard limit, and queries that should be
|
||||
# executed regardless of memory limits will use max_memory_usage_for_user=0,
|
||||
# instead of relying on max_untracked_memory
|
||||
|
||||
max_server_memory_usage_to_ram_ratio=0.5
|
||||
echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}"
|
||||
cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
|
||||
<clickhouse>
|
||||
<max_server_memory_usage_to_ram_ratio>${max_server_memory_usage_to_ram_ratio}</max_server_memory_usage_to_ram_ratio>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
local max_users_mem
|
||||
max_users_mem=$((total_mem*30/100)) # 30%
|
||||
echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G"
|
||||
cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
|
||||
<clickhouse>
|
||||
<profiles>
|
||||
<default>
|
||||
<max_memory_usage>10G</max_memory_usage>
|
||||
<max_memory_usage_for_user>${max_users_mem}</max_memory_usage_for_user>
|
||||
</default>
|
||||
</profiles>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
cat > /etc/clickhouse-server/config.d/core.xml <<EOL
|
||||
<clickhouse>
|
||||
<core_dump>
|
||||
<!-- 100GiB -->
|
||||
<size_limit>107374182400</size_limit>
|
||||
</core_dump>
|
||||
<!-- NOTE: no need to configure core_path,
|
||||
since clickhouse is not started as daemon (via clickhouse start)
|
||||
-->
|
||||
<core_path>$PWD</core_path>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
# Let OOM killer terminate other processes before clickhouse-server:
|
||||
cat > /etc/clickhouse-server/config.d/oom_score.xml <<EOL
|
||||
<clickhouse>
|
||||
<oom_score>-1000</oom_score>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
# Analyzer is not yet ready for testing
|
||||
cat > /etc/clickhouse-server/users.d/no_analyzer.xml <<EOL
|
||||
<clickhouse>
|
||||
<profiles>
|
||||
<default>
|
||||
<constraints>
|
||||
<allow_experimental_analyzer>
|
||||
<readonly/>
|
||||
</allow_experimental_analyzer>
|
||||
</constraints>
|
||||
</default>
|
||||
</profiles>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
}
|
||||
|
||||
function stop()
|
||||
{
|
||||
local max_tries="${1:-90}"
|
||||
local pid
|
||||
# Preserve the pid, since the server can hang after the PID file is deleted.
|
||||
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
|
||||
|
||||
clickhouse stop --max-tries "$max_tries" --do-not-kill && return
|
||||
|
||||
# We failed to stop the server with SIGTERM. Maybe it hung, let's collect stacktraces.
|
||||
echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv
|
||||
kill -TERM "$(pidof gdb)" ||:
|
||||
sleep 5
|
||||
echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
|
||||
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
|
||||
clickhouse stop --force
|
||||
}
|
||||
|
||||
function start()
|
||||
{
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt ${1:-120} ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||:
|
||||
echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n100 /var/log/clickhouse-server/stderr.log
|
||||
tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n100
|
||||
break
|
||||
fi
|
||||
# use root to match with current uid
|
||||
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
|
||||
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
|
||||
# and clickhouse-server can do fork-exec, for example, to run some bridge.
|
||||
# Do not set nostop noprint for all signals, because it may cause gdb to hang,
|
||||
# explicitly ignore non-fatal signals that are used by server.
|
||||
# Number of SIGRTMIN can be determined only in runtime.
|
||||
RTMIN=$(kill -l SIGRTMIN)
|
||||
echo "
|
||||
set follow-fork-mode parent
|
||||
handle SIGHUP nostop noprint pass
|
||||
handle SIGINT nostop noprint pass
|
||||
handle SIGQUIT nostop noprint pass
|
||||
handle SIGPIPE nostop noprint pass
|
||||
handle SIGTERM nostop noprint pass
|
||||
handle SIGUSR1 nostop noprint pass
|
||||
handle SIGUSR2 nostop noprint pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
backtrace full
|
||||
thread apply all backtrace full
|
||||
info registers
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
p \"done\"
|
||||
detach
|
||||
quit
|
||||
" > script.gdb
|
||||
|
||||
# FIXME Hung check may work incorrectly because of attached gdb
|
||||
# 1. False positives are possible
|
||||
# 2. We cannot attach another gdb to get stacktraces if some queries hung
|
||||
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
|
||||
sleep 5
|
||||
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
|
||||
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
|
||||
}
|
||||
# Stress tests and upgrade check use similar code that was placed
|
||||
# in a separate bash library. See tests/ci/stress_tests.lib
|
||||
source /usr/share/clickhouse-test/ci/stress_tests.lib
|
||||
|
||||
install_packages package_folder
|
||||
|
||||
@ -260,6 +44,8 @@ if [ "$is_tsan_build" -eq "0" ]; then
|
||||
fi
|
||||
|
||||
export ZOOKEEPER_FAULT_INJECTION=1
|
||||
# Initial run without S3 to create system.*_log on local file system to make it
|
||||
# available for dump via clickhouse-local
|
||||
configure
|
||||
|
||||
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
|
||||
@ -396,7 +182,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
|
||||
|
||||
start
|
||||
|
||||
./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
|
||||
stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
|
||||
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
|
||||
|
||||
@ -413,316 +199,27 @@ unset "${!THREAD_@}"
|
||||
|
||||
start
|
||||
|
||||
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|
||||
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
|
||||
&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
|
||||
>> /test_output/test_results.tsv)
|
||||
check_server_start
|
||||
|
||||
stop
|
||||
|
||||
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
|
||||
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
|
||||
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.final.log
|
||||
|
||||
# Grep logs for sanitizer asserts, crashes and other critical errors
|
||||
check_logs_for_critical_errors
|
||||
|
||||
# Sanitizer asserts
|
||||
rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp
|
||||
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
|
||||
|
||||
# OOM
|
||||
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
|
||||
&& echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Logical errors
|
||||
rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
|
||||
&& echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No logical errors$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file logical_errors.txt if it's empty
|
||||
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
|
||||
|
||||
# No such key errors
|
||||
rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \
|
||||
&& echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file no_such_key_errors.txt if it's empty
|
||||
[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
|
||||
|
||||
# Crash
|
||||
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
|
||||
&& echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Not crashed$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \
|
||||
&& echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file fatal_messages.txt if it's empty
|
||||
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
|
||||
|
||||
rg -Fa "########################################" /test_output/* > /dev/null \
|
||||
&& echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv
|
||||
|
||||
function get_gdb_log_context()
|
||||
{
|
||||
rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped
|
||||
}
|
||||
|
||||
rg -Fa " received signal " /test_output/gdb.log > /dev/null \
|
||||
&& echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv
|
||||
|
||||
if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
echo -e "Backward compatibility check\n"
|
||||
|
||||
echo "Get previous release tag"
|
||||
previous_release_tag=$(clickhouse-client --version | rg -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag)
|
||||
echo $previous_release_tag
|
||||
|
||||
echo "Clone previous release repository"
|
||||
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
|
||||
|
||||
echo "Download clickhouse-server from the previous release"
|
||||
mkdir previous_release_package_folder
|
||||
|
||||
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv
|
||||
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
|
||||
for table in query_log trace_log
|
||||
do
|
||||
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
|
||||
done
|
||||
|
||||
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
|
||||
|
||||
# Check if we cloned previous release repository successfully
|
||||
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
|
||||
then
|
||||
echo -e "Backward compatibility check: Failed to clone previous release tests$FAIL" >> /test_output/test_results.tsv
|
||||
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
|
||||
then
|
||||
echo -e "Backward compatibility check: Failed to download previous release packages$FAIL" >> /test_output/test_results.tsv
|
||||
else
|
||||
echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
|
||||
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Uninstall current packages
|
||||
dpkg --remove clickhouse-client
|
||||
dpkg --remove clickhouse-server
|
||||
dpkg --remove clickhouse-common-static-dbg
|
||||
dpkg --remove clickhouse-common-static
|
||||
|
||||
rm -rf /var/lib/clickhouse/*
|
||||
|
||||
# Make BC check more funny by forcing Ordinary engine for system database
|
||||
mkdir /var/lib/clickhouse/metadata
|
||||
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
|
||||
|
||||
# Install previous release packages
|
||||
install_packages previous_release_package_folder
|
||||
|
||||
# Start server from previous release
|
||||
# Previous version may not be ready for fault injections
|
||||
export ZOOKEEPER_FAULT_INJECTION=0
|
||||
configure
|
||||
|
||||
# Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..."
|
||||
rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||:
|
||||
rm -f /etc/clickhouse-server/users.d/marks.xml ||:
|
||||
|
||||
# Remove s3 related configs to avoid "there is no disk type `cache`"
|
||||
rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||:
|
||||
rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||:
|
||||
|
||||
# Turn on after 22.12
|
||||
rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||:
|
||||
# it uses recently introduced settings which previous versions may not have
|
||||
rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||:
|
||||
|
||||
# Turn on after 23.1
|
||||
rm -f /etc/clickhouse-server/users.d/prefetch_settings.xml ||:
|
||||
|
||||
start
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
||||
# Install new package before running stress test because we should use new
|
||||
# clickhouse-client and new clickhouse-test.
|
||||
#
|
||||
# But we should leave old binary in /usr/bin/ and debug symbols in
|
||||
# /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it
|
||||
# will print sane stacktraces and also to avoid possible crashes.
|
||||
#
|
||||
# FIXME: those files can be extracted directly from debian package, but
|
||||
# actually better solution will be to use different PATH instead of playing
|
||||
# games with files from packages.
|
||||
mv /usr/bin/clickhouse previous_release_package_folder/
|
||||
mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/
|
||||
install_packages package_folder
|
||||
mv /usr/bin/clickhouse package_folder/
|
||||
mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/
|
||||
mv previous_release_package_folder/clickhouse /usr/bin/
|
||||
mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
|
||||
|
||||
mkdir tmp_stress_output
|
||||
|
||||
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" \
|
||||
--backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
|
||||
&& echo -e "Backward compatibility check: Test script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: Test script failed$FAIL" >> /test_output/test_results.tsv
|
||||
rm -rf tmp_stress_output
|
||||
|
||||
# We experienced deadlocks in this command in very rare cases. Let's debug it:
|
||||
timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
|
||||
(
|
||||
echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
|
||||
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
|
||||
clickhouse stop --force
|
||||
)
|
||||
|
||||
# Use bigger timeout for previous version
|
||||
stop 300
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log
|
||||
|
||||
# Start new server
|
||||
mv package_folder/clickhouse /usr/bin/
|
||||
mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
|
||||
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
|
||||
export ZOOKEEPER_FAULT_INJECTION=0
|
||||
configure
|
||||
start 500
|
||||
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|
||||
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt \
|
||||
&& echo -e "Backward compatibility check: Server failed to start$FAIL$(trim_server_logs bc_check_application_errors.txt)" >> /test_output/test_results.tsv)
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
||||
# Let the server run for a while before checking log.
|
||||
sleep 60
|
||||
|
||||
stop
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.dirty.log
|
||||
|
||||
# Error messages (we should ignore some errors)
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
|
||||
# FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server.
|
||||
# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
|
||||
echo "Check for Error messages in server log:"
|
||||
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
|
||||
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
|
||||
-e "REPLICA_IS_ALREADY_ACTIVE" \
|
||||
-e "REPLICA_ALREADY_EXISTS" \
|
||||
-e "ALL_REPLICAS_LOST" \
|
||||
-e "DDLWorker: Cannot parse DDL task query" \
|
||||
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
|
||||
-e "UNKNOWN_DATABASE" \
|
||||
-e "NETWORK_ERROR" \
|
||||
-e "UNKNOWN_TABLE" \
|
||||
-e "ZooKeeperClient" \
|
||||
-e "KEEPER_EXCEPTION" \
|
||||
-e "DirectoryMonitor" \
|
||||
-e "TABLE_IS_READ_ONLY" \
|
||||
-e "Code: 1000, e.code() = 111, Connection refused" \
|
||||
-e "UNFINISHED" \
|
||||
-e "NETLINK_ERROR" \
|
||||
-e "Renaming unexpected part" \
|
||||
-e "PART_IS_TEMPORARILY_LOCKED" \
|
||||
-e "and a merge is impossible: we didn't find" \
|
||||
-e "found in queue and some source parts for it was lost" \
|
||||
-e "is lost forever." \
|
||||
-e "Unknown index: idx." \
|
||||
-e "Cannot parse string 'Hello' as UInt64" \
|
||||
-e "} <Error> TCPHandler: Code:" \
|
||||
-e "} <Error> executeQuery: Code:" \
|
||||
-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
|
||||
-e "[Queue = DB::DynamicRuntimeQueue]: Code: 235. DB::Exception: Part" \
|
||||
-e "The set of parts restored in place of" \
|
||||
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
|
||||
-e "Code: 269. DB::Exception: Destination table is myself" \
|
||||
-e "Coordination::Exception: Connection loss" \
|
||||
-e "MutateFromLogEntryTask" \
|
||||
-e "No connection to ZooKeeper, cannot get shared table ID" \
|
||||
-e "Session expired" \
|
||||
-e "TOO_MANY_PARTS" \
|
||||
-e "Container already exists" \
|
||||
/var/log/clickhouse-server/clickhouse-server.backward.dirty.log | rg -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
|
||||
&& echo -e "Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)$FAIL$(trim_server_logs bc_check_error_messages.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No Error messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_error_messages.txt if it's empty
|
||||
[ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
|
||||
|
||||
# Sanitizer asserts
|
||||
rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e "Backward compatibility check: Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No sanitizer asserts$OK" >> /test_output/test_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
|
||||
# OOM
|
||||
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
|
||||
&& echo -e "Backward compatibility check: Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Logical errors
|
||||
echo "Check for Logical errors in server log:"
|
||||
rg -Fa -A20 "Code: 49. DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
|
||||
&& echo -e "Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)$FAIL$(trim_server_logs bc_check_logical_errors.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No logical errors$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_logical_errors.txt if it's empty
|
||||
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
|
||||
|
||||
# Crash
|
||||
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
|
||||
&& echo -e "Backward compatibility check: Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: Not crashed$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
echo "Check for Fatal message in server log:"
|
||||
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \
|
||||
&& echo -e "Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)$FAIL$(trim_server_logs bc_check_fatal_messages.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_fatal_messages.txt if it's empty
|
||||
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
|
||||
|
||||
tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||:
|
||||
for table in query_log trace_log
|
||||
do
|
||||
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" \
|
||||
| zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
dmesg -T > /test_output/dmesg.log
|
||||
|
||||
# OOM in dmesg -- those are real
|
||||
grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
|
||||
&& echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv
|
||||
collect_query_and_trace_logs
|
||||
|
||||
mv /var/log/clickhouse-server/stderr.log /test_output/
|
||||
|
||||
# Write check result into check_status.tsv
|
||||
# Try to choose most specific error for the whole check status
|
||||
clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
|
||||
(test like 'Backward compatibility check%'), -- BC check goes last
|
||||
(test like '%Sanitizer%') DESC,
|
||||
(test like '%Killed by signal%') DESC,
|
||||
(test like '%gdb.log%') DESC,
|
||||
@ -732,14 +229,8 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d
|
||||
(test like '%OOM%') DESC,
|
||||
(test like '%Signal 9%') DESC,
|
||||
(test like '%Fatal message%') DESC,
|
||||
(test like '%Error message%') DESC,
|
||||
(test like '%previous release%') DESC,
|
||||
rowNumberInAllBlocks()
|
||||
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
|
||||
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
|
||||
|
||||
# Core dumps
|
||||
find . -type f -maxdepth 1 -name 'core.*' | while read core; do
|
||||
zstd --threads=0 $core
|
||||
mv $core.zst /test_output/
|
||||
done
|
||||
collect_core_dumps
|
||||
|
31
docker/test/upgrade/Dockerfile
Normal file
31
docker/test/upgrade/Dockerfile
Normal file
@ -0,0 +1,31 @@
|
||||
# rebuild in #33610
# docker build -t clickhouse/upgrade-check .
#
# Image for the upgrade check. It is layered on top of the stateful-test image
# and runs /run.sh as its default command.
ARG FROM_TAG=latest
FROM clickhouse/stateful-test:$FROM_TAG

# Install the tools used by the check. The package lists are removed in the
# same layer so that they do not end up in the image.
RUN apt-get update -y \
    && env DEBIAN_FRONTEND=noninteractive \
        apt-get install --yes --no-install-recommends \
            bash \
            tzdata \
            fakeroot \
            debhelper \
            parallel \
            expect \
            python3 \
            python3-lxml \
            python3-termcolor \
            python3-requests \
            curl \
            sudo \
            openssl \
            netcat-openbsd \
            telnet \
            brotli \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

COPY run.sh /

ENV EXPORT_S3_STORAGE_POLICIES=1

CMD ["/bin/bash", "/run.sh"]
|
215
docker/test/upgrade/run.sh
Normal file
215
docker/test/upgrade/run.sh
Normal file
@ -0,0 +1,215 @@
|
||||
#!/bin/bash
# shellcheck disable=SC2094,SC2086,SC2024

# Clear the kernel log so that messages from previous runs do not overlap with this one
dmesg --clear

set -x

# The tests folder of the repository is mounted to /usr/share;
# expose the scripts used below under short names in /usr/bin.
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages

# Code shared between the stress tests and the upgrade check lives
# in a separate bash library. See tests/ci/stress_tests.lib
. /usr/share/clickhouse-test/ci/stress_tests.lib

# Azurite is started in the background
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
# Set up minio to have a proper environment
./setup_minio.sh stateless
|
||||
|
||||
# Determine the previous release: take the version of the client package under
# test, strip everything after '+', and pass it to get_previous_release_tag.
echo "Get previous release tag"
previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | cut -f1 -d'+' | get_previous_release_tag)
echo $previous_release_tag

# Only the tagged revision is needed (its tests/queries folder is used below),
# so clone shallowly and without submodules.
echo "Clone previous release repository"
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository

echo "Download clickhouse-server from the previous release"
mkdir previous_release_package_folder

# NOTE(review): $OK and $FAIL are not defined in this script; presumably they
# come from the sourced stress_tests.lib - confirm.
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
    || echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv

# Check if we cloned previous release repository successfully
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
then
    echo -e 'failure\tFailed to clone previous release tests' > /test_output/check_status.tsv
    exit
# Both packages are required. Each one is checked on its own: capturing the
# combined output of "ls A && ls B" is non-empty as soon as A exists, even
# when B is missing.
elif ! ls previous_release_package_folder/clickhouse-common-static_*.deb > /dev/null 2>&1 \
    || ! ls previous_release_package_folder/clickhouse-server_*.deb > /dev/null 2>&1
then
    echo -e 'failure\tFailed to download previous release packages' > /test_output/check_status.tsv
    exit
fi

echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Make upgrade check more funny by forcing Ordinary engine for system database
mkdir -p /var/lib/clickhouse/metadata
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql

# Install previous release packages
install_packages previous_release_package_folder

# Initial run without S3 to create system.*_log on local file system to make it
# available for dump via clickhouse-local
configure

start
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log

# force_sync=false doesn't work correctly on some older versions
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
  | sed "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" \
  > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml

# But we still need default disk because some tables loaded only into it
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
  | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
  > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
# The move must be a separate command: when it shares a line with the
# redirection above, "mv" and both paths are passed to sed as input files and
# the rewritten config is never put in place.
sudo mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
|
||||
# Run the server from the previous release:
#  - S3 storage is enabled by default for MergeTree tables;
#  - fault injections are off, the previous version may not be ready for them.
export USE_S3_STORAGE_FOR_MERGE_TREE=1 ZOOKEEPER_FAULT_INJECTION=0
configure

start

clickhouse-client --query="SELECT 'Server version: ', version()"

mkdir tmp_stress_output

# Run the stress script with the queries of the previous release and record its outcome
stress \
    --test-cmd='/usr/bin/clickhouse-test --queries="previous_release_repository/tests/queries"' \
    --upgrade-check \
    --output-folder tmp_stress_output \
    --global-time-limit=1200 \
    && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
    || echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv

rm -rf tmp_stress_output

# This query deadlocked in very rare cases. If it fails or times out, dump the
# backtraces of all server threads for debugging and force the server to stop.
if ! timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
then
    echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
    timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
    clickhouse stop --force
fi

# The previous version gets a bigger timeout, and the additional hang check is disabled
stop 300 false
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
|
||||
|
||||
# Install and start new server
install_packages package_folder
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
export ZOOKEEPER_FAULT_INJECTION=0
configure
start 500

# A successful query appends an OK row to the results. Otherwise the Application
# errors from the server log are saved and, if any were found, a failure row is written.
# NOTE(review): the message refers to clickhouse-server.clean.log, while this
# script renames the log to clickhouse-server.upgrade.log below - confirm the name.
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
    || (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
    && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
    >> /test_output/test_results.tsv)

# Remove file application_errors.txt if it's empty.
# The file does not exist at all when the server started normally, hence -f.
[ -s /test_output/application_errors.txt ] || rm -f /test_output/application_errors.txt

clickhouse-client --query="SELECT 'Server version: ', version()"

# Let the server run for a while before checking log.
sleep 60

stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.upgrade.log
|
||||
|
||||
# Error messages (we should ignore some errors)
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
# FIXME Not sure if it's expected, but some tests from stress test may not be finished yet when we restarting server.
#       Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
#
# How the pipeline works: rg drops every log line containing one of the ignored
# substrings, grep drops one more known message by regular expression, and the
# final filter keeps only the remaining <Error> lines. If anything is left, the
# check fails.
# The log is addressed by its absolute path: this script moves it to
# /var/log/clickhouse-server/ and never changes into that directory.
echo "Check for Error messages in server log:"
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
           -e "Code: 236. DB::Exception: Cancelled mutating parts" \
           -e "REPLICA_IS_ALREADY_ACTIVE" \
           -e "REPLICA_ALREADY_EXISTS" \
           -e "ALL_REPLICAS_LOST" \
           -e "DDLWorker: Cannot parse DDL task query" \
           -e "RaftInstance: failed to accept a rpc connection due to error 125" \
           -e "UNKNOWN_DATABASE" \
           -e "NETWORK_ERROR" \
           -e "UNKNOWN_TABLE" \
           -e "ZooKeeperClient" \
           -e "KEEPER_EXCEPTION" \
           -e "DirectoryMonitor" \
           -e "TABLE_IS_READ_ONLY" \
           -e "Code: 1000, e.code() = 111, Connection refused" \
           -e "UNFINISHED" \
           -e "NETLINK_ERROR" \
           -e "Renaming unexpected part" \
           -e "PART_IS_TEMPORARILY_LOCKED" \
           -e "and a merge is impossible: we didn't find" \
           -e "found in queue and some source parts for it was lost" \
           -e "is lost forever." \
           -e "Unknown index: idx." \
           -e "Cannot parse string 'Hello' as UInt64" \
           -e "} <Error> TCPHandler: Code:" \
           -e "} <Error> executeQuery: Code:" \
           -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
           -e "The set of parts restored in place of" \
           -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
           -e "Code: 269. DB::Exception: Destination table is myself" \
           -e "Coordination::Exception: Connection loss" \
           -e "MutateFromLogEntryTask" \
           -e "No connection to ZooKeeper, cannot get shared table ID" \
           -e "Session expired" \
           -e "TOO_MANY_PARTS" \
           -e "Authentication failed" \
           -e "Cannot flush" \
           -e "Container already exists" \
    /var/log/clickhouse-server/clickhouse-server.upgrade.log \
    | grep -av -e "_repl_01111_.*Mapping for table with UUID" \
    | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
    && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \
        >> /test_output/test_results.tsv \
    || echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv

# Remove file upgrade_error_messages.txt if it's empty
[ -s /test_output/upgrade_error_messages.txt ] || rm /test_output/upgrade_error_messages.txt
|
||||
|
||||
# Grep logs for sanitizer asserts, crashes and other critical errors
check_logs_for_critical_errors

# Best effort: a failure to archive the coordination folder is ignored
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:

collect_query_and_trace_logs

mv /var/log/clickhouse-server/stderr.log /test_output/

# Write check result into check_status.tsv
# Try to choose most specific error for the whole check status:
# the failed rows are ordered by how specific the failure is and the first one
# is reported. If the query itself fails, a generic failure row is written;
# -e is required there so that \t becomes a real TAB in the TSV file.
clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
    (test like '%Sanitizer%') DESC,
    (test like '%Killed by signal%') DESC,
    (test like '%gdb.log%') DESC,
    (test ilike '%possible deadlock%') DESC,
    (test like '%start%') DESC,
    (test like '%dmesg%') DESC,
    (test like '%OOM%') DESC,
    (test like '%Signal 9%') DESC,
    (test like '%Fatal message%') DESC,
    (test like '%Error message%') DESC,
    (test like '%previous release%') DESC,
    rowNumberInAllBlocks()
    LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
# An empty status file means that no failed rows were found
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv

collect_core_dumps
|
@ -60,12 +60,21 @@ fi
|
||||
clickhouse_download_filename_prefix="clickhouse"
|
||||
clickhouse="$clickhouse_download_filename_prefix"
|
||||
|
||||
i=0
|
||||
while [ -f "$clickhouse" ]
|
||||
do
|
||||
clickhouse="${clickhouse_download_filename_prefix}.${i}"
|
||||
i=$(($i+1))
|
||||
done
|
||||
if [ -f "$clickhouse" ]
|
||||
then
|
||||
read -p "ClickHouse binary ${clickhouse} already exists. Overwrite? [y/N] " answer
|
||||
if [ "$answer" = "y" -o "$answer" = "Y" ]
|
||||
then
|
||||
rm -f "$clickhouse"
|
||||
else
|
||||
i=0
|
||||
while [ -f "$clickhouse" ]
|
||||
do
|
||||
clickhouse="${clickhouse_download_filename_prefix}.${i}"
|
||||
i=$(($i+1))
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
|
||||
echo
|
||||
|
55
docs/changelogs/v22.12.4.76-stable.md
Normal file
55
docs/changelogs/v22.12.4.76-stable.md
Normal file
@ -0,0 +1,55 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.12.4.76-stable (cb5772db805) FIXME as compared to v22.12.3.5-stable (893de538f02)
|
||||
|
||||
#### Performance Improvement
|
||||
* Backported in [#45704](https://github.com/ClickHouse/ClickHouse/issues/45704): Fixed performance of short `SELECT` queries that read from tables with large number of `Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#46378](https://github.com/ClickHouse/ClickHouse/issues/46378): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
|
||||
#### Bug Fix
|
||||
* Backported in [#45672](https://github.com/ClickHouse/ClickHouse/issues/45672): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#45200](https://github.com/ClickHouse/ClickHouse/issues/45200): Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46116](https://github.com/ClickHouse/ClickHouse/issues/46116): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46035](https://github.com/ClickHouse/ClickHouse/issues/46035): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46484](https://github.com/ClickHouse/ClickHouse/issues/46484): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46509](https://github.com/ClickHouse/ClickHouse/issues/46509): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#47058](https://github.com/ClickHouse/ClickHouse/issues/47058): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#45904](https://github.com/ClickHouse/ClickHouse/issues/45904): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#45321](https://github.com/ClickHouse/ClickHouse/issues/45321): Fixed a bug in normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#45000](https://github.com/ClickHouse/ClickHouse/issues/45000): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#45553](https://github.com/ClickHouse/ClickHouse/issues/45553): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Backported in [#46226](https://github.com/ClickHouse/ClickHouse/issues/46226): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unordered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has occurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyway because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Backported in [#46218](https://github.com/ClickHouse/ClickHouse/issues/46218): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#46446](https://github.com/ClickHouse/ClickHouse/issues/46446): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#46678](https://github.com/ClickHouse/ClickHouse/issues/46678): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46872](https://github.com/ClickHouse/ClickHouse/issues/46872): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46954](https://github.com/ClickHouse/ClickHouse/issues/46954): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
29
docs/changelogs/v22.12.5.34-stable.md
Normal file
29
docs/changelogs/v22.12.5.34-stable.md
Normal file
@ -0,0 +1,29 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.12.5.34-stable (b82d6401ca1) FIXME as compared to v22.12.4.76-stable (cb5772db805)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#46983](https://github.com/ClickHouse/ClickHouse/issues/46983): Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas, because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#45729](https://github.com/ClickHouse/ClickHouse/issues/45729): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Backported in [#46398](https://github.com/ClickHouse/ClickHouse/issues/46398): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Backported in [#46903](https://github.com/ClickHouse/ClickHouse/issues/46903): Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#47210](https://github.com/ClickHouse/ClickHouse/issues/47210): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, deduplication might not work. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#47157](https://github.com/ClickHouse/ClickHouse/issues/47157): Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Backported in [#46881](https://github.com/ClickHouse/ClickHouse/issues/46881): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#47359](https://github.com/ClickHouse/ClickHouse/issues/47359): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
17
docs/changelogs/v22.3.19.6-lts.md
Normal file
17
docs/changelogs/v22.3.19.6-lts.md
Normal file
@ -0,0 +1,17 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.3.19.6-lts (467e0a7bd77) FIXME as compared to v22.3.18.37-lts (fe512717551)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46440](https://github.com/ClickHouse/ClickHouse/issues/46440): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
40
docs/changelogs/v22.8.14.53-lts.md
Normal file
40
docs/changelogs/v22.8.14.53-lts.md
Normal file
@ -0,0 +1,40 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.8.14.53-lts (4ea67c40077) FIXME as compared to v22.8.13.20-lts (e4817946d18)
|
||||
|
||||
#### Performance Improvement
|
||||
* Backported in [#45845](https://github.com/ClickHouse/ClickHouse/issues/45845): Fixed performance of short `SELECT` queries that read from tables with a large number of `Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#46374](https://github.com/ClickHouse/ClickHouse/issues/46374): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#46358](https://github.com/ClickHouse/ClickHouse/issues/46358): Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#46112](https://github.com/ClickHouse/ClickHouse/issues/46112): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46482](https://github.com/ClickHouse/ClickHouse/issues/46482): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46505](https://github.com/ClickHouse/ClickHouse/issues/46505): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. This is fixed by prioritizing the `zst` archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#45908](https://github.com/ClickHouse/ClickHouse/issues/45908): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#46238](https://github.com/ClickHouse/ClickHouse/issues/46238): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unordered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has occurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyway because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Backported in [#45727](https://github.com/ClickHouse/ClickHouse/issues/45727): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Backported in [#46394](https://github.com/ClickHouse/ClickHouse/issues/46394): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Backported in [#46442](https://github.com/ClickHouse/ClickHouse/issues/46442): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#46674](https://github.com/ClickHouse/ClickHouse/issues/46674): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46879](https://github.com/ClickHouse/ClickHouse/issues/46879): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46871](https://github.com/ClickHouse/ClickHouse/issues/46871): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
28
docs/changelogs/v22.8.15.23-lts.md
Normal file
28
docs/changelogs/v22.8.15.23-lts.md
Normal file
@ -0,0 +1,28 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.8.15.23-lts (d36fa168bbf) FIXME as compared to v22.8.14.53-lts (4ea67c40077)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#46981](https://github.com/ClickHouse/ClickHouse/issues/46981): Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas, because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)).
|
||||
|
||||
#### Bug Fix
|
||||
* Backported in [#47336](https://github.com/ClickHouse/ClickHouse/issues/47336): Sometimes a change to a role was not reflected in the access rights of a user who uses that role. This PR fixes that. [#46772](https://github.com/ClickHouse/ClickHouse/pull/46772) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46901](https://github.com/ClickHouse/ClickHouse/issues/46901): Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#47156](https://github.com/ClickHouse/ClickHouse/issues/47156): Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Backported in [#46987](https://github.com/ClickHouse/ClickHouse/issues/46987): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Backported in [#47357](https://github.com/ClickHouse/ClickHouse/issues/47357): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
47
docs/changelogs/v23.1.4.58-stable.md
Normal file
47
docs/changelogs/v23.1.4.58-stable.md
Normal file
@ -0,0 +1,47 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.1.4.58-stable (9ed562163a5) FIXME as compared to v23.1.3.5-stable (548b494bcce)
|
||||
|
||||
#### Performance Improvement
|
||||
* Backported in [#46380](https://github.com/ClickHouse/ClickHouse/issues/46380): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#46985](https://github.com/ClickHouse/ClickHouse/issues/46985): Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas, because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)).
|
||||
* Backported in [#46778](https://github.com/ClickHouse/ClickHouse/issues/46778): Backward compatibility for T64 codec support for IPv4. [#46747](https://github.com/ClickHouse/ClickHouse/pull/46747) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#47020](https://github.com/ClickHouse/ClickHouse/issues/47020): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#46031](https://github.com/ClickHouse/ClickHouse/issues/46031): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46477](https://github.com/ClickHouse/ClickHouse/issues/46477): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46511](https://github.com/ClickHouse/ClickHouse/issues/46511): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. This is fixed by prioritizing the `zst` archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46228](https://github.com/ClickHouse/ClickHouse/issues/46228): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unordered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has occurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyway because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Backported in [#46967](https://github.com/ClickHouse/ClickHouse/issues/46967): Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#46220](https://github.com/ClickHouse/ClickHouse/issues/46220): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#46751](https://github.com/ClickHouse/ClickHouse/issues/46751): Follow-up fix for "Replace domain IP types (IPv4, IPv6) with native" ([#43221](https://github.com/ClickHouse/ClickHouse/pull/43221)). [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#46448](https://github.com/ClickHouse/ClickHouse/issues/46448): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#46680](https://github.com/ClickHouse/ClickHouse/issues/46680): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46873](https://github.com/ClickHouse/ClickHouse/issues/46873): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46956](https://github.com/ClickHouse/ClickHouse/issues/46956): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Support DELETE ON CLUSTER [#45786](https://github.com/ClickHouse/ClickHouse/pull/45786) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Wait for background tasks in ~UploadHelper [#46334](https://github.com/ClickHouse/ClickHouse/pull/46334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
28
docs/changelogs/v23.1.5.24-stable.md
Normal file
28
docs/changelogs/v23.1.5.24-stable.md
Normal file
@ -0,0 +1,28 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.1.5.24-stable (0e51b53ba99) FIXME as compared to v23.1.4.58-stable (9ed562163a5)
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#47060](https://github.com/ClickHouse/ClickHouse/issues/47060): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46401](https://github.com/ClickHouse/ClickHouse/issues/46401): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Backported in [#46905](https://github.com/ClickHouse/ClickHouse/issues/46905): Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#47211](https://github.com/ClickHouse/ClickHouse/issues/47211): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, deduplication might not work. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#47118](https://github.com/ClickHouse/ClickHouse/issues/47118): Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Backported in [#46883](https://github.com/ClickHouse/ClickHouse/issues/46883): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#47361](https://github.com/ClickHouse/ClickHouse/issues/47361): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
30
docs/changelogs/v23.2.2.20-stable.md
Normal file
30
docs/changelogs/v23.2.2.20-stable.md
Normal file
@ -0,0 +1,30 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.2.2.20-stable (f6c269c8df2) FIXME as compared to v23.2.1.2537-stable (52bf836e03a)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#46914](https://github.com/ClickHouse/ClickHouse/issues/46914): Allow PREWHERE for Merge with different DEFAULT expression for column. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#47022](https://github.com/ClickHouse/ClickHouse/issues/47022): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
|
||||
#### Bug Fix
|
||||
* Backported in [#46828](https://github.com/ClickHouse/ClickHouse/issues/46828): Combined PREWHERE column accumulated from multiple PREWHERE in some cases didn't contain 0's from previous steps. The fix is to apply final filter if we know that it wasn't applied from more than 1 last step. [#46785](https://github.com/ClickHouse/ClickHouse/pull/46785) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#47062](https://github.com/ClickHouse/ClickHouse/issues/47062): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46895](https://github.com/ClickHouse/ClickHouse/issues/46895): Fixed a bug in automatic retries of `DROP TABLE` query with `ReplicatedMergeTree` tables and `Atomic` databases. In rare cases it could lead to `Can't get data for node /zk_path/log_pointer` and `The specified key does not exist` errors if ZooKeeper session expired during DROP and a new replicated table with the same path in ZooKeeper was created in parallel. [#46384](https://github.com/ClickHouse/ClickHouse/pull/46384) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#46865](https://github.com/ClickHouse/ClickHouse/issues/46865): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46958](https://github.com/ClickHouse/ClickHouse/issues/46958): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* More concise logging at trace level for PREWHERE steps [#46771](https://github.com/ClickHouse/ClickHouse/pull/46771) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
23
docs/changelogs/v23.2.3.17-stable.md
Normal file
23
docs/changelogs/v23.2.3.17-stable.md
Normal file
@ -0,0 +1,23 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.2.3.17-stable (dec18bf7281) FIXME as compared to v23.2.2.20-stable (f6c269c8df2)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46907](https://github.com/ClickHouse/ClickHouse/issues/46907): Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#47091](https://github.com/ClickHouse/ClickHouse/issues/47091): Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Backported in [#46885](https://github.com/ClickHouse/ClickHouse/issues/46885): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#47067](https://github.com/ClickHouse/ClickHouse/issues/47067): Fix typo in systemd service, which causes the systemd service start to fail. [#47051](https://github.com/ClickHouse/ClickHouse/pull/47051) ([Palash Goel](https://github.com/palash-goel)).
|
||||
* Backported in [#47259](https://github.com/ClickHouse/ClickHouse/issues/47259): Fix concrete columns PREWHERE support. [#47154](https://github.com/ClickHouse/ClickHouse/pull/47154) ([Azat Khuzhin](https://github.com/azat)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Do flushUntrackedMemory when context switches [#47102](https://github.com/ClickHouse/ClickHouse/pull/47102) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
20
docs/changelogs/v23.2.4.12-stable.md
Normal file
20
docs/changelogs/v23.2.4.12-stable.md
Normal file
@ -0,0 +1,20 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.2.4.12-stable (8fe866cb035) FIXME as compared to v23.2.3.17-stable (dec18bf7281)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#47277](https://github.com/ClickHouse/ClickHouse/issues/47277): Fix IPv4/IPv6 serialization/deserialization in binary formats that was broken in https://github.com/ClickHouse/ClickHouse/pull/43221. Closes [#46522](https://github.com/ClickHouse/ClickHouse/issues/46522). [#46616](https://github.com/ClickHouse/ClickHouse/pull/46616) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Backported in [#47212](https://github.com/ClickHouse/ClickHouse/issues/47212): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to not working deduplication. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#47363](https://github.com/ClickHouse/ClickHouse/issues/47363): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
@ -172,7 +172,7 @@ Global thread pool is `GlobalThreadPool` singleton class. To allocate thread fro
|
||||
|
||||
Global pool is universal and all pools described below are implemented on top of it. This can be thought of as a hierarchy of pools. Any specialized pool takes its threads from the global pool using `ThreadPool` class. So the main purpose of any specialized pool is to apply limit on the number of simultaneous jobs and do job scheduling. If there are more jobs scheduled than threads in a pool, `ThreadPool` accumulates jobs in a queue with priorities. Each job has an integer priority. Default priority is zero. All jobs with higher priority values are started before any job with lower priority value. But there is no difference between already executing jobs, thus priority matters only when the pool is overloaded.
|
||||
|
||||
IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU.
|
||||
IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. Backup to S3 does a significant amount of IO operations, and to avoid impact on interactive queries there is a separate `BackupsIOThreadPool` configured with `max_backups_io_thread_pool_size`, `max_backups_io_thread_pool_free_size` and `backups_io_thread_pool_queue_size` settings.
|
||||
|
||||
For periodic task execution there is `BackgroundSchedulePool` class. You can register tasks using `BackgroundSchedulePool::TaskHolder` objects and the pool ensures that no task runs two jobs at the same time. It also allows you to postpone task execution to a specific instant in the future or temporarily deactivate task. Global `Context` provides a few instances of this class for different purposes. For general purpose tasks `Context::getSchedulePool()` is used.
|
||||
|
||||
|
123
docs/en/development/build-cross-s390x.md
Normal file
123
docs/en/development/build-cross-s390x.md
Normal file
@ -0,0 +1,123 @@
|
||||
---
|
||||
slug: /en/development/build-cross-s390x
|
||||
sidebar_position: 69
|
||||
title: How to Build, Run and Debug ClickHouse on Linux for s390x (zLinux)
|
||||
sidebar_label: Build on Linux for s390x (zLinux)
|
||||
---
|
||||
|
||||
As of writing (2023/3/10), building for s390x is considered experimental. Not all features can be enabled, some features are broken, and the port is currently under active development.
|
||||
|
||||
|
||||
## Building
|
||||
|
||||
As s390x does not support boringssl, it uses OpenSSL and has two related build options.
|
||||
- By default, the s390x build will dynamically link to OpenSSL libraries. It will build OpenSSL shared objects, so it's not necessary to install OpenSSL beforehand. (This option is recommended in all cases.)
|
||||
- Another option is to build OpenSSL in-tree. In this case two build flags need to be supplied to cmake
|
||||
```bash
|
||||
-DENABLE_OPENSSL_DYNAMIC=0 -DENABLE_OPENSSL=1
|
||||
```
|
||||
|
||||
These instructions assume that the host machine is x86_64 and has all the tooling required to build natively based on the [build instructions](../development/build.md). It also assumes that the host is Ubuntu 22.04 but the following instructions should also work on Ubuntu 20.04.
|
||||
|
||||
In addition to installing the tooling used to build natively, the following additional packages need to be installed:
|
||||
|
||||
```bash
|
||||
apt-get install binutils-s390x-linux-gnu libc6-dev-s390x-cross gcc-s390x-linux-gnu binfmt-support qemu-user-static
|
||||
```
|
||||
|
||||
If you wish to cross compile rust code install the rust cross compile target for s390x:
|
||||
```bash
|
||||
rustup target add s390x-unknown-linux-gnu
|
||||
```
|
||||
|
||||
To build for s390x:
|
||||
```bash
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-s390x.cmake ..
|
||||
ninja
|
||||
```
|
||||
|
||||
## Running
|
||||
|
||||
Once built, the binary can be run with, e.g.:
|
||||
|
||||
```bash
|
||||
qemu-s390x-static -L /usr/s390x-linux-gnu ./clickhouse
|
||||
```
|
||||
|
||||
## Debugging
|
||||
|
||||
Install LLDB:
|
||||
|
||||
```bash
|
||||
apt-get install lldb-15
|
||||
```
|
||||
|
||||
To debug an s390x executable, run clickhouse using QEMU in debug mode:
|
||||
|
||||
```bash
|
||||
qemu-s390x-static -g 31338 -L /usr/s390x-linux-gnu ./clickhouse
|
||||
```
|
||||
|
||||
In another shell run LLDB and attach, replace `<Clickhouse Parent Directory>` and `<build directory>` with the values corresponding to your environment.
|
||||
```bash
|
||||
lldb-15
|
||||
(lldb) target create ./clickhouse
|
||||
Current executable set to '/<Clickhouse Parent Directory>/ClickHouse/<build directory>/programs/clickhouse' (s390x).
|
||||
(lldb) settings set target.source-map <build directory> /<Clickhouse Parent Directory>/ClickHouse
|
||||
(lldb) gdb-remote 31338
|
||||
Process 1 stopped
|
||||
* thread #1, stop reason = signal SIGTRAP
|
||||
frame #0: 0x0000004020e74cd0
|
||||
-> 0x4020e74cd0: lgr %r2, %r15
|
||||
0x4020e74cd4: aghi %r15, -160
|
||||
0x4020e74cd8: xc 0(8,%r15), 0(%r15)
|
||||
0x4020e74cde: brasl %r14, 275429939040
|
||||
(lldb) b main
|
||||
Breakpoint 1: 9 locations.
|
||||
(lldb) c
|
||||
Process 1 resuming
|
||||
Process 1 stopped
|
||||
* thread #1, stop reason = breakpoint 1.1
|
||||
frame #0: 0x0000004005cd9fc0 clickhouse`main(argc_=1, argv_=0x0000004020e594a8) at main.cpp:450:17
|
||||
447 #if !defined(FUZZING_MODE)
|
||||
448 int main(int argc_, char ** argv_)
|
||||
449 {
|
||||
-> 450 inside_main = true;
|
||||
451 SCOPE_EXIT({ inside_main = false; });
|
||||
452
|
||||
453 /// PHDR cache is required for query profiler to work reliably
|
||||
```
|
||||
|
||||
## Visual Studio Code integration
|
||||
|
||||
- [CodeLLDB extension](https://github.com/vadimcn/vscode-lldb) is required for visual debugging, the [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [cmake variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
|
||||
- Make sure to set the backend to your llvm installation, e.g. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
|
||||
- Launcher:
|
||||
```json
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Debug",
|
||||
"type": "lldb",
|
||||
"request": "custom",
|
||||
"targetCreateCommands": ["target create ${command:cmake.launchTargetDirectory}/clickhouse"],
|
||||
"processCreateCommands": ["settings set target.source-map ${input:targetdir} ${workspaceFolder}", "gdb-remote 31338"],
|
||||
"sourceMap": { "${input:targetdir}": "${workspaceFolder}" },
|
||||
}
|
||||
],
|
||||
"inputs": [
|
||||
{
|
||||
"id": "targetdir",
|
||||
"type": "command",
|
||||
"command": "extension.commandvariable.transform",
|
||||
"args": {
|
||||
"text": "${command:cmake.launchTargetDirectory}",
|
||||
"find": ".*/([^/]+)/[^/]+$",
|
||||
"replace": "$1"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /en/development/contrib
|
||||
sidebar_position: 71
|
||||
sidebar_position: 72
|
||||
sidebar_label: Third-Party Libraries
|
||||
description: A list of third-party libraries used
|
||||
---
|
||||
|
@ -67,7 +67,7 @@ It generally means that the SSH keys for connecting to GitHub are missing. These
|
||||
|
||||
You can also clone the repository via https protocol:
|
||||
|
||||
git clone --recursive--shallow-submodules https://github.com/ClickHouse/ClickHouse.git
|
||||
git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHouse.git
|
||||
|
||||
This, however, will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later replacing the remote address of the repository with `git remote` command.
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /en/development/style
|
||||
sidebar_position: 69
|
||||
sidebar_position: 70
|
||||
sidebar_label: C++ Guide
|
||||
description: A list of recommendations regarding coding style, naming convention, formatting and more
|
||||
---
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /en/development/tests
|
||||
sidebar_position: 70
|
||||
sidebar_position: 71
|
||||
sidebar_label: Testing
|
||||
title: ClickHouse Testing
|
||||
description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way.
|
||||
@ -31,6 +31,9 @@ folder and run the following command:
|
||||
PATH=$PATH:<path to clickhouse-client> tests/clickhouse-test 01428_hash_set_nan_key
|
||||
```
|
||||
|
||||
Test results (`stderr` and `stdout`) are written to files `01428_hash_set_nan_key.[stderr|stdout]` which
|
||||
are located near the test file itself (so for `queries/0_stateless/foo.sql` output will be in `queries/0_stateless/foo.stdout`).
|
||||
|
||||
For more options, see `tests/clickhouse-test --help`. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`. There are also options to run tests in parallel or in randomized order.
|
||||
|
||||
### Adding a New Test
|
||||
@ -39,12 +42,59 @@ To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` director
|
||||
|
||||
Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.
|
||||
|
||||
### Restricting test runs
|
||||
|
||||
A test can have zero or more _test tags_ specifying restrictions for test runs.
|
||||
|
||||
For `.sh` tests tags are written as a comment on the second line:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest
|
||||
```
|
||||
|
||||
For `.sql` tests tags are placed in the first line as a SQL comment:
|
||||
|
||||
```sql
|
||||
-- Tags: no-fasttest
|
||||
SELECT 1
|
||||
```
|
||||
|
||||
|Tag name | What it does | Usage example |
|
||||
|---|---|---|
|
||||
| `disabled`| Test is not run ||
|
||||
| `long` | Test's execution time is extended from 1 to 10 minutes ||
|
||||
| `deadlock` | Test is run in a loop for a long time ||
|
||||
| `race` | Same as `deadlock`. Prefer `deadlock` ||
|
||||
| `shard` | Server is required to listen to `127.0.0.*` ||
|
||||
| `distributed` | Same as `shard`. Prefer `shard` ||
|
||||
| `global` | Same as `shard`. Prefer `shard` ||
|
||||
| `zookeeper` | Test requires Zookeeper or ClickHouse Keeper to run | Test uses `ReplicatedMergeTree` |
|
||||
| `replica` | Same as `zookeeper`. Prefer `zookeeper` ||
|
||||
| `no-fasttest`| Test is not run under [Fast test](continuous-integration#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test|
|
||||
| `no-[asan, tsan, msan, ubsan]` | Disables tests in build with [sanitizers](#sanitizers) | Test is run under QEMU which doesn't work with sanitizers |
|
||||
| `no-replicated-database` |||
|
||||
| `no-ordinary-database` |||
|
||||
| `no-parallel` | Disables running other tests in parallel with this one | Test reads from `system` tables and invariants may be broken|
|
||||
| `no-parallel-replicas` |||
|
||||
| `no-debug` |||
|
||||
| `no-stress` |||
|
||||
| `no-polymorphic-parts` |||
|
||||
| `no-random-settings` |||
|
||||
| `no-random-merge-tree-settings` |||
|
||||
| `no-backward-compatibility-check` |||
|
||||
| `no-cpu-x86_64` |||
|
||||
| `no-cpu-aarch64` |||
|
||||
| `no-cpu-ppc64le` |||
|
||||
| `no-s3-storage` |||
|
||||
|
||||
In addition to the above settings, you can use `USE_*` flags from `system.build_options` to define usage of particular ClickHouse features.
|
||||
For example, if your test uses a MySQL table, you should add a tag `use-mysql`.
|
||||
|
||||
### Choosing the Test Name
|
||||
|
||||
The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later.
|
||||
|
||||
Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that require the server to listen on `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer than one second. You can disable these groups of tests using `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively. Make sure to add a proper prefix to your test name if it needs ZooKeeper or distributed queries.
|
||||
|
||||
### Checking for an Error that Must Occur
|
||||
|
||||
Sometimes you want to test that a server error occurs for an incorrect query. We support special annotations for this in SQL tests, in the following form:
|
||||
|
@ -19,8 +19,8 @@ Kafka lets you:
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
|
||||
name1 [type1],
|
||||
name2 [type2],
|
||||
...
|
||||
) ENGINE = Kafka()
|
||||
SETTINGS
|
||||
@ -113,6 +113,10 @@ Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
|
||||
|
||||
</details>
|
||||
|
||||
:::info
|
||||
The Kafka table engine doesn't support columns with [default value](../../../sql-reference/statements/create/table.md#default_value). If you need columns with default value, you can add them at materialized view level (see below).
|
||||
:::
|
||||
|
||||
## Description {#description}
|
||||
|
||||
The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}
|
||||
|
||||
The main task that indexes achieve is to quickly find nearest neighbors for multidimensional data. An example of such a problem can be finding similar pictures (texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). They can be created from data using [UDF](../../../sql-reference/functions/index.md#executable-user-defined-functions).
|
||||
The main task that indexes achieve is to quickly find nearest neighbors for multidimensional data. An example of such a problem can be finding similar pictures (texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). They can be created from data using [UDF](/docs/en/sql-reference/functions/index.md/#executable-user-defined-functions).
|
||||
|
||||
The next queries find the closest neighbors in N-dimensional space using the L2 (Euclidean) distance:
|
||||
``` sql
|
||||
@ -39,7 +39,7 @@ Approximate Nearest Neighbor Search Indexes (`ANNIndexes`) are similar to skip i
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
In these queries, `DistanceFunction` is selected from [distance functions](../../../sql-reference/functions/distance-functions). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](../../../interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `Value` - a float value that will bound the neighbourhood.
|
||||
In these queries, `DistanceFunction` is selected from [distance functions](/docs/en/sql-reference/functions/distance-functions.md). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `Value` - a float value that will bound the neighbourhood.
|
||||
|
||||
:::note
|
||||
ANN index can't speed up query that satisfies both types (`where + order by`, only one of them). All queries must have the limit, as algorithms are used to find nearest neighbors and need a specific number of them.
|
||||
@ -85,13 +85,13 @@ As the indexes are built only during insertions into table, `INSERT` and `OPTIMI
|
||||
You can create your table with index which uses certain algorithm. Now only indices based on the following algorithms are supported:
|
||||
|
||||
# Index list
|
||||
- [Annoy](../../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
|
||||
- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
|
||||
|
||||
# Annoy {#annoy}
|
||||
Implementation of the algorithm was taken from [this repository](https://github.com/spotify/annoy).
|
||||
|
||||
Short description of the algorithm:
|
||||
The algorithm recursively divides in half all space by random linear surfaces (lines in 2D, planes in 3D e.t.c.). Thus it makes tree of polyhedrons and points that they contains. Repeating the operation several times for greater accuracy it creates a forest.
|
||||
The algorithm recursively divides in half all space by random linear surfaces (lines in 2D, planes in 3D etc.). Thus it makes a tree of polyhedrons and the points that they contain. Repeating the operation several times for greater accuracy it creates a forest.
|
||||
To find K Nearest Neighbours it goes down through the trees and fills the buffer of closest points using the priority queue of polyhedrons. Next, it sorts the buffer and returns the nearest K points.
|
||||
|
||||
__Examples__:
|
||||
@ -118,7 +118,7 @@ ORDER BY id;
|
||||
```
|
||||
|
||||
:::note
|
||||
Table with array field will work faster, but all arrays **must** have same length. Use [CONSTRAINT](../../../sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(data) = 256`.
|
||||
Table with array field will work faster, but all arrays **must** have same length. Use [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(data) = 256`.
|
||||
:::
|
||||
|
||||
Parameter `NumTrees` is the number of trees which the algorithm will create. The bigger it is, the slower (approximately linear) it works (in both `CREATE` and `SELECT` requests), but the better accuracy you get (adjusted for randomness). By default it is set to `100`. Parameter `DistanceName` is name of distance function. By default it is set to `L2Distance`. It can be set without changing first parameter, for example
|
||||
|
@ -450,29 +450,32 @@ INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY
|
||||
|
||||
Conditions in the `WHERE` clause contain calls of the functions that operate with columns. If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes.
|
||||
|
||||
The `set` index can be used with all functions. Function subsets for other indexes are shown in the table below.
|
||||
Indexes of type `set` can be utilized by all functions. The other index types are supported as follows:
|
||||
|
||||
| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
|
||||
|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
|
||||
| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
|
||||
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
|
||||
| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
|------------------------------------------------------------------------------------------------------------|-------------|--------|------------|------------|--------------|
|
||||
| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
|
||||
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
|
||||
| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ |
|
||||
| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✗ | ✗ | ✔ |
|
||||
| [hasAll](/docs/en/sql-reference/functions/array-functions#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ |
|
||||
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
|
||||
Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization.
|
||||
|
||||
|
File diff suppressed because one or more lines are too long
@ -1232,50 +1232,52 @@ Each row is formatted as a single document and each column is formatted as a sin
|
||||
|
||||
For output it uses the following correspondence between ClickHouse types and BSON types:
|
||||
|
||||
| ClickHouse type | BSON Type |
|
||||
|-----------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------|
|
||||
| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
|
||||
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
|
||||
| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
|
||||
| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double |
|
||||
| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 |
|
||||
| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 |
|
||||
| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime |
|
||||
| [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 |
|
||||
| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 |
|
||||
| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
|
||||
| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 |
|
||||
| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
|
||||
| [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 |
|
||||
| ClickHouse type | BSON Type |
|
||||
|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------|
|
||||
| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
|
||||
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)                                                         | `\x10` int32                                                                                                  |
|
||||
| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
|
||||
| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
|
||||
| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double |
|
||||
| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 |
|
||||
| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 |
|
||||
| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime |
|
||||
| [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 |
|
||||
| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 |
|
||||
| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
|
||||
| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 |
|
||||
| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
|
||||
| [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 |
|
||||
| [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled |
|
||||
| [UUID](/docs/en/sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 |
|
||||
| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array |
|
||||
| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array |
|
||||
| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document |
|
||||
| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document |
|
||||
| [UUID](/docs/en/sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 |
|
||||
| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array |
|
||||
| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array |
|
||||
| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document |
|
||||
| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document |
|
||||
| [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 |
|
||||
| [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype |
|
||||
|
||||
For input it uses the following correspondence between BSON types and ClickHouse types:
|
||||
|
||||
| BSON Type | ClickHouse Type |
|
||||
|------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
|
||||
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
|
||||
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
|
||||
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
|
||||
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) |
|
||||
| BSON Type | ClickHouse Type |
|
||||
|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
|
||||
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
|
||||
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
|
||||
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
|
||||
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
|
||||
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) |
|
||||
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
|
||||
|
||||
Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert a BSON int32 value into a ClickHouse UInt8 column).
|
||||
@ -1608,23 +1610,25 @@ See also [Format Schema](#formatschema).
|
||||
|
||||
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||
|
||||
| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|
||||
|--------------------------------|-----------------------------------------------------------|--------------------------------|
|
||||
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
||||
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
|
||||
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
||||
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
|
||||
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` |
|
||||
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
|
||||
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
|
||||
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
|
||||
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` |
|
||||
| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|
||||
|----------------------------------|------------------------------------------------------------------------------------------------------------------------|------------------------------|
|
||||
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
||||
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
|
||||
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
||||
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
|
||||
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` |
|
||||
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
|
||||
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
|
||||
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
|
||||
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md)                                                           | `union(T, Void), union(Void, T)` |
|
||||
| `ENUM`                           | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md)                                                                | `ENUM`                           |
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
|
||||
| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
|
||||
|
||||
For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting.
|
||||
|
||||
@ -1804,21 +1808,23 @@ ClickHouse Avro format supports reading and writing [Avro data files](https://av
|
||||
|
||||
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||
|
||||
| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` |
|
||||
|---------------------------------------------|----------------------------------------------------------------------------------------------------|------------------------------|
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` |
|
||||
| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* |
|
||||
| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` |
|
||||
| `enum` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `enum` |
|
||||
| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` |
|
||||
| `union(null, T)`, `union(T, null)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(null, T)` |
|
||||
| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` |
|
||||
| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** |
|
||||
| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* |
|
||||
| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* |
|
||||
| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` |
|
||||
|---------------------------------------------|-----------------------------------------------------------------------------------------------------------------|-------------------------------------------------|
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int`                         |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` |
|
||||
| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* |
|
||||
| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` |
|
||||
| `enum`                                      | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md)                                           | `enum`                        |
|
||||
| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` |
|
||||
| `union(null, T)`, `union(T, null)`          | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md)                                       | `union(null, T)`              |
|
||||
| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` |
|
||||
| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** |
|
||||
| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* |
|
||||
| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* |
|
||||
| `int` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `int` |
|
||||
| `fixed(16)` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `fixed(16)` |
|
||||
|
||||
\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern)
|
||||
\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
|
||||
@ -1918,28 +1924,30 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml`
|
||||
|
||||
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||
|
||||
| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
|
||||
|-----------------------------------------------|-----------------------------------------------------------------|------------------------------|
|
||||
| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
|
||||
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
||||
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
|
||||
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
||||
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
|
||||
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
|
||||
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
|
||||
| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` |
|
||||
| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` |
|
||||
| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` |
|
||||
| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
|
||||
| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
|
||||
| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
|
||||
| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
|
||||
| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
|
||||
|----------------------------------------------------|-----------------------------------------------------------------|------------------------------|
|
||||
| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
|
||||
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
||||
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
|
||||
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
||||
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
|
||||
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
|
||||
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
|
||||
| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` |
|
||||
| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` |
|
||||
| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` |
|
||||
| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
|
||||
| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
|
||||
| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
|
||||
| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
|
||||
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
|
||||
| `FIXED_LENGTH_BYTE_ARRAY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` |
|
||||
|
||||
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
|
||||
|
||||
@ -1971,7 +1979,9 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
|
||||
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
|
||||
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
|
||||
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types during schema inference for Parquet format. Default value - `false`.
|
||||
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
|
||||
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
|
||||
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
|
||||
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
|
||||
|
||||
## Arrow {#data-format-arrow}
|
||||
|
||||
@ -2006,6 +2016,8 @@ The table below shows supported data types and how they match ClickHouse [data t
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
|
||||
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
|
||||
| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` |
|
||||
|
||||
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
|
||||
|
||||
@ -2040,6 +2052,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
|
||||
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
|
||||
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types during schema inference for Arrow format. Default value - `false`.
|
||||
- [output_format_arrow_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_fixed_string_as_fixed_byte_array) - use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString columns. Default value - `true`.
|
||||
- [output_format_arrow_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_compression_method) - compression method used in output Arrow format. Default value - `none`.
|
||||
|
||||
## ArrowStream {#data-format-arrow-stream}
|
||||
|
||||
@ -2053,8 +2066,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
|
||||
|
||||
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||
|
||||
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|
||||
|---------------------------------------|---------------------------------------------------------|--------------------------|
|
||||
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|
||||
|---------------------------------------|---------------------------------------------------------------|--------------------------|
|
||||
| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
|
||||
| `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` |
|
||||
| `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` |
|
||||
@ -2069,6 +2082,7 @@ The table below shows supported data types and how they match ClickHouse [data t
|
||||
| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
|
||||
| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
|
||||
| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
|
||||
| `-`                                   | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)     | `Int`                    |
|
||||
|
||||
Other types are not supported.
|
||||
|
||||
@ -2095,6 +2109,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.
|
||||
### Arrow format settings {#parquet-format-settings}
|
||||
|
||||
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
|
||||
- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`.
|
||||
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
|
||||
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
|
||||
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
|
||||
@ -2263,8 +2278,8 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data
|
||||
|
||||
### Data Types Matching {#data-types-matching-msgpack}
|
||||
|
||||
| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) |
|
||||
|--------------------------------------------------------------------|-----------------------------------------------------------|------------------------------------|
|
||||
| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) |
|
||||
|--------------------------------------------------------------------|-----------------------------------------------------------------|------------------------------------|
|
||||
| `uint N`, `positive fixint` | [UIntN](/docs/en/sql-reference/data-types/int-uint.md) | `uint N` |
|
||||
| `int N`, `negative fixint` | [IntN](/docs/en/sql-reference/data-types/int-uint.md) | `int N` |
|
||||
| `bool` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `uint 8` |
|
||||
@ -2277,6 +2292,8 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data
|
||||
| `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `uint 64` |
|
||||
| `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md) | `fixarray`, `array 16`, `array 32` |
|
||||
| `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` |
|
||||
| `uint 32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `uint 32` |
|
||||
| `bin 8` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8` |
|
||||
|
||||
Example:
|
||||
|
||||
|
@ -309,6 +309,7 @@ The HTTP interface allows passing external data (external temporary tables) for
|
||||
## Response Buffering {#response-buffering}
|
||||
|
||||
You can enable response buffering on the server-side. The `buffer_size` and `wait_end_of_query` URL parameters are provided for this purpose.
|
||||
The settings `http_response_buffer_size` and `http_wait_end_of_query` can also be used for this purpose.
|
||||
|
||||
`buffer_size` determines the number of bytes in the result to buffer in the server memory. If a result body is larger than this threshold, the buffer is written to the HTTP channel, and the remaining data is sent directly to the HTTP channel.
|
||||
|
||||
|
@ -117,7 +117,7 @@ clickhouse-local --file='hobbies.jsonl' --table='hobbies' --query='SELECT * FROM
|
||||
4 47 Brayan ['movies','skydiving']
|
||||
```
|
||||
|
||||
# Using structure from insertion table {#using-structure-from-insertion-table}
|
||||
## Using structure from insertion table {#using-structure-from-insertion-table}
|
||||
|
||||
When table functions `file/s3/url/hdfs` are used to insert data into a table,
|
||||
there is an option to use the structure from the insertion table instead of extracting it from the data.
|
||||
@ -222,14 +222,14 @@ INSERT INTO hobbies4 SELECT id, empty(hobbies) ? NULL : hobbies[1] FROM file(hob
|
||||
|
||||
In this case, there are some operations performed on the column `hobbies` in the `SELECT` query to insert it into the table, so ClickHouse cannot use the structure from the insertion table, and schema inference will be used.
|
||||
|
||||
# Schema inference cache {#schema-inference-cache}
|
||||
## Schema inference cache {#schema-inference-cache}
|
||||
|
||||
For most input formats schema inference reads some data to determine its structure and this process can take some time.
|
||||
To prevent inferring the same schema every time ClickHouse reads the data from the same file, the inferred schema is cached and when accessing the same file again, ClickHouse will use the schema from the cache.
|
||||
|
||||
There are special settings that control this cache:
|
||||
- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
|
||||
- `use_cache_for_{file,s3,hdfs,url}_schema_inference` - allows turning on/off using cache for schema inference. These settings can be used in queries.
|
||||
- `schema_inference_use_cache_for_{file,s3,hdfs,url}` - allows turning on/off using cache for schema inference. These settings can be used in queries.
|
||||
|
||||
The schema of the file can be changed by modifying the data or by changing format settings.
|
||||
For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file.
|
||||
@ -326,14 +326,14 @@ SELECT count() FROM system.schema_inference_cache WHERE storage='S3'
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
# Text formats {#text-formats}
|
||||
## Text formats {#text-formats}
|
||||
|
||||
For text formats, ClickHouse reads the data row by row, extracts column values according to the format,
|
||||
and then uses some recursive parsers and heuristics to determine the type for each value. The maximum number of rows read from the data in schema inference
|
||||
is controlled by the setting `input_format_max_rows_to_read_for_schema_inference` with default value 25000.
|
||||
By default, all inferred types are [Nullable](../sql-reference/data-types/nullable.md), but you can change this by setting `schema_inference_make_columns_nullable` (see examples in the [settings](#settings-for-text-formats) section).
|
||||
|
||||
## JSON formats {#json-formats}
|
||||
### JSON formats {#json-formats}
|
||||
|
||||
In JSON formats ClickHouse parses values according to the JSON specification and then tries to find the most appropriate data type for them.
|
||||
|
||||
@ -464,9 +464,9 @@ most likely this column contains only Nulls or empty Arrays/Maps.
|
||||
...
|
||||
```
|
||||
|
||||
### JSON settings {#json-settings}
|
||||
#### JSON settings {#json-settings}
|
||||
|
||||
#### input_format_json_read_objects_as_strings
|
||||
##### input_format_json_read_objects_as_strings
|
||||
|
||||
Enabling this setting allows reading nested JSON objects as strings.
|
||||
This setting can be used to read nested JSON objects without using JSON object type.
|
||||
@ -486,7 +486,7 @@ DESC format(JSONEachRow, $$
|
||||
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
#### input_format_json_try_infer_numbers_from_strings
|
||||
##### input_format_json_try_infer_numbers_from_strings
|
||||
|
||||
Enabling this setting allows inferring numbers from string values.
|
||||
|
||||
@ -507,7 +507,7 @@ DESC format(JSONEachRow, $$
|
||||
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
#### input_format_json_read_numbers_as_strings
|
||||
##### input_format_json_read_numbers_as_strings
|
||||
|
||||
Enabling this setting allows reading numeric values as strings.
|
||||
|
||||
@ -528,7 +528,7 @@ DESC format(JSONEachRow, $$
|
||||
└───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
#### input_format_json_read_bools_as_numbers
|
||||
##### input_format_json_read_bools_as_numbers
|
||||
|
||||
Enabling this setting allows reading Bool values as numbers.
|
||||
|
||||
@ -549,7 +549,7 @@ DESC format(JSONEachRow, $$
|
||||
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## CSV {#csv}
|
||||
### CSV {#csv}
|
||||
|
||||
In CSV format ClickHouse extracts column values from the row according to delimiters. ClickHouse expects all types except numbers and strings to be enclosed in double quotes. If the value is in double quotes, ClickHouse tries to parse
|
||||
the data inside quotes using the recursive parser and then tries to find the most appropriate data type for it. If the value is not in double quotes, ClickHouse tries to parse it as a number,
|
||||
@ -726,7 +726,7 @@ $$)
|
||||
└──────────────┴───────────────┘
|
||||
```
|
||||
|
||||
## TSV/TSKV {#tsv-tskv}
|
||||
### TSV/TSKV {#tsv-tskv}
|
||||
|
||||
In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using
|
||||
the recursive parser to determine the most appropriate type. If the type cannot be determined, ClickHouse treats this value as String.
|
||||
@ -1019,7 +1019,7 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!')
|
||||
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## CustomSeparated {#custom-separated}
|
||||
### CustomSeparated {#custom-separated}
|
||||
|
||||
In CustomSeparated format ClickHouse first extracts all column values from the row according to specified delimiters and then tries to infer
|
||||
the data type for each value according to escaping rule.
|
||||
@ -1080,7 +1080,7 @@ $$)
|
||||
└────────┴───────────────┴────────────┘
|
||||
```
|
||||
|
||||
## Template {#template}
|
||||
### Template {#template}
|
||||
|
||||
In Template format ClickHouse first extracts all column values from the row according to the specified template and then tries to infer the
|
||||
data type for each value according to its escaping rule.
|
||||
@ -1120,7 +1120,7 @@ $$)
|
||||
└──────────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## Regexp {#regexp}
|
||||
### Regexp {#regexp}
|
||||
|
||||
Similar to Template, in Regexp format ClickHouse first extracts all column values from the row according to specified regular expression and then tries to infer
|
||||
data type for each value according to the specified escaping rule.
|
||||
@ -1142,9 +1142,9 @@ Line: value_1=2, value_2="Some string 2", value_3="[4, 5, NULL]"$$)
|
||||
└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## Settings for text formats {settings-for-text-formats}
|
||||
### Settings for text formats {#settings-for-text-formats}
|
||||
|
||||
### input_format_max_rows_to_read_for_schema_inference
|
||||
#### input_format_max_rows_to_read_for_schema_inference
|
||||
|
||||
This setting controls the maximum number of rows to be read during schema inference.
|
||||
The more rows are read, the more time is spent on schema inference, but the greater the chance to
|
||||
@ -1152,7 +1152,7 @@ correctly determine the types (especially when the data contains a lot of nulls)
|
||||
|
||||
Default value: `25000`.
|
||||
|
||||
### column_names_for_schema_inference
|
||||
#### column_names_for_schema_inference
|
||||
|
||||
The list of column names to use in schema inference for formats without explicit column names. Specified names will be used instead of default `c1,c2,c3,...`. The format: `column1,column2,column3,...`.
|
||||
|
||||
@ -1169,7 +1169,7 @@ DESC format(TSV, 'Hello, World! 42 [1, 2, 3]') settings column_names_for_schema_
|
||||
└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
### schema_inference_hints
|
||||
#### schema_inference_hints
|
||||
|
||||
The list of column names and types to use in schema inference instead of automatically determined types. The format: 'column_name1 column_type1, column_name2 column_type2, ...'.
|
||||
This setting can be used to specify the types of columns that could not be determined automatically or for optimizing the schema.
|
||||
@ -1189,7 +1189,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul
|
||||
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
### schema_inference_make_columns_nullable
|
||||
#### schema_inference_make_columns_nullable
|
||||
|
||||
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
|
||||
If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference.
|
||||
@ -1232,7 +1232,7 @@ DESC format(JSONEachRow, $$
|
||||
└─────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
### input_format_try_infer_integers
|
||||
#### input_format_try_infer_integers
|
||||
|
||||
If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats.
|
||||
If all numbers in the column from sample data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`.
|
||||
@ -1289,7 +1289,7 @@ DESC format(JSONEachRow, $$
|
||||
└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
### input_format_try_infer_datetimes
|
||||
#### input_format_try_infer_datetimes
|
||||
|
||||
If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats.
|
||||
If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`,
|
||||
@ -1337,7 +1337,7 @@ DESC format(JSONEachRow, $$
|
||||
|
||||
Note: Parsing datetimes during schema inference respects the setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format).
|
||||
|
||||
### input_format_try_infer_dates
|
||||
#### input_format_try_infer_dates
|
||||
|
||||
If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats.
|
||||
If all fields from a column in sample data were successfully parsed as dates, the result type will be `Date`,
|
||||
@ -1383,14 +1383,14 @@ DESC format(JSONEachRow, $$
|
||||
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
# Self describing formats {#self-describing-formats}
|
||||
## Self describing formats {#self-describing-formats}
|
||||
|
||||
Self-describing formats contain information about the structure of the data in the data itself,
|
||||
it can be some header with a description, a binary type tree, or some kind of table.
|
||||
To automatically infer a schema from files in such formats, ClickHouse reads a part of the data containing
|
||||
information about the types and converts it into a schema of the ClickHouse table.
|
||||
|
||||
## Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types}
|
||||
### Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types}
|
||||
|
||||
ClickHouse supports some text formats with the suffix -WithNamesAndTypes. This suffix means that the data contains two additional rows with column names and types before the actual data.
|
||||
During schema inference for such formats, ClickHouse reads the first two rows and extracts column names and types.
|
||||
@ -1412,7 +1412,7 @@ $$)
|
||||
└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## JSON formats with metadata {#json-with-metadata}
|
||||
### JSON formats with metadata {#json-with-metadata}
|
||||
|
||||
Some JSON input formats ([JSON](formats.md#json), [JSONCompact](formats.md#json-compact), [JSONColumnsWithMetadata](formats.md#jsoncolumnswithmetadata)) contain metadata with column names and types.
|
||||
In schema inference for such formats, ClickHouse reads this metadata.
|
||||
@ -1465,7 +1465,7 @@ $$)
|
||||
└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## Avro {#avro}
|
||||
### Avro {#avro}
|
||||
|
||||
In Avro format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
|
||||
|
||||
@ -1485,7 +1485,7 @@ In Avro format ClickHouse reads its schema from the data and converts it to Clic
|
||||
|
||||
Other Avro types are not supported.
|
||||
|
||||
## Parquet {#parquet}
|
||||
### Parquet {#parquet}
|
||||
|
||||
In Parquet format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
|
||||
|
||||
@ -1513,7 +1513,7 @@ In Parquet format ClickHouse reads its schema from the data and converts it to C
|
||||
|
||||
Other Parquet types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
|
||||
|
||||
## Arrow {#arrow}
|
||||
### Arrow {#arrow}
|
||||
|
||||
In Arrow format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
|
||||
|
||||
@ -1541,7 +1541,7 @@ In Arrow format ClickHouse reads its schema from the data and converts it to Cli
|
||||
|
||||
Other Arrow types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
|
||||
|
||||
## ORC {#orc}
|
||||
### ORC {#orc}
|
||||
|
||||
In ORC format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
|
||||
|
||||
@ -1564,17 +1564,17 @@ In ORC format ClickHouse reads its schema from the data and converts it to Click
|
||||
|
||||
Other ORC types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
|
||||
|
||||
## Native {#native}
|
||||
### Native {#native}
|
||||
|
||||
Native format is used inside ClickHouse and contains the schema in the data.
|
||||
In schema inference, ClickHouse reads the schema from the data without any transformations.
|
||||
|
||||
# Formats with external schema {#formats-with-external-schema}
|
||||
## Formats with external schema {#formats-with-external-schema}
|
||||
|
||||
Such formats require a schema describing the data in a separate file in a specific schema language.
|
||||
To automatically infer a schema from files in such formats, ClickHouse reads external schema from a separate file and transforms it to a ClickHouse table schema.
|
||||
|
||||
# Protobuf {#protobuf}
|
||||
### Protobuf {#protobuf}
|
||||
|
||||
In schema inference for Protobuf format ClickHouse uses the following type matches:
|
||||
|
||||
@ -1592,7 +1592,7 @@ In schema inference for Protobuf format ClickHouse uses the following type match
|
||||
| `repeated T` | [Array(T)](../sql-reference/data-types/array.md) |
|
||||
| `message`, `group` | [Tuple](../sql-reference/data-types/tuple.md) |
|
||||
|
||||
# CapnProto {#capnproto}
|
||||
### CapnProto {#capnproto}
|
||||
|
||||
In schema inference for CapnProto format ClickHouse uses the following type matches:
|
||||
|
||||
@ -1615,13 +1615,13 @@ In schema inference for CapnProto format ClickHouse uses the following type matc
|
||||
| `struct` | [Tuple](../sql-reference/data-types/tuple.md) |
|
||||
| `union(T, Void)`, `union(Void, T)` | [Nullable(T)](../sql-reference/data-types/nullable.md) |
|
||||
|
||||
# Strong-typed binary formats {#strong-typed-binary-formats}
|
||||
## Strong-typed binary formats {#strong-typed-binary-formats}
|
||||
|
||||
In such formats, each serialized value contains information about its type (and possibly about its name), but there is no information about the whole table.
|
||||
In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows) and extracts
|
||||
the type (and possibly name) for each value from the data and then converts these types to ClickHouse types.
|
||||
|
||||
## MsgPack {msgpack}
|
||||
### MsgPack {#msgpack}
|
||||
|
||||
In MsgPack format there is no delimiter between rows, to use schema inference for this format you should specify the number of columns in the table
|
||||
using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the following type matches:
|
||||
@ -1641,7 +1641,7 @@ using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the
|
||||
|
||||
By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
|
||||
|
||||
## BSONEachRow {#bsoneachrow}
|
||||
### BSONEachRow {#bsoneachrow}
|
||||
|
||||
In BSONEachRow each row of data is presented as a BSON document. In schema inference ClickHouse reads BSON documents one by one and extracts
|
||||
values, names, and types from the data and then transforms these types to ClickHouse types using the following type matches:
|
||||
@ -1661,11 +1661,11 @@ values, names, and types from the data and then transforms these types to ClickH
|
||||
|
||||
By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
|
||||
|
||||
# Formats with constant schema {#formats-with-constant-schema}
|
||||
## Formats with constant schema {#formats-with-constant-schema}
|
||||
|
||||
Data in such formats always have the same schema.
|
||||
|
||||
## LineAsString {#line-as-string}
|
||||
### LineAsString {#line-as-string}
|
||||
|
||||
In this format, ClickHouse reads the whole line from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `line`.
|
||||
|
||||
@ -1680,7 +1680,7 @@ DESC format(LineAsString, 'Hello\nworld!')
|
||||
└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## JSONAsString {#json-as-string}
|
||||
### JSONAsString {#json-as-string}
|
||||
|
||||
In this format, ClickHouse reads the whole JSON object from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `json`.
|
||||
|
||||
@ -1695,7 +1695,7 @@ DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}')
|
||||
└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## JSONAsObject {#json-as-object}
|
||||
### JSONAsObject {#json-as-object}
|
||||
|
||||
In this format, ClickHouse reads the whole JSON object from the data into a single column with `Object('json')` data type. The inferred type for this format is always `Object('json')` and the column name is `json`.
|
||||
|
||||
|
@ -26,6 +26,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasn’t don
|
||||
- [one-ck](https://github.com/lizhichao/one-ck)
|
||||
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
|
||||
- [kolya7k ClickHouse PHP extension](https://github.com/kolya7k/clickhouse-php)
|
||||
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
|
||||
- Go
|
||||
- [clickhouse](https://github.com/kshvakov/clickhouse/)
|
||||
- [go-clickhouse](https://github.com/roistat/go-clickhouse)
|
||||
|
@ -765,7 +765,7 @@ Default value: `0`.
|
||||
|
||||
## concurrent_threads_soft_limit_ratio_to_cores {#concurrent_threads_soft_limit_ratio_to_cores}
|
||||
The maximum number of query processing threads as multiple of number of logical cores.
|
||||
More details: [concurrent_threads_soft_limit_num](#concurrent-threads-soft-limit-num).
|
||||
More details: [concurrent_threads_soft_limit_num](#concurrent_threads_soft_limit_num).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -967,6 +967,7 @@ The maximum number of jobs that can be scheduled on the Global Thread pool. Incr
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `10000`.
|
||||
|
||||
@ -976,6 +977,69 @@ Default value: `10000`.
|
||||
<thread_pool_queue_size>12000</thread_pool_queue_size>
|
||||
```
|
||||
|
||||
## max_io_thread_pool_size {#max-io-thread-pool-size}
|
||||
|
||||
ClickHouse uses threads from the IO Thread pool to do some IO operations (e.g. to interact with S3). `max_io_thread_pool_size` limits the maximum number of threads in the pool.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
|
||||
Default value: `100`.
|
||||
|
||||
## max_io_thread_pool_free_size {#max-io-thread-pool-free-size}
|
||||
|
||||
If the number of **idle** threads in the IO Thread pool exceeds `max_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## io_thread_pool_queue_size {#io-thread-pool-queue-size}
|
||||
|
||||
The maximum number of jobs that can be scheduled on the IO Thread pool.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `10000`.
|
||||
|
||||
## max_backups_io_thread_pool_size {#max-backups-io-thread-pool-size}
|
||||
|
||||
ClickHouse uses threads from the Backups IO Thread pool to do S3 backup IO operations. `max_backups_io_thread_pool_size` limits the maximum number of threads in the pool.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
|
||||
Default value: `1000`.
|
||||
|
||||
## max_backups_io_thread_pool_free_size {#max-backups-io-thread-pool-free-size}
|
||||
|
||||
If the number of **idle** threads in the Backups IO Thread pool exceeds `max_backups_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- Zero.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## backups_io_thread_pool_queue_size {#backups-io-thread-pool-queue-size}
|
||||
|
||||
The maximum number of jobs that can be scheduled on the Backups IO Thread pool. It is recommended to keep this queue unlimited due to the current S3 backup logic.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## background_pool_size {#background_pool_size}
|
||||
|
||||
Sets the number of threads performing background merges and mutations for tables with MergeTree engines. This setting could also be applied at server startup from the `default` profile configuration for backward compatibility at the ClickHouse server start. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
|
||||
@ -1254,12 +1318,12 @@ Settings:
|
||||
|
||||
``` xml
|
||||
<prometheus>
|
||||
<endpoint>/metrics</endpoint>
|
||||
<port>8001</port>
|
||||
<metrics>true</metrics>
|
||||
<events>true</events>
|
||||
<asynchronous_metrics>true</asynchronous_metrics>
|
||||
</prometheus>
|
||||
<endpoint>/metrics</endpoint>
|
||||
<port>9363</port>
|
||||
<metrics>true</metrics>
|
||||
<events>true</events>
|
||||
<asynchronous_metrics>true</asynchronous_metrics>
|
||||
</prometheus>
|
||||
```
|
||||
|
||||
## query_log {#server_configuration_parameters-query-log}
|
||||
|
@ -50,7 +50,7 @@ If there are multiple profiles active for a user, then constraints are merged. M
|
||||
|
||||
Read-only mode is enabled by the `readonly` setting (not to be confused with the `readonly` constraint type):
|
||||
- `readonly=0`: No read-only restrictions.
|
||||
- `readonly=1`: Only read queries are allowed and settings cannot be changes unless `changeable_in_readonly` is set.
|
||||
- `readonly=1`: Only read queries are allowed and settings cannot be changed unless `changeable_in_readonly` is set.
|
||||
- `readonly=2`: Only read queries are allowed, but settings can be changed, except for `readonly` setting itself.
|
||||
|
||||
|
||||
|
@ -142,6 +142,10 @@ y Nullable(String)
|
||||
z IPv4
|
||||
```
|
||||
|
||||
:::warning
|
||||
If the `schema_inference_hints` value is not formatted properly (for example, it contains a typo or a wrong data type), the whole `schema_inference_hints` setting will be ignored.
|
||||
:::
|
||||
|
||||
## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
|
||||
|
||||
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
|
||||
@ -1010,6 +1014,12 @@ Use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString column
|
||||
|
||||
Enabled by default.
|
||||
|
||||
### output_format_arrow_compression_method {#output_format_arrow_compression_method}
|
||||
|
||||
Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed)
|
||||
|
||||
Default value: `none`.
|
||||
|
||||
## ORC format settings {#orc-format-settings}
|
||||
|
||||
### input_format_orc_import_nested {#input_format_orc_import_nested}
|
||||
@ -1053,6 +1063,12 @@ Use ORC String type instead of Binary for String columns.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
### output_format_orc_compression_method {#output_format_orc_compression_method}
|
||||
|
||||
Compression method used in output ORC format. Supported codecs: `lz4`, `snappy`, `zlib`, `zstd`, `none` (uncompressed)
|
||||
|
||||
Default value: `none`.
|
||||
|
||||
## Parquet format settings {#parquet-format-settings}
|
||||
|
||||
### input_format_parquet_import_nested {#input_format_parquet_import_nested}
|
||||
@ -1102,6 +1118,18 @@ Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedStrin
|
||||
|
||||
Enabled by default.
|
||||
|
||||
### output_format_parquet_version {#output_format_parquet_version}
|
||||
|
||||
The version of Parquet format used in output format. Supported versions: `1.0`, `2.4`, `2.6` and `2.latest`.
|
||||
|
||||
Default value: `2.latest`.
|
||||
|
||||
### output_format_parquet_compression_method {#output_format_parquet_compression_method}
|
||||
|
||||
Compression method used in output Parquet format. Supported codecs: `snappy`, `lz4`, `brotli`, `zstd`, `gzip`, `none` (uncompressed)
|
||||
|
||||
Default value: `snappy`.
|
||||
|
||||
## Hive format settings {#hive-format-settings}
|
||||
|
||||
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}
|
||||
@ -1464,7 +1492,7 @@ Default value: `65505`.
|
||||
|
||||
The name of table that will be used in the output INSERT statement.
|
||||
|
||||
Default value: `'table''`.
|
||||
Default value: `table`.
|
||||
|
||||
### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names}
|
||||
|
||||
@ -1504,4 +1532,12 @@ Disabled by default.
|
||||
|
||||
The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit.
|
||||
|
||||
Default value: `1GiB`
|
||||
Default value: `1GiB`.
|
||||
|
||||
## Native format settings {#native-format-settings}
|
||||
|
||||
### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion}
|
||||
|
||||
Allow types conversion in Native input format between columns from input data and requested columns.
|
||||
|
||||
Enabled by default.
|
||||
|
@ -966,10 +966,10 @@ This is an expert-level setting, and you shouldn't change it if you're just gett
|
||||
|
||||
## max_query_size {#settings-max_query_size}
|
||||
|
||||
The maximum part of a query that can be taken to RAM for parsing with the SQL parser.
|
||||
The INSERT query also contains data for INSERT that is processed by a separate stream parser (that consumes O(1) RAM), which is not included in this restriction.
|
||||
The maximum number of bytes of a query string parsed by the SQL parser.
|
||||
Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and is not affected by this restriction.
|
||||
|
||||
Default value: 256 KiB.
|
||||
Default value: 262144 (= 256 KiB).
|
||||
|
||||
## max_parser_depth {#max_parser_depth}
|
||||
|
||||
@ -1248,7 +1248,9 @@ Possible values:
|
||||
Default value: 1.
|
||||
|
||||
:::warning
|
||||
Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas).
|
||||
Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key).
|
||||
If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas.
|
||||
If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects.
|
||||
:::
|
||||
|
||||
## totals_mode {#totals-mode}
|
||||
@ -1273,16 +1275,47 @@ Default value: `1`.
|
||||
|
||||
**Additional Info**
|
||||
|
||||
This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
|
||||
This option will produce different results depending on the settings used.
|
||||
|
||||
:::warning
|
||||
This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
|
||||
:::
|
||||
|
||||
### Parallel processing using `SAMPLE` key
|
||||
|
||||
A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
|
||||
|
||||
- The position of the sampling key in the partitioning key does not allow efficient range scans.
|
||||
- Adding a sampling key to the table makes filtering by other columns less efficient.
|
||||
- The sampling key is an expression that is expensive to calculate.
|
||||
- The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency.
|
||||
|
||||
:::warning
|
||||
This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
|
||||
:::
|
||||
### Parallel processing using [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
|
||||
|
||||
This setting is useful for any replicated table.
|
||||
|
||||
## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key}
|
||||
|
||||
An arbitrary integer expression that can be used to split work between replicas for a specific table.
|
||||
The value can be any integer expression.
|
||||
A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
|
||||
and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type).
|
||||
|
||||
Simple expressions using primary keys are preferred.
|
||||
|
||||
If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards.
|
||||
Otherwise, it will behave the same as for the `SAMPLE` key: it will use multiple replicas of each shard.
|
||||
|
||||
## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type}
|
||||
|
||||
How to use `parallel_replicas_custom_key` expression for splitting work between replicas.
|
||||
|
||||
Possible values:
|
||||
|
||||
- `default` — Use the default implementation using modulo operation on the `parallel_replicas_custom_key`.
|
||||
- `range` — Split the entire value space of the expression into ranges. This type of filtering is useful if values of `parallel_replicas_custom_key` are uniformly spread across the entire integer space, e.g. hash values.
|
||||
|
||||
Default value: `default`.
|
||||
|
||||
## compile_expressions {#compile-expressions}
|
||||
|
||||
@ -1515,7 +1548,7 @@ Enables or disables asynchronous inserts. This makes sense only for insertion ov
|
||||
|
||||
If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables.
|
||||
|
||||
The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query.
|
||||
The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query. Also the buffer will be flushed to disk if at least [async_insert_max_query_number](#async-insert-max-query-number) async insert queries per block were received. This last setting takes effect only if [async_insert_deduplicate](#async-insert-deduplicate) is enabled.
|
||||
|
||||
If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will wait for the data to be processed and flushed to the table. Otherwise, the query would be processed almost instantly, even if the data is not inserted.
|
||||
|
||||
|
@ -80,7 +80,7 @@ Required parameters:
|
||||
|
||||
- `type` — `encrypted`. Otherwise the encrypted disk is not created.
|
||||
- `disk` — Type of disk for data storage.
|
||||
- `key` — The key for encryption and decryption. Type: [Uint64](/docs/en/sql-reference/data-types/int-uint.md). You can use `key_hex` parameter to encrypt in hexadecimal form.
|
||||
- `key` — The key for encryption and decryption. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). You can use the `key_hex` parameter to encode the key in hexadecimal form.
|
||||
You can specify multiple keys using the `id` attribute (see example above).
|
||||
|
||||
Optional parameters:
|
||||
@ -135,11 +135,13 @@ Example of configuration for versions later or equal to 22.8:
|
||||
</cache>
|
||||
</disks>
|
||||
<policies>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cache</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
<s3-cache>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cache</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3-cache>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
```
|
||||
@ -159,11 +161,13 @@ Example of configuration for versions earlier than 22.8:
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
<s3-cache>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3-cache>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
```
|
||||
|
@ -15,6 +15,13 @@ Columns:
|
||||
|
||||
- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation.
|
||||
|
||||
- `kind` ([Enum8](../../sql-reference/data-types/enum.md)) — The [SpanKind](https://opentelemetry.io/docs/reference/specification/trace/api/#spankind) of the span.
|
||||
- `INTERNAL` — Indicates that the span represents an internal operation within an application.
|
||||
- `SERVER` — Indicates that the span covers server-side handling of a synchronous RPC or other remote request.
|
||||
- `CLIENT` — Indicates that the span describes a request to some remote service.
|
||||
- `PRODUCER` — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts.
|
||||
- `CONSUMER` — Indicates that the span describes a child of an asynchronous PRODUCER request.
|
||||
|
||||
- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds).
|
||||
|
||||
- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds).
|
||||
@ -42,6 +49,7 @@ trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914
|
||||
span_id: 701487461015578150
|
||||
parent_span_id: 2991972114672045096
|
||||
operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl()
|
||||
kind: INTERNAL
|
||||
start_time_us: 1612374594529090
|
||||
finish_time_us: 1612374594529108
|
||||
finish_date: 2021-02-03
|
||||
|
52
docs/en/operations/system-tables/server_settings.md
Normal file
52
docs/en/operations/system-tables/server_settings.md
Normal file
@ -0,0 +1,52 @@
|
||||
---
|
||||
slug: /en/operations/system-tables/server_settings
|
||||
---
|
||||
# server_settings
|
||||
|
||||
Contains information about global settings for the server, which were specified in `config.xml`.
|
||||
Currently, the table shows only settings from the first layer of `config.xml` and doesn't support nested configs (e.g. [logger](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger)).
|
||||
|
||||
Columns:
|
||||
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — Server setting name.
|
||||
- `value` ([String](../../sql-reference/data-types/string.md)) — Server setting value.
|
||||
- `default` ([String](../../sql-reference/data-types/string.md)) — Server setting default value.
|
||||
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`.
|
||||
- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
|
||||
- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
|
||||
|
||||
**Example**
|
||||
|
||||
The following example shows how to get information about server settings whose name contains `thread_pool`.
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM system.server_settings
|
||||
WHERE name LIKE '%thread_pool%'
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
|
||||
│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │
|
||||
│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
|
||||
│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │
|
||||
│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │
|
||||
│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │
|
||||
│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │
|
||||
└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
|
||||
```
|
||||
|
||||
Using `WHERE changed` can be useful, for example, when you want to check
|
||||
whether settings in configuration files are loaded correctly and are in use.
|
||||
|
||||
<!-- -->
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.server_settings WHERE changed AND name='max_thread_pool_size'
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
||||
- [Settings](../../operations/system-tables/settings.md)
|
||||
- [Configuration Files](../../operations/configuration-files.md)
|
||||
- [Server Settings](../../operations/server-configuration-parameters/settings.md)
|
@ -16,6 +16,7 @@ Columns:
|
||||
- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
|
||||
- `0` — Current user can change the setting.
|
||||
- `1` — Current user can’t change the setting.
|
||||
- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -27,7 +27,7 @@ $ clickhouse-format --query "select number from numbers(10) where number%2 order
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```sql
|
||||
SELECT number
|
||||
FROM numbers(10)
|
||||
WHERE number % 2
|
||||
@ -54,7 +54,7 @@ $ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNIO
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```sql
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
@ -75,7 +75,7 @@ $ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWE
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```sql
|
||||
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
|
||||
```
|
||||
|
||||
@ -87,7 +87,7 @@ $ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWE
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```sql
|
||||
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
|
||||
```
|
||||
|
||||
@ -99,7 +99,7 @@ $ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELE
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```sql
|
||||
SELECT * \
|
||||
FROM \
|
||||
( \
|
||||
|
@ -6,29 +6,26 @@ sidebar_label: clickhouse-local
|
||||
|
||||
# clickhouse-local
|
||||
|
||||
The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server.
|
||||
The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
|
||||
|
||||
Accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/).
|
||||
|
||||
`clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
|
||||
|
||||
By default `clickhouse-local` does not have access to data on the same host, but it supports loading server configuration using `--config-file` argument.
|
||||
|
||||
:::warning
|
||||
It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error.
|
||||
:::
|
||||
|
||||
For temporary data, a unique temporary data directory is created by default.
|
||||
By default `clickhouse-local` has access to data on the same host, and it does not depend on the server's configuration. It also supports loading server configuration using the `--config-file` argument. For temporary data, a unique temporary data directory is created by default.
|
||||
|
||||
## Usage {#usage}
|
||||
|
||||
Basic usage:
|
||||
Basic usage (Linux):
|
||||
|
||||
``` bash
|
||||
$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" \
|
||||
--query "query"
|
||||
$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" --query "query"
|
||||
```
|
||||
|
||||
Basic usage (Mac):
|
||||
|
||||
``` bash
|
||||
$ ./clickhouse local --structure "table_structure" --input-format "format_of_incoming_data" --query "query"
|
||||
```
|
||||
|
||||
Also supported on Windows through WSL2.
|
||||
|
||||
Arguments:
|
||||
|
||||
- `-S`, `--structure` — table structure for input data.
|
||||
|
@ -7,8 +7,8 @@ sidebar_position: 37
|
||||
|
||||
Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`.
|
||||
|
||||
Returns Float64. When `n <= 1`, returns +∞.
|
||||
Returns Float64. When `n <= 1`, returns `nan`.
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
:::
|
||||
|
@ -11,15 +11,15 @@ sidebar_title: exponentialMovingAverage
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
exponentialMovingAverage(x)(value, timestamp)
|
||||
exponentialMovingAverage(x)(value, timeunit)
|
||||
```
|
||||
|
||||
Each `value` corresponds to the determinate `timestamp`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is considered to be.
|
||||
Each `value` corresponds to a particular `timeunit`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is given.
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
- `timestamp` — Timestamp. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). A timeunit is not a timestamp (in seconds); it is an index of the time interval. It can be calculated using [intDiv](../../functions/arithmetic-functions/#intdiva-b).
|
||||
|
||||
**Parameters**
|
||||
|
||||
@ -148,3 +148,58 @@ Result:
|
||||
│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│
|
||||
└───────┴──────┴──────────────────────┴────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE TABLE data
|
||||
ENGINE = Memory AS
|
||||
SELECT
|
||||
10 AS value,
|
||||
toDateTime('2020-01-01') + (3600 * number) AS time
|
||||
FROM numbers_mt(10);
|
||||
|
||||
|
||||
-- Calculate timeunit using intDiv
|
||||
SELECT
|
||||
value,
|
||||
time,
|
||||
exponentialMovingAverage(1)(value, intDiv(toUInt32(time), 3600)) OVER (ORDER BY time ASC) AS res,
|
||||
intDiv(toUInt32(time), 3600) AS timeunit
|
||||
FROM data
|
||||
ORDER BY time ASC;
|
||||
|
||||
┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐
|
||||
│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │
|
||||
│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │
|
||||
│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │
|
||||
│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │
|
||||
│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │
|
||||
│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │
|
||||
│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │
|
||||
│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │
|
||||
│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │
|
||||
│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │
|
||||
└───────┴─────────────────────┴─────────────┴──────────┘
|
||||
|
||||
|
||||
-- Calculate timeunit using toRelativeHourNum
|
||||
SELECT
|
||||
value,
|
||||
time,
|
||||
exponentialMovingAverage(1)(value, toRelativeHourNum(time)) OVER (ORDER BY time ASC) AS res,
|
||||
toRelativeHourNum(time) AS timeunit
|
||||
FROM data
|
||||
ORDER BY time ASC;
|
||||
|
||||
┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐
|
||||
│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │
|
||||
│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │
|
||||
│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │
|
||||
│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │
|
||||
│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │
|
||||
│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │
|
||||
│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │
|
||||
│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │
|
||||
│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │
|
||||
│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │
|
||||
└───────┴─────────────────────┴─────────────┴──────────┘
|
||||
```
|
||||
|
@ -48,7 +48,35 @@ When dividing by zero you get ‘inf’, ‘-inf’, or ‘nan’.
|
||||
## intDiv(a, b)
|
||||
|
||||
Calculates the quotient of the numbers. Divides into integers, rounding down (by the absolute value).
|
||||
An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
|
||||
|
||||
Returns an integer of the type of the dividend (the first parameter).
|
||||
|
||||
An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
intDiv(toFloat64(1), 0.001) AS res,
|
||||
toTypeName(res)
|
||||
```
|
||||
```response
|
||||
┌──res─┬─toTypeName(intDiv(toFloat64(1), 0.001))─┐
|
||||
│ 1000 │ Int64 │
|
||||
└──────┴─────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
intDiv(1, 0.001) AS res,
|
||||
toTypeName(res)
|
||||
```
|
||||
```response
|
||||
Received exception from server (version 23.2.1):
|
||||
Code: 153. DB::Exception: Received from localhost:9000. DB::Exception: Cannot perform integer division, because it will produce infinite or too large number: While processing intDiv(1, 0.001) AS res, toTypeName(res). (ILLEGAL_DIVISION)
|
||||
```
|
||||
|
||||
## intDivOrZero(a, b)
|
||||
|
||||
|
@ -1126,15 +1126,48 @@ Rounds the time to the half hour.
|
||||
|
||||
## toYYYYMM
|
||||
|
||||
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM).
|
||||
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
|
||||
|
||||
### example
|
||||
```sql
|
||||
SELECT
|
||||
toYYYYMM(now(), 'US/Eastern')
|
||||
```
|
||||
```response
|
||||
┌─toYYYYMM(now(), 'US/Eastern')─┐
|
||||
│ 202303 │
|
||||
└───────────────────────────────┘
|
||||
```
|
||||
|
||||
## toYYYYMMDD
|
||||
|
||||
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD).
|
||||
Converts a date or date with time to a UInt32 number containing the year, month and day number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
|
||||
|
||||
### example
|
||||
```sql
|
||||
SELECT
|
||||
toYYYYMMDD(now(), 'US/Eastern')
|
||||
```
|
||||
```response
|
||||
┌─toYYYYMMDD(now(), 'US/Eastern')─┐
|
||||
│ 20230302 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## toYYYYMMDDhhmmss
|
||||
|
||||
Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss).
|
||||
Converts a date or date with time to a UInt64 number containing the year, month, day, hour, minute and second number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
|
||||
|
||||
### example
|
||||
```sql
|
||||
SELECT
|
||||
toYYYYMMDDhhmmss(now(), 'US/Eastern')
|
||||
```
|
||||
```response
|
||||
┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐
|
||||
│ 20230302112209 │
|
||||
└───────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters
|
||||
|
||||
@ -1231,8 +1264,8 @@ Using replacement fields, you can define a pattern for the resulting string. “
|
||||
| %e | day of the month, space-padded (1-31) | 2 |
|
||||
| %f | fractional second from the fractional part of DateTime64 | 1234560 |
|
||||
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
|
||||
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
|
||||
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
|
||||
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
|
||||
| %h | hour in 12h format (01-12) | 09 |
|
||||
| %H | hour in 24h format (00-23) | 22 |
|
||||
| %i | minute (00-59) | 33 |
|
||||
|
@ -280,12 +280,20 @@ SELECT
|
||||
|
||||
## toIPv4OrDefault(string)
|
||||
|
||||
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns 0.
|
||||
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns `0.0.0.0` (0 IPv4).
|
||||
|
||||
## toIPv4OrNull(string)
|
||||
|
||||
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null.
|
||||
|
||||
## toIPv6OrDefault(string)
|
||||
|
||||
Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns `::` (0 IPv6).
|
||||
|
||||
## toIPv6OrNull(string)
|
||||
|
||||
Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null.
|
||||
|
||||
## toIPv6
|
||||
|
||||
Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value.
|
||||
|
@ -579,3 +579,33 @@ Result:
|
||||
│ 3628800 │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
## widthBucket(operand, low, high, count)
|
||||
|
||||
Returns the number of the bucket in which `operand` falls in a histogram having `count` equal-width buckets spanning the range `low` to `high`. Returns `0` if `operand < low`, and returns `count+1` if `operand >= high`.
|
||||
|
||||
`operand`, `low`, `high` can be any native number type. `count` must be an unsigned native integer and its value cannot be zero.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
widthBucket(operand, low, high, count)
|
||||
```
|
||||
|
||||
There is also a case-insensitive alias called `WIDTH_BUCKET` to provide compatibility with other databases.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT widthBucket(10.15, -8.6, 23, 18);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─widthBucket(10.15, -8.6, 23, 18)─┐
|
||||
│ 11 │
|
||||
└──────────────────────────────────┘
|
||||
```
|
@ -226,6 +226,17 @@ SELECT splitByNonAlpha(' 1! a, b. ');
|
||||
Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default.
|
||||
Returns the string.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT arrayStringConcat(['12/05/2021', '12:50:00'], ' ') AS DateString;
|
||||
```
|
||||
```text
|
||||
┌─DateString──────────┐
|
||||
│ 12/05/2021 12:50:00 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings])
|
||||
|
||||
Selects substrings of consecutive bytes from the ranges a-z and A-Z. Returns an array of substrings.
|
||||
@ -364,4 +375,4 @@ Result:
|
||||
┌─tokens────────────────────────────┐
|
||||
│ ['test1','test2','test3','test4'] │
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
```
|
||||
|
@ -330,7 +330,7 @@ repeat(s, n)
|
||||
**Arguments**
|
||||
|
||||
- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md).
|
||||
- `n` — The number of times to repeat the string. [UInt](../../sql-reference/data-types/int-uint.md).
|
||||
- `n` — The number of times to repeat the string. [UInt or Int](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -66,6 +66,42 @@ Result:
|
||||
|
||||
- [Map(key, value)](../../sql-reference/data-types/map.md) data type
|
||||
|
||||
## mapFromArrays
|
||||
|
||||
Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md).
|
||||
|
||||
The function is a more convenient alternative to `CAST((key_array, value_array), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mapFromArrays(keys, values)
|
||||
```
|
||||
|
||||
Alias: `MAP_FROM_ARRAYS(keys, values)`
|
||||
|
||||
**Arguments**
|
||||
- `keys` — Given key array to create a map from. The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md).
|
||||
- `values` — Given value array to create a map from.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A map whose keys and values are constructed from the key and value arrays.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
select mapFromArrays(['a', 'b', 'c'], [1, 2, 3])
|
||||
```
|
||||
|
||||
```text
|
||||
┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐
|
||||
│ {'a':1,'b':2,'c':3} │
|
||||
└───────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapAdd
|
||||
|
||||
Collect all the keys and sum corresponding values.
|
||||
@ -235,7 +271,7 @@ Determines whether the `map` contains the `key` parameter.
|
||||
mapContains(map, key)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
|
||||
- `key` — Key. Type matches the type of keys of `map` parameter.
|
||||
@ -280,7 +316,7 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat
|
||||
mapKeys(map)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
|
||||
|
||||
@ -323,7 +359,7 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat
|
||||
mapValues(map)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
|
||||
|
||||
@ -362,7 +398,7 @@ Result:
|
||||
mapContainsKeyLike(map, pattern)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
|
||||
- `pattern` - String pattern to match.
|
||||
@ -400,7 +436,7 @@ Result:
|
||||
mapExtractKeyLike(map, pattern)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
|
||||
- `pattern` - String pattern to match.
|
||||
@ -438,7 +474,7 @@ Result:
|
||||
mapApply(func, map)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
|
||||
- `map` — [Map](../../sql-reference/data-types/map.md).
|
||||
@ -478,7 +514,7 @@ Result:
|
||||
mapFilter(func, map)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
|
||||
- `map` — [Map](../../sql-reference/data-types/map.md).
|
||||
@ -520,7 +556,7 @@ Result:
|
||||
mapUpdate(map1, map2)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `map1` [Map](../../sql-reference/data-types/map.md).
|
||||
- `map2` [Map](../../sql-reference/data-types/map.md).
|
||||
|
@ -6,22 +6,23 @@ sidebar_label: Type Conversion
|
||||
|
||||
# Type Conversion Functions
|
||||
|
||||
## Common Issues of Numeric Conversions
|
||||
## Common Issues with Data Conversion
|
||||
|
||||
When you convert a value from one to another data type, you should remember that if you try to fit a value from a larger data type to a smaller one (for example Int64 to Int32), or convert from one data type to another (for example `String` to `Int`), you could have data loss. Test beforehand.
|
||||
Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between
|
||||
incompatible datatypes (for example from `String` to `Int`). Make sure to check carefully if the result is as expected.
|
||||
|
||||
ClickHouse has the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
|
||||
ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
|
||||
|
||||
## toInt(8\|16\|32\|64\|128\|256)
|
||||
|
||||
Converts an input value to the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
|
||||
Converts an input value to a value of the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
|
||||
|
||||
- `toInt8(expr)` — Results in the `Int8` data type.
|
||||
- `toInt16(expr)` — Results in the `Int16` data type.
|
||||
- `toInt32(expr)` — Results in the `Int32` data type.
|
||||
- `toInt64(expr)` — Results in the `Int64` data type.
|
||||
- `toInt128(expr)` — Results in the `Int128` data type.
|
||||
- `toInt256(expr)` — Results in the `Int256` data type.
|
||||
- `toInt8(expr)` — Converts to a value of data type `Int8`.
|
||||
- `toInt16(expr)` — Converts to a value of data type `Int16`.
|
||||
- `toInt32(expr)` — Converts to a value of data type `Int32`.
|
||||
- `toInt64(expr)` — Converts to a value of data type `Int64`.
|
||||
- `toInt128(expr)` — Converts to a value of data type `Int128`.
|
||||
- `toInt256(expr)` — Converts to a value of data type `Int256`.
|
||||
|
||||
**Arguments**
|
||||
|
||||
@ -53,7 +54,7 @@ Result:
|
||||
|
||||
## toInt(8\|16\|32\|64\|128\|256)OrZero
|
||||
|
||||
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns 0.
|
||||
Takes an argument of type [String](/docs/en/sql-reference/data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -73,7 +74,7 @@ Result:
|
||||
|
||||
## toInt(8\|16\|32\|64\|128\|256)OrNull
|
||||
|
||||
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns NULL.
|
||||
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `NULL`.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -93,7 +94,7 @@ Result:
|
||||
|
||||
## toInt(8\|16\|32\|64\|128\|256)OrDefault
|
||||
|
||||
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns the default type value.
|
||||
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns the default type value.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -116,11 +117,11 @@ Result:
|
||||
|
||||
Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
|
||||
|
||||
- `toUInt8(expr)` — Results in the `UInt8` data type.
|
||||
- `toUInt16(expr)` — Results in the `UInt16` data type.
|
||||
- `toUInt32(expr)` — Results in the `UInt32` data type.
|
||||
- `toUInt64(expr)` — Results in the `UInt64` data type.
|
||||
- `toUInt256(expr)` — Results in the `UInt256` data type.
|
||||
- `toUInt8(expr)` — Converts to a value of data type `UInt8`.
|
||||
- `toUInt16(expr)` — Converts to a value of data type `UInt16`.
|
||||
- `toUInt32(expr)` — Converts to a value of data type `UInt32`.
|
||||
- `toUInt64(expr)` — Converts to a value of data type `UInt64`.
|
||||
- `toUInt256(expr)` — Converts to a value of data type `UInt256`.
|
||||
|
||||
**Arguments**
|
||||
|
||||
@ -128,7 +129,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint
|
||||
|
||||
**Returned value**
|
||||
|
||||
Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type.
|
||||
- Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type.
|
||||
|
||||
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
|
||||
|
||||
@ -166,26 +167,30 @@ Result:
|
||||
|
||||
## toDate
|
||||
|
||||
Converts the argument to `Date` data type.
|
||||
Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type.
|
||||
|
||||
If the argument is [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), it truncates it and leaves the date component of the DateTime:
|
||||
|
||||
If the argument is `DateTime` or `DateTime64`, it truncates it, leaving the date component of the DateTime:
|
||||
```sql
|
||||
SELECT
|
||||
now() AS x,
|
||||
toDate(x)
|
||||
```
|
||||
|
||||
```response
|
||||
┌───────────────────x─┬─toDate(now())─┐
|
||||
│ 2022-12-30 13:44:17 │ 2022-12-30 │
|
||||
└─────────────────────┴───────────────┘
|
||||
```
|
||||
|
||||
If the argument is a string, it is parsed as Date or DateTime. If it was parsed as DateTime, the date component is being used:
|
||||
If the argument is a [String](/docs/en/sql-reference/data-types/string.md), it is parsed as [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). If it was parsed as [DateTime](/docs/en/sql-reference/data-types/datetime.md), the date component is being used:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toDate('2022-12-30') AS x,
|
||||
toTypeName(x)
|
||||
```
|
||||
|
||||
```response
|
||||
┌──────────x─┬─toTypeName(toDate('2022-12-30'))─┐
|
||||
│ 2022-12-30 │ Date │
|
||||
@ -193,18 +198,20 @@ SELECT
|
||||
|
||||
1 row in set. Elapsed: 0.001 sec.
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toDate('2022-12-30 01:02:03') AS x,
|
||||
toTypeName(x)
|
||||
```
|
||||
|
||||
```response
|
||||
┌──────────x─┬─toTypeName(toDate('2022-12-30 01:02:03'))─┐
|
||||
│ 2022-12-30 │ Date │
|
||||
└────────────┴───────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
If the argument is a number and it looks like a UNIX timestamp (is greater than 65535), it is interpreted as a DateTime, then truncated to Date in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to Date depends on the timezone:
|
||||
If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](/docs/en/sql-reference/data-types/datetime.md), then truncated to [Date](/docs/en/sql-reference/data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](/docs/en/sql-reference/data-types/date.md) depends on the timezone:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
@ -217,6 +224,7 @@ SELECT
|
||||
toDate(ts) AS date_Amsterdam_2,
|
||||
toDate(ts, 'Pacific/Apia') AS date_Samoa_2
|
||||
```
|
||||
|
||||
```response
|
||||
Row 1:
|
||||
──────
|
||||
@ -232,7 +240,7 @@ date_Samoa_2: 2022-12-31
|
||||
|
||||
The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones.
|
||||
|
||||
If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (a UNIX day) and converted to Date. It corresponds to the internal numeric representation of the `Date` data type. Example:
|
||||
If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](/docs/en/sql-reference/data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example:
|
||||
|
||||
```sql
|
||||
SELECT toDate(12345)
|
||||
@ -270,8 +278,6 @@ SELECT
|
||||
└─────────────────────┴───────────────┴─────────────┴─────────────────────┘
|
||||
```
|
||||
|
||||
Have a nice day working with dates and times.
|
||||
|
||||
## toDateOrZero
|
||||
|
||||
## toDateOrNull
|
||||
@ -288,7 +294,7 @@ Have a nice day working with dates and times.
|
||||
|
||||
## toDate32
|
||||
|
||||
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account.
|
||||
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, its borders are taken into account.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -302,9 +308,7 @@ toDate32(expr)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A calendar date.
|
||||
|
||||
Type: [Date32](/docs/en/sql-reference/data-types/date32.md).
|
||||
- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -332,7 +336,7 @@ SELECT toDate32('1899-01-01') AS value, toTypeName(value);
|
||||
└────────────┴────────────────────────────────────┘
|
||||
```
|
||||
|
||||
3. With `Date`-type argument:
|
||||
3. With [Date](/docs/en/sql-reference/data-types/date.md) argument:
|
||||
|
||||
``` sql
|
||||
SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value);
|
||||
@ -386,7 +390,7 @@ Result:
|
||||
|
||||
## toDate32OrDefault
|
||||
|
||||
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account. Returns default value if an invalid argument is received.
|
||||
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, its borders are taken into account. Returns default value if an invalid argument is received.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -666,7 +670,7 @@ YYYY-MM-DD
|
||||
YYYY-MM-DD hh:mm:ss
|
||||
```
|
||||
|
||||
As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing ‘toDate(unix_timestamp)’, which otherwise would be an error and would require writing the more cumbersome ‘toDate(toDateTime(unix_timestamp))’.
|
||||
As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing `toDate(unix_timestamp)`, which otherwise would be an error and would require writing the more cumbersome `toDate(toDateTime(unix_timestamp))`.
|
||||
|
||||
Conversion between a date and a date with time is performed the natural way: by adding a null time or dropping the time.
|
||||
|
||||
@ -696,7 +700,7 @@ Also see the `toUnixTimestamp` function.
|
||||
|
||||
## toFixedString(s, N)
|
||||
|
||||
Converts a String type argument to a FixedString(N) type (a string with fixed length N). N must be a constant.
|
||||
Converts a [String](/docs/en/sql-reference/data-types/string.md) type argument to a [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) type (a string of fixed length N).
|
||||
If the string has fewer bytes than N, it is padded with null bytes to the right. If the string has more bytes than N, an exception is thrown.
|
||||
|
||||
## toStringCutToZero(s)
|
||||
@ -914,7 +918,7 @@ Result:
|
||||
└─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘
|
||||
```
|
||||
|
||||
Conversion to FixedString(N) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
|
||||
Conversion to [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
|
||||
|
||||
Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported.
|
||||
|
||||
@ -1174,7 +1178,7 @@ For all of the formats with separator the function parses months names expressed
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `time_string` converted to the `DateTime` data type.
|
||||
- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type.
|
||||
|
||||
**Examples**
|
||||
|
||||
@ -1254,10 +1258,10 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
|
||||
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
|
||||
- [toDate](#todate)
|
||||
- [toDateTime](#todatetime)
|
||||
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
|
||||
|
||||
## parseDateTimeBestEffortUS
|
||||
|
||||
|
@ -233,8 +233,9 @@ If `some_predicate` is not selective enough, it will return large amount of data
|
||||
|
||||
### Distributed Subqueries and max_parallel_replicas
|
||||
|
||||
When max_parallel_replicas is greater than 1, distributed queries are further transformed. For example, the following:
|
||||
When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed.
|
||||
|
||||
For example, the following:
|
||||
```sql
|
||||
SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
|
||||
SETTINGS max_parallel_replicas=3
|
||||
@ -247,8 +248,12 @@ SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM
|
||||
SETTINGS parallel_replicas_count=3, parallel_replicas_offset=M
|
||||
```
|
||||
|
||||
where M is between 1 and 3 depending on which replica the local query is executing on. These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
|
||||
where M is between 1 and 3 depending on which replica the local query is executing on.
|
||||
|
||||
Therefore adding the max_parallel_replicas setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN.
|
||||
These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
|
||||
|
||||
Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN.
|
||||
|
||||
One workaround if local_table_2 does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`.
|
||||
|
||||
If a table doesn't have a sampling key, more flexible options for [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) can be used that can produce different and more optimal behaviour.
|
||||
|
@ -16,7 +16,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY COMMENT 'Comment'
|
||||
|
||||
**Examples**
|
||||
|
||||
Creating a table with comment (for more information, see the [COMMENT] clause(../../../sql-reference/statements/create/table.md#comment-table)):
|
||||
Creating a table with comment (for more information, see the [COMMENT](../../../sql-reference/statements/create/table.md#comment-table) clause):
|
||||
|
||||
``` sql
|
||||
CREATE TABLE table_with_comment
|
||||
|
@ -19,8 +19,15 @@ CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_n
|
||||
|
||||
## Example
|
||||
|
||||
Create a user:
|
||||
```sql
|
||||
CREATE USER robin IDENTIFIED BY 'password';
|
||||
```
|
||||
|
||||
Create the `max_memory_usage_profile` settings profile with value and constraints for the `max_memory_usage` setting and assign it to user `robin`:
|
||||
|
||||
``` sql
|
||||
CREATE SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin
|
||||
CREATE
|
||||
SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000
|
||||
TO robin
|
||||
```
|
||||
|
@ -17,10 +17,11 @@ By default, tables are created only on the current server. Distributed DDL queri
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1],
|
||||
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2],
|
||||
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1] [COMMENT 'comment for column'],
|
||||
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2] [COMMENT 'comment for column'],
|
||||
...
|
||||
) ENGINE = engine
|
||||
COMMENT 'comment for table'
|
||||
```
|
||||
|
||||
Creates a table named `table_name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine.
|
||||
@ -32,6 +33,8 @@ Expressions can also be defined for default values (see below).
|
||||
|
||||
If necessary, primary key can be specified, with one or more key expressions.
|
||||
|
||||
Comments can be added for columns and for the table.
|
||||
|
||||
### With a Schema Similar to Other Table
|
||||
|
||||
``` sql
|
||||
@ -107,25 +110,23 @@ If the type is not `Nullable` and if `NULL` is specified, it will be treated as
|
||||
|
||||
See also [data_type_default_nullable](../../../operations/settings/settings.md#data_type_default_nullable) setting.
|
||||
|
||||
## Default Values
|
||||
## Default Values {#default_values}
|
||||
|
||||
The column description can specify an expression for a default value, in one of the following ways: `DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`.
|
||||
The column description can specify a default value expression in the form of `DEFAULT expr`, `MATERIALIZED expr`, or `ALIAS expr`. Example: `URLDomain String DEFAULT domain(URL)`.
|
||||
|
||||
Example: `URLDomain String DEFAULT domain(URL)`.
|
||||
The expression `expr` is optional. If it is omitted, the column type must be specified explicitly and the default value will be `0` for numeric columns, `''` (the empty string) for string columns, `[]` (the empty array) for array columns, `1970-01-01` for date columns, or `NULL` for nullable columns.
|
||||
|
||||
If an expression for the default value is not defined, the default values will be set to zeros for numbers, empty strings for strings, empty arrays for arrays, and `1970-01-01` for dates or zero unix timestamp for DateTime, NULL for Nullable.
|
||||
The column type of a default value column can be omitted, in which case it is inferred from `expr`'s type. For example, the type of column `EventDate DEFAULT toDate(EventTime)` will be `Date`.
|
||||
|
||||
If the default expression is defined, the column type is optional. If there isn’t an explicitly defined type, the default expression type is used. Example: `EventDate DEFAULT toDate(EventTime)` – the ‘Date’ type will be used for the ‘EventDate’ column.
|
||||
If both a data type and a default value expression are specified, an implicit type casting function is inserted which converts the expression to the specified type. Example: `Hits UInt32 DEFAULT 0` is internally represented as `Hits UInt32 DEFAULT toUInt32(0)`.
|
||||
|
||||
If the data type and default expression are defined explicitly, this expression will be cast to the specified type using type casting functions. Example: `Hits UInt32 DEFAULT 0` means the same thing as `Hits UInt32 DEFAULT toUInt32(0)`.
|
||||
|
||||
Default expressions may be defined as an arbitrary expression from table constants and columns. When creating and changing the table structure, it checks that expressions do not contain loops. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed.
|
||||
A default value expression `expr` may reference arbitrary table columns and constants. ClickHouse checks that changes of the table structure do not introduce loops in the expression calculation. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed.
|
||||
|
||||
### DEFAULT
|
||||
|
||||
`DEFAULT expr`
|
||||
|
||||
Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression.
|
||||
Normal default value. If the value of such a column is not specified in an INSERT query, it is computed from `expr`.
|
||||
|
||||
Example:
|
||||
|
||||
@ -151,9 +152,9 @@ SELECT * FROM test;
|
||||
|
||||
`MATERIALIZED expr`
|
||||
|
||||
Materialized expression. Such a column can’t be specified for INSERT, because it is always calculated.
|
||||
For an INSERT without a list of columns, these columns are not considered.
|
||||
In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
|
||||
Materialized expression. Values of such columns are always calculated, they cannot be specified in INSERT queries.
|
||||
|
||||
Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`.
|
||||
|
||||
Example:
|
||||
|
||||
@ -189,8 +190,9 @@ SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1;
|
||||
|
||||
`EPHEMERAL [expr]`
|
||||
|
||||
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required.
|
||||
INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
|
||||
Ephemeral column. Columns of this type are not stored in the table and it is not possible to SELECT from them. The only purpose of ephemeral columns is to build default value expressions of other columns from them.
|
||||
|
||||
An insert without explicitly specified columns will skip columns of this type. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`.
|
||||
|
||||
Example:
|
||||
|
||||
@ -202,7 +204,7 @@ CREATE OR REPLACE TABLE test
|
||||
hexed FixedString(4) DEFAULT unhex(unhexed)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
ORDER BY id;
|
||||
|
||||
INSERT INTO test (id, unhexed) Values (1, '5a90b714');
|
||||
|
||||
@ -224,9 +226,9 @@ hex(hexed): 5A90B714
|
||||
|
||||
`ALIAS expr`
|
||||
|
||||
Synonym. Such a column isn’t stored in the table at all.
|
||||
Its values can’t be inserted in a table, and it is not substituted when using an asterisk in a SELECT query.
|
||||
It can be used in SELECTs if the alias is expanded during query parsing.
|
||||
Calculated columns (synonym). Columns of this type are not stored in the table and it is not possible to INSERT values into them.
|
||||
|
||||
When SELECT queries explicitly reference columns of this type, the value is computed at query time from `expr`. By default, `SELECT *` excludes ALIAS columns. This behavior can be disabled with setting `asterisk_include_alias_columns`.
|
||||
|
||||
When using the ALTER query to add new columns, old data for these columns is not written. Instead, when reading old data that does not have values for the new columns, expressions are computed on the fly by default. However, if running the expressions requires different columns that are not indicated in the query, these columns will additionally be read, but only for the blocks of data that need it.
|
||||
|
||||
@ -267,7 +269,7 @@ You can define a [primary key](../../../engines/table-engines/mergetree-family/m
|
||||
CREATE TABLE db.table_name
|
||||
(
|
||||
name1 type1, name2 type2, ...,
|
||||
PRIMARY KEY(expr1[, expr2,...])]
|
||||
PRIMARY KEY(expr1[, expr2,...])
|
||||
)
|
||||
ENGINE = engine;
|
||||
```
|
||||
@ -391,15 +393,15 @@ These codecs are designed to make compression more effective by using specific f
|
||||
|
||||
#### DoubleDelta
|
||||
|
||||
`DoubleDelta` — Calculates delta of deltas and writes it in compact binary form. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-byte deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
|
||||
`DoubleDelta(bytes_size)` — Calculates delta of deltas and writes it in compact binary form. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
|
||||
|
||||
#### Gorilla
|
||||
|
||||
`Gorilla` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series changes, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078).
|
||||
`Gorilla(bytes_size)` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series change, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078).
|
||||
|
||||
#### FPC
|
||||
|
||||
`FPC` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf).
|
||||
`FPC(level, float_size)` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. Possible `level` values: 1-28, the default value is 12. Possible `float_size` values: 4, 8, the default value is `sizeof(type)` if type is Float. In all other cases, it’s 4. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf).
|
||||
|
||||
#### T64
|
||||
|
||||
@ -471,7 +473,7 @@ ENGINE = MergeTree ORDER BY x;
|
||||
ClickHouse supports temporary tables which have the following characteristics:
|
||||
|
||||
- Temporary tables disappear when the session ends, including if the connection is lost.
|
||||
- A temporary table uses the Memory engine only.
|
||||
- A temporary table uses the Memory table engine when engine is not specified and it may use any table engine except Replicated and `KeeperMap` engines.
|
||||
- The DB can’t be specified for a temporary table. It is created outside of databases.
|
||||
- Impossible to create a temporary table with distributed DDL query on all cluster servers (by using `ON CLUSTER`): this table exists only in the current session.
|
||||
- If a temporary table has the same name as another one and a query specifies the table name without specifying the DB, the temporary table will be used.
|
||||
@ -485,7 +487,7 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
|
||||
...
|
||||
)
|
||||
) [ENGINE = engine]
|
||||
```
|
||||
|
||||
In most cases, temporary tables are not created manually, but when using external data for a query, or for distributed `(GLOBAL) IN`. For more information, see the appropriate sections
|
||||
@ -573,7 +575,7 @@ SELECT * FROM base.t1;
|
||||
You can add a comment to the table when creating it.
|
||||
|
||||
:::note
|
||||
The comment is supported for all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md).
|
||||
The comment clause is supported by all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md).
|
||||
:::
|
||||
|
||||
|
||||
|
@ -54,6 +54,10 @@ SELECT * FROM view(column1=value1, column2=value2 ...)
|
||||
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
|
||||
```
|
||||
|
||||
:::tip
|
||||
Here is a step-by-step guide on using [Materialized views](/docs/en/guides/developer/cascading-materialized-views.md).
|
||||
:::
|
||||
|
||||
Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query.
|
||||
|
||||
When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` – the table engine for storing data.
|
||||
@ -66,6 +70,12 @@ A materialized view is implemented as follows: when inserting data to the table
|
||||
Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views.
|
||||
|
||||
Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of the source table (like update, delete, drop partition, etc.) do not change the materialized view.
|
||||
|
||||
Materialized views in ClickHouse do not have deterministic behaviour in case of errors. This means that blocks that had been already written will be preserved in the destination table, but all blocks after error will not.
|
||||
|
||||
By default, if pushing to one of the views fails, the INSERT query fails too, and some blocks may not be written to the destination table. This can be changed with the `materialized_views_ignore_errors` setting (set it for the `INSERT` query): with `materialized_views_ignore_errors=true`, any errors while pushing to views are ignored and all blocks are written to the destination table.
|
||||
|
||||
Also note that `materialized_views_ignore_errors` is set to `true` by default for `system.*_log` tables.
|
||||
:::
|
||||
|
||||
If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it.
|
||||
|
@ -105,7 +105,8 @@ Hierarchy of privileges:
|
||||
- [CREATE](#grant-create)
|
||||
- `CREATE DATABASE`
|
||||
- `CREATE TABLE`
|
||||
- `CREATE TEMPORARY TABLE`
|
||||
- `CREATE ARBITRARY TEMPORARY TABLE`
|
||||
- `CREATE TEMPORARY TABLE`
|
||||
- `CREATE VIEW`
|
||||
- `CREATE DICTIONARY`
|
||||
- `CREATE FUNCTION`
|
||||
@ -313,7 +314,8 @@ Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [A
|
||||
- `CREATE`. Level: `GROUP`
|
||||
- `CREATE DATABASE`. Level: `DATABASE`
|
||||
- `CREATE TABLE`. Level: `TABLE`
|
||||
- `CREATE TEMPORARY TABLE`. Level: `GLOBAL`
|
||||
- `CREATE ARBITRARY TEMPORARY TABLE`. Level: `GLOBAL`
|
||||
- `CREATE TEMPORARY TABLE`. Level: `GLOBAL`
|
||||
- `CREATE VIEW`. Level: `VIEW`
|
||||
- `CREATE DICTIONARY`. Level: `DICTIONARY`
|
||||
|
||||
|
@ -91,6 +91,13 @@ INSERT INTO t FORMAT TabSeparated
|
||||
|
||||
You can insert data separately from the query by using the command-line client or the HTTP interface. For more information, see the section “[Interfaces](../../interfaces)”.
|
||||
|
||||
:::note
|
||||
If you want to specify `SETTINGS` for an `INSERT` query, you have to do it _before_ the `FORMAT` clause, since everything after `FORMAT format_name` is treated as data. For example:
|
||||
```sql
|
||||
INSERT INTO table SETTINGS ... FORMAT format_name data_set
|
||||
```
|
||||
:::
|
||||
|
||||
## Constraints
|
||||
|
||||
If table has [constraints](../../sql-reference/statements/create/table.md#constraints), their expressions will be checked for each row of inserted data. If any of those constraints is not satisfied — server will raise an exception containing constraint name and expression, the query will be stopped.
|
||||
|
@ -6,21 +6,22 @@ sidebar_label: file
|
||||
|
||||
# file
|
||||
|
||||
Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones.
|
||||
Creates a table from a file. This table function is similar to [url](/docs/en/sql-reference/table-functions/url.md) and [hdfs](/docs/en/sql-reference/table-functions/hdfs.md) ones.
|
||||
|
||||
`file` function can be used in `SELECT` and `INSERT` queries on data in [File](../../engines/table-engines/special/file.md) tables.
|
||||
`file` function can be used in `SELECT` and `INSERT` queries on data in [File](/docs/en/engines/table-engines/special/file.md) tables.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
file(path [,format] [,structure])
|
||||
file(path [,format] [,structure] [,compression])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
|
||||
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). The path supports the following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
|
||||
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -53,7 +54,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
Getting the first 10 lines of a table that contains 3 columns of [UInt32](../../sql-reference/data-types/int-uint.md) type from a CSV file:
|
||||
Getting the first 10 lines of a table that contains 3 columns of [UInt32](/docs/en/sql-reference/data-types/int-uint.md) type from a CSV file:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
|
||||
@ -143,4 +144,4 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
|
@ -23,23 +23,3 @@ You can use table functions in:
|
||||
:::warning
|
||||
You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled.
|
||||
:::
|
||||
|
||||
| Function | Description |
|
||||
|------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| [file](../../sql-reference/table-functions/file.md) | Creates a [File](../../engines/table-engines/special/file.md)-engine table. |
|
||||
| [merge](../../sql-reference/table-functions/merge.md) | Creates a [Merge](../../engines/table-engines/special/merge.md)-engine table. |
|
||||
| [numbers](../../sql-reference/table-functions/numbers.md) | Creates a table with a single column filled with integer numbers. |
|
||||
| [remote](../../sql-reference/table-functions/remote.md) | Allows you to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md)-engine table. |
|
||||
| [url](../../sql-reference/table-functions/url.md) | Creates a [Url](../../engines/table-engines/special/url.md)-engine table. |
|
||||
| [mysql](../../sql-reference/table-functions/mysql.md) | Creates a [MySQL](../../engines/table-engines/integrations/mysql.md)-engine table. |
|
||||
| [postgresql](../../sql-reference/table-functions/postgresql.md) | Creates a [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)-engine table. |
|
||||
| [jdbc](../../sql-reference/table-functions/jdbc.md) | Creates a [JDBC](../../engines/table-engines/integrations/jdbc.md)-engine table. |
|
||||
| [odbc](../../sql-reference/table-functions/odbc.md)             | Creates an [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table.                                                      |
|
||||
| [hdfs](../../sql-reference/table-functions/hdfs.md)             | Creates an [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table.                                                      |
|
||||
| [s3](../../sql-reference/table-functions/s3.md)                 | Creates an [S3](../../engines/table-engines/integrations/s3.md)-engine table.                                                          |
|
||||
| [sqlite](../../sql-reference/table-functions/sqlite.md) | Creates a [sqlite](../../engines/table-engines/integrations/sqlite.md)-engine table. |
|
||||
|
||||
:::note
|
||||
Only these table functions are enabled in readonly mode:
|
||||
null, view, viewIfPermitted, numbers, numbers_mt, generateRandom, values, cluster, clusterAllReplicas
|
||||
:::
|
@ -89,7 +89,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
└─────────────────────┴───────────┴──────────┴──────┘
|
||||
```
|
||||
|
||||
Первая строка отменяет предыдущее состояние объекта (пользователя). Она должен повторять все поля из ключа сортировки для отменённого состояния за исключением `Sign`.
|
||||
Первая строка отменяет предыдущее состояние объекта (пользователя). Она должна повторять все поля из ключа сортировки для отменённого состояния за исключением `Sign`.
|
||||
|
||||
Вторая строка содержит текущее состояние.
|
||||
|
||||
|
@ -584,7 +584,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
|
||||
|
||||
Данные с истекшим `TTL` удаляются, когда ClickHouse мёржит куски данных.
|
||||
|
||||
Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управление частотой подобных мёржей, можно задать настройку `merge_with_ttl_timeout`. Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера.
|
||||
Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управления частотой подобных мёржей, можно задать настройку `merge_with_ttl_timeout`. Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера.
|
||||
|
||||
Если вы выполните запрос `SELECT` между слияниями, вы можете получить устаревшие данные. Чтобы избежать этого, используйте запрос [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) перед `SELECT`.
|
||||
|
||||
@ -679,7 +679,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
|
||||
- `policy_name_N` — название политики. Названия политик должны быть уникальны.
|
||||
- `volume_name_N` — название тома. Названия томов должны быть уникальны.
|
||||
- `disk` — диск, находящийся внутри тома.
|
||||
- `max_data_part_size_bytes` — максимальный размер куска данных, который может находится на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том.
|
||||
- `max_data_part_size_bytes` — максимальный размер куска данных, который может находиться на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том.
|
||||
- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`, если совокупный размер всех партов недостаточен, будут перемещены все парты.
|
||||
- `prefer_not_to_merge` — Отключает слияние кусков данных, хранящихся на данном томе. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками.
|
||||
|
||||
@ -730,7 +730,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
|
||||
|
||||
В приведенном примере политика `hdd_in_order` реализует принцип [round-robin](https://ru.wikipedia.org/wiki/Round-robin_(%D0%B0%D0%BB%D0%B3%D0%BE%D1%80%D0%B8%D1%82%D0%BC)). Так как в политике есть всего один том (`single`), то все записи производятся на его диски по круговому циклу. Такая политика может быть полезна при наличии в системе нескольких похожих дисков, но при этом не сконфигурирован RAID. Учтите, что каждый отдельный диск ненадёжен и, чтобы не потерять важные данные, это необходимо скомпенсировать за счет хранения данных в трёх копиях.
|
||||
|
||||
Если система содержит диски различных типов, то может пригодиться политика `moving_from_ssd_to_hdd`. В томе `hot` находится один SSD-диск (`fast_ssd`), а также задается ограничение на максимальный размер куска, который может храниться на этом томе (1GB). Все куски такой таблицы больше 1GB будут записываться сразу на том `cold`, в котором содержится один HDD-диск `disk1`. Также, при заполнении диска `fast_ssd` более чем на 80% данные будут переносится на диск `disk1` фоновым процессом.
|
||||
Если система содержит диски различных типов, то может пригодиться политика `moving_from_ssd_to_hdd`. В томе `hot` находится один SSD-диск (`fast_ssd`), а также задается ограничение на максимальный размер куска, который может храниться на этом томе (1GB). Все куски такой таблицы больше 1GB будут записываться сразу на том `cold`, в котором содержится один HDD-диск `disk1`. Также при заполнении диска `fast_ssd` более чем на 80% данные будут переноситься на диск `disk1` фоновым процессом.
|
||||
|
||||
Порядок томов в политиках хранения важен, при достижении условий на переполнение тома данные переносятся на следующий. Порядок дисков в томах также важен, данные пишутся по очереди на каждый из них.
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user