mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
Merge remote-tracking branch 'upstream/master' into named-collections-finish
This commit is contained in:
commit
bb0beb7449
80
.clang-tidy
80
.clang-tidy
@ -23,9 +23,12 @@ Checks: '*,
|
||||
-bugprone-implicit-widening-of-multiplication-result,
|
||||
-bugprone-narrowing-conversions,
|
||||
-bugprone-not-null-terminated-result,
|
||||
-bugprone-reserved-identifier,
|
||||
-bugprone-unchecked-optional-access,
|
||||
|
||||
-cert-dcl16-c,
|
||||
-cert-dcl37-c,
|
||||
-cert-dcl51-cpp,
|
||||
-cert-err58-cpp,
|
||||
-cert-msc32-c,
|
||||
-cert-msc51-cpp,
|
||||
@ -129,6 +132,7 @@ Checks: '*,
|
||||
-readability-function-cognitive-complexity,
|
||||
-readability-function-size,
|
||||
-readability-identifier-length,
|
||||
-readability-identifier-naming,
|
||||
-readability-implicit-bool-conversion,
|
||||
-readability-isolate-declaration,
|
||||
-readability-magic-numbers,
|
||||
@ -158,56 +162,28 @@ Checks: '*,
|
||||
|
||||
WarningsAsErrors: '*'
|
||||
|
||||
# TODO: use dictionary syntax for CheckOptions when minimum clang-tidy level rose to 15
|
||||
# some-check.SomeOption: 'some value'
|
||||
# instead of
|
||||
# - key: some-check.SomeOption
|
||||
# value: 'some value'
|
||||
CheckOptions:
|
||||
- key: readability-identifier-naming.ClassCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.EnumCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.LocalVariableCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.StaticConstantCase
|
||||
value: aNy_CasE
|
||||
- key: readability-identifier-naming.MemberCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.PrivateMemberPrefix
|
||||
value: ''
|
||||
- key: readability-identifier-naming.ProtectedMemberPrefix
|
||||
value: ''
|
||||
- key: readability-identifier-naming.PublicMemberCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.MethodCase
|
||||
value: camelBack
|
||||
- key: readability-identifier-naming.PrivateMethodPrefix
|
||||
value: ''
|
||||
- key: readability-identifier-naming.ProtectedMethodPrefix
|
||||
value: ''
|
||||
- key: readability-identifier-naming.ParameterPackCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.StructCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.TemplateTemplateParameterCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.TemplateUsingCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.TypeTemplateParameterCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.TypedefCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.UnionCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.UsingCase
|
||||
value: CamelCase
|
||||
- key: modernize-loop-convert.UseCxx20ReverseRanges
|
||||
value: false
|
||||
- key: performance-move-const-arg.CheckTriviallyCopyableMove
|
||||
value: false
|
||||
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
|
||||
- key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
|
||||
value: expr-type
|
||||
- key: cppcoreguidelines-avoid-do-while.IgnoreMacros
|
||||
value: true
|
||||
readability-identifier-naming.ClassCase: CamelCase
|
||||
readability-identifier-naming.EnumCase: CamelCase
|
||||
readability-identifier-naming.LocalVariableCase: lower_case
|
||||
readability-identifier-naming.StaticConstantCase: aNy_CasE
|
||||
readability-identifier-naming.MemberCase: lower_case
|
||||
readability-identifier-naming.PrivateMemberPrefix: ''
|
||||
readability-identifier-naming.ProtectedMemberPrefix: ''
|
||||
readability-identifier-naming.PublicMemberCase: lower_case
|
||||
readability-identifier-naming.MethodCase: camelBack
|
||||
readability-identifier-naming.PrivateMethodPrefix: ''
|
||||
readability-identifier-naming.ProtectedMethodPrefix: ''
|
||||
readability-identifier-naming.ParameterPackCase: lower_case
|
||||
readability-identifier-naming.StructCase: CamelCase
|
||||
readability-identifier-naming.TemplateTemplateParameterCase: CamelCase
|
||||
readability-identifier-naming.TemplateUsingCase: lower_case
|
||||
readability-identifier-naming.TypeTemplateParameterCase: CamelCase
|
||||
readability-identifier-naming.TypedefCase: CamelCase
|
||||
readability-identifier-naming.UnionCase: CamelCase
|
||||
readability-identifier-naming.UsingCase: CamelCase
|
||||
modernize-loop-convert.UseCxx20ReverseRanges: false
|
||||
performance-move-const-arg.CheckTriviallyCopyableMove: false
|
||||
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
|
||||
readability-identifier-naming.TypeTemplateParameterIgnoredRegexp: expr-type
|
||||
cppcoreguidelines-avoid-do-while.IgnoreMacros: true
|
||||
|
43
.github/workflows/backport_branches.yml
vendored
43
.github/workflows/backport_branches.yml
vendored
@ -79,7 +79,7 @@ jobs:
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ runner.temp }}/changed_images.json
|
||||
CompatibilityCheck:
|
||||
CompatibilityCheckX86:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
@ -98,12 +98,43 @@ jobs:
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheck
|
||||
- name: CompatibilityCheckX86
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [BuilderDebAarch64]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/compatibility_check
|
||||
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheckAarch64
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -421,7 +452,8 @@ jobs:
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push \
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
@ -741,7 +773,8 @@ jobs:
|
||||
- FunctionalStatefulTestDebug
|
||||
- StressTestTsan
|
||||
- IntegrationTestsRelease
|
||||
- CompatibilityCheck
|
||||
- CompatibilityCheckX86
|
||||
- CompatibilityCheckAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
|
1
.github/workflows/cherry_pick.yml
vendored
1
.github/workflows/cherry_pick.yml
vendored
@ -35,7 +35,6 @@ jobs:
|
||||
fetch-depth: 0
|
||||
- name: Cherry pick
|
||||
run: |
|
||||
sudo pip install GitPython
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 cherry_pick.py
|
||||
- name: Cleanup
|
||||
|
43
.github/workflows/master.yml
vendored
43
.github/workflows/master.yml
vendored
@ -110,7 +110,7 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
CompatibilityCheck:
|
||||
CompatibilityCheckX86:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
@ -129,12 +129,43 @@ jobs:
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheck
|
||||
- name: CompatibilityCheckX86
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [BuilderDebAarch64]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/compatibility_check
|
||||
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheckAarch64
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -829,7 +860,8 @@ jobs:
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head
|
||||
python3 docker_server.py --release-type head \
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server
|
||||
python3 docker_server.py --release-type head --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
@ -3124,7 +3156,8 @@ jobs:
|
||||
- PerformanceComparisonX86-1
|
||||
- PerformanceComparisonX86-2
|
||||
- PerformanceComparisonX86-3
|
||||
- CompatibilityCheck
|
||||
- CompatibilityCheckX86
|
||||
- CompatibilityCheckAarch64
|
||||
- ASTFuzzerTestDebug
|
||||
- ASTFuzzerTestAsan
|
||||
- ASTFuzzerTestTsan
|
||||
|
44
.github/workflows/pull_request.yml
vendored
44
.github/workflows/pull_request.yml
vendored
@ -37,7 +37,6 @@ jobs:
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 run_check.py
|
||||
PythonUnitTests:
|
||||
needs: CheckLabels
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
@ -174,7 +173,7 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
CompatibilityCheck:
|
||||
CompatibilityCheckX86:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
@ -193,12 +192,43 @@ jobs:
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheck
|
||||
- name: CompatibilityCheckX86
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [BuilderDebAarch64]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/compatibility_check
|
||||
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheckAarch64
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -886,7 +916,8 @@ jobs:
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push \
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
@ -4792,7 +4823,8 @@ jobs:
|
||||
- UnitTestsMsan
|
||||
- UnitTestsUBsan
|
||||
- UnitTestsReleaseClang
|
||||
- CompatibilityCheck
|
||||
- CompatibilityCheckX86
|
||||
- CompatibilityCheckAarch64
|
||||
- IntegrationTestsFlakyCheck
|
||||
- SQLancerTestRelease
|
||||
- SQLancerTestDebug
|
||||
|
31
.github/workflows/release.yml
vendored
31
.github/workflows/release.yml
vendored
@ -7,15 +7,28 @@ on: # yamllint disable-line rule:truthy
|
||||
release:
|
||||
types:
|
||||
- published
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: 'Release tag'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
ReleasePublish:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set tag from input
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
|
||||
- name: Set tag from REF
|
||||
if: github.event_name == 'release'
|
||||
run: |
|
||||
echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
|
||||
- name: Deploy packages and assets
|
||||
run: |
|
||||
GITHUB_TAG="${GITHUB_REF#refs/tags/}"
|
||||
curl --silent --data '' \
|
||||
curl --silent --data '' --no-buffer \
|
||||
'${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true'
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
@ -23,16 +36,26 @@ jobs:
|
||||
DockerServerImages:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set tag from input
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
|
||||
- name: Set tag from REF
|
||||
if: github.event_name == 'release'
|
||||
run: |
|
||||
echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
fetch-depth: 0 # otherwise we will have no version info
|
||||
ref: ${{ env.GITHUB_TAG }}
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type auto --version "${{ github.ref }}"
|
||||
python3 docker_server.py --release-type auto --version "${{ github.ref }}" --no-ubuntu \
|
||||
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" \
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server
|
||||
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
|
43
.github/workflows/release_branches.yml
vendored
43
.github/workflows/release_branches.yml
vendored
@ -71,7 +71,7 @@ jobs:
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ runner.temp }}/changed_images.json
|
||||
CompatibilityCheck:
|
||||
CompatibilityCheckX86:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
@ -90,12 +90,43 @@ jobs:
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheck
|
||||
- name: CompatibilityCheckX86
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [BuilderDebAarch64]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/compatibility_check
|
||||
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: CompatibilityCheckAarch64
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -494,7 +525,8 @@ jobs:
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push \
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
@ -1947,7 +1979,8 @@ jobs:
|
||||
- IntegrationTestsTsan1
|
||||
- IntegrationTestsTsan2
|
||||
- IntegrationTestsTsan3
|
||||
- CompatibilityCheck
|
||||
- CompatibilityCheckX86
|
||||
- CompatibilityCheckAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
|
@ -301,12 +301,12 @@ if (ENABLE_BUILD_PROFILING)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
set (CMAKE_CXX_STANDARD 20)
|
||||
set (CMAKE_CXX_EXTENSIONS ON) # Same as gnu++2a (ON) vs c++2a (OFF): https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html
|
||||
set (CMAKE_CXX_STANDARD 23)
|
||||
set (CMAKE_CXX_EXTENSIONS OFF)
|
||||
set (CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
set (CMAKE_C_STANDARD 11)
|
||||
set (CMAKE_C_EXTENSIONS ON)
|
||||
set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C
|
||||
set (CMAKE_C_STANDARD_REQUIRED ON)
|
||||
|
||||
if (COMPILER_GCC OR COMPILER_CLANG)
|
||||
|
@ -14,7 +14,7 @@ curl https://clickhouse.com/ | sh
|
||||
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
|
||||
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
|
||||
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
|
||||
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
|
||||
* [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
|
||||
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
|
||||
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
|
||||
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
|
||||
|
@ -2,6 +2,10 @@ if (USE_CLANG_TIDY)
|
||||
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
|
||||
endif ()
|
||||
|
||||
# TODO: Remove this. We like to compile with C++23 (set by top-level CMakeLists) but Clang crashes with our libcxx
|
||||
# when instantiated from JSON.cpp. Try again when libcxx(abi) and Clang are upgraded to 16.
|
||||
set (CMAKE_CXX_STANDARD 20)
|
||||
|
||||
set (SRCS
|
||||
argsToConfig.cpp
|
||||
coverage.cpp
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include "defines.h"
|
||||
#include "TypePair.h"
|
||||
|
||||
/// General-purpose typelist. Easy on compilation times as it does not use recursion.
|
||||
template <typename ...Args>
|
||||
@ -28,7 +27,7 @@ namespace TypeListUtils /// In some contexts it's more handy to use functions in
|
||||
constexpr Root<Args...> changeRoot(TypeList<Args...>) { return {}; }
|
||||
|
||||
template <typename F, typename ...Args>
|
||||
constexpr void forEach(TypeList<Args...>, F && f) { (std::forward<F>(f)(Id<Args>{}), ...); }
|
||||
constexpr void forEach(TypeList<Args...>, F && f) { (std::forward<F>(f)(TypeList<Args>{}), ...); }
|
||||
}
|
||||
|
||||
template <typename TypeListLeft, typename TypeListRight>
|
||||
|
@ -1,4 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
template <typename T, typename V> struct TypePair {};
|
||||
template <typename T> struct Id {};
|
@ -159,22 +159,22 @@ inline const char * find_first_symbols_sse42(const char * const begin, const cha
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
if ( (num_chars >= 1 && maybe_negate<positive>(*pos == c01))
|
||||
|| (num_chars >= 2 && maybe_negate<positive>(*pos == c02))
|
||||
|| (num_chars >= 3 && maybe_negate<positive>(*pos == c03))
|
||||
|| (num_chars >= 4 && maybe_negate<positive>(*pos == c04))
|
||||
|| (num_chars >= 5 && maybe_negate<positive>(*pos == c05))
|
||||
|| (num_chars >= 6 && maybe_negate<positive>(*pos == c06))
|
||||
|| (num_chars >= 7 && maybe_negate<positive>(*pos == c07))
|
||||
|| (num_chars >= 8 && maybe_negate<positive>(*pos == c08))
|
||||
|| (num_chars >= 9 && maybe_negate<positive>(*pos == c09))
|
||||
|| (num_chars >= 10 && maybe_negate<positive>(*pos == c10))
|
||||
|| (num_chars >= 11 && maybe_negate<positive>(*pos == c11))
|
||||
|| (num_chars >= 12 && maybe_negate<positive>(*pos == c12))
|
||||
|| (num_chars >= 13 && maybe_negate<positive>(*pos == c13))
|
||||
|| (num_chars >= 14 && maybe_negate<positive>(*pos == c14))
|
||||
|| (num_chars >= 15 && maybe_negate<positive>(*pos == c15))
|
||||
|| (num_chars >= 16 && maybe_negate<positive>(*pos == c16)))
|
||||
if ( (num_chars == 1 && maybe_negate<positive>(is_in<c01>(*pos)))
|
||||
|| (num_chars == 2 && maybe_negate<positive>(is_in<c01, c02>(*pos)))
|
||||
|| (num_chars == 3 && maybe_negate<positive>(is_in<c01, c02, c03>(*pos)))
|
||||
|| (num_chars == 4 && maybe_negate<positive>(is_in<c01, c02, c03, c04>(*pos)))
|
||||
|| (num_chars == 5 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05>(*pos)))
|
||||
|| (num_chars == 6 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06>(*pos)))
|
||||
|| (num_chars == 7 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07>(*pos)))
|
||||
|| (num_chars == 8 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08>(*pos)))
|
||||
|| (num_chars == 9 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09>(*pos)))
|
||||
|| (num_chars == 10 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10>(*pos)))
|
||||
|| (num_chars == 11 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11>(*pos)))
|
||||
|| (num_chars == 12 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12>(*pos)))
|
||||
|| (num_chars == 13 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13>(*pos)))
|
||||
|| (num_chars == 14 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14>(*pos)))
|
||||
|| (num_chars == 15 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15>(*pos)))
|
||||
|| (num_chars == 16 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16>(*pos))))
|
||||
return pos;
|
||||
return return_mode == ReturnMode::End ? end : nullptr;
|
||||
}
|
||||
|
214
base/base/hex.h
Normal file
214
base/base/hex.h
Normal file
@ -0,0 +1,214 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include "types.h"
|
||||
|
||||
/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.
|
||||
|
||||
constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF";
|
||||
constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef";
|
||||
|
||||
constexpr char hexDigitUppercase(unsigned char c)
|
||||
{
|
||||
return hex_digit_to_char_uppercase_table[c];
|
||||
}
|
||||
constexpr char hexDigitLowercase(unsigned char c)
|
||||
{
|
||||
return hex_digit_to_char_lowercase_table[c];
|
||||
}
|
||||
|
||||
/// Maps 0..255 to 00..FF or 00..ff correspondingly
|
||||
|
||||
constexpr inline std::string_view hex_byte_to_char_uppercase_table = //
|
||||
"000102030405060708090A0B0C0D0E0F"
|
||||
"101112131415161718191A1B1C1D1E1F"
|
||||
"202122232425262728292A2B2C2D2E2F"
|
||||
"303132333435363738393A3B3C3D3E3F"
|
||||
"404142434445464748494A4B4C4D4E4F"
|
||||
"505152535455565758595A5B5C5D5E5F"
|
||||
"606162636465666768696A6B6C6D6E6F"
|
||||
"707172737475767778797A7B7C7D7E7F"
|
||||
"808182838485868788898A8B8C8D8E8F"
|
||||
"909192939495969798999A9B9C9D9E9F"
|
||||
"A0A1A2A3A4A5A6A7A8A9AAABACADAEAF"
|
||||
"B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF"
|
||||
"C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF"
|
||||
"D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF"
|
||||
"E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF"
|
||||
"F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF";
|
||||
|
||||
constexpr inline std::string_view hex_byte_to_char_lowercase_table = //
|
||||
"000102030405060708090a0b0c0d0e0f"
|
||||
"101112131415161718191a1b1c1d1e1f"
|
||||
"202122232425262728292a2b2c2d2e2f"
|
||||
"303132333435363738393a3b3c3d3e3f"
|
||||
"404142434445464748494a4b4c4d4e4f"
|
||||
"505152535455565758595a5b5c5d5e5f"
|
||||
"606162636465666768696a6b6c6d6e6f"
|
||||
"707172737475767778797a7b7c7d7e7f"
|
||||
"808182838485868788898a8b8c8d8e8f"
|
||||
"909192939495969798999a9b9c9d9e9f"
|
||||
"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
|
||||
"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
|
||||
"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
|
||||
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
|
||||
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
|
||||
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
|
||||
|
||||
inline void writeHexByteUppercase(UInt8 byte, void * out)
|
||||
{
|
||||
memcpy(out, &hex_byte_to_char_uppercase_table[static_cast<size_t>(byte) * 2], 2);
|
||||
}
|
||||
|
||||
inline void writeHexByteLowercase(UInt8 byte, void * out)
|
||||
{
|
||||
memcpy(out, &hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
|
||||
}
|
||||
|
||||
constexpr inline std::string_view bin_byte_to_char_table = //
|
||||
"0000000000000001000000100000001100000100000001010000011000000111"
|
||||
"0000100000001001000010100000101100001100000011010000111000001111"
|
||||
"0001000000010001000100100001001100010100000101010001011000010111"
|
||||
"0001100000011001000110100001101100011100000111010001111000011111"
|
||||
"0010000000100001001000100010001100100100001001010010011000100111"
|
||||
"0010100000101001001010100010101100101100001011010010111000101111"
|
||||
"0011000000110001001100100011001100110100001101010011011000110111"
|
||||
"0011100000111001001110100011101100111100001111010011111000111111"
|
||||
"0100000001000001010000100100001101000100010001010100011001000111"
|
||||
"0100100001001001010010100100101101001100010011010100111001001111"
|
||||
"0101000001010001010100100101001101010100010101010101011001010111"
|
||||
"0101100001011001010110100101101101011100010111010101111001011111"
|
||||
"0110000001100001011000100110001101100100011001010110011001100111"
|
||||
"0110100001101001011010100110101101101100011011010110111001101111"
|
||||
"0111000001110001011100100111001101110100011101010111011001110111"
|
||||
"0111100001111001011110100111101101111100011111010111111001111111"
|
||||
"1000000010000001100000101000001110000100100001011000011010000111"
|
||||
"1000100010001001100010101000101110001100100011011000111010001111"
|
||||
"1001000010010001100100101001001110010100100101011001011010010111"
|
||||
"1001100010011001100110101001101110011100100111011001111010011111"
|
||||
"1010000010100001101000101010001110100100101001011010011010100111"
|
||||
"1010100010101001101010101010101110101100101011011010111010101111"
|
||||
"1011000010110001101100101011001110110100101101011011011010110111"
|
||||
"1011100010111001101110101011101110111100101111011011111010111111"
|
||||
"1100000011000001110000101100001111000100110001011100011011000111"
|
||||
"1100100011001001110010101100101111001100110011011100111011001111"
|
||||
"1101000011010001110100101101001111010100110101011101011011010111"
|
||||
"1101100011011001110110101101101111011100110111011101111011011111"
|
||||
"1110000011100001111000101110001111100100111001011110011011100111"
|
||||
"1110100011101001111010101110101111101100111011011110111011101111"
|
||||
"1111000011110001111100101111001111110100111101011111011011110111"
|
||||
"1111100011111001111110101111101111111100111111011111111011111111";
|
||||
|
||||
inline void writeBinByte(UInt8 byte, void * out)
|
||||
{
|
||||
memcpy(out, &bin_byte_to_char_table[static_cast<size_t>(byte) * 8], 8);
|
||||
}
|
||||
|
||||
/// Produces hex representation of an unsigned int with leading zeros (for checksums)
|
||||
template <typename TUInt>
|
||||
inline void writeHexUIntImpl(TUInt uint_, char * out, std::string_view table)
|
||||
{
|
||||
union
|
||||
{
|
||||
TUInt value;
|
||||
UInt8 uint8[sizeof(TUInt)];
|
||||
};
|
||||
|
||||
value = uint_;
|
||||
|
||||
for (size_t i = 0; i < sizeof(TUInt); ++i)
|
||||
{
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
|
||||
else
|
||||
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[i]) * 2], 2);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
inline void writeHexUIntUppercase(TUInt uint_, char * out)
|
||||
{
|
||||
writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table);
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
inline void writeHexUIntLowercase(TUInt uint_, char * out)
|
||||
{
|
||||
writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table);
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
std::string getHexUIntUppercase(TUInt uint_)
|
||||
{
|
||||
std::string res(sizeof(TUInt) * 2, '\0');
|
||||
writeHexUIntUppercase(uint_, res.data());
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
std::string getHexUIntLowercase(TUInt uint_)
|
||||
{
|
||||
std::string res(sizeof(TUInt) * 2, '\0');
|
||||
writeHexUIntLowercase(uint_, res.data());
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value.
|
||||
|
||||
constexpr inline std::string_view hex_char_to_digit_table
|
||||
= {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9
|
||||
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
|
||||
256};
|
||||
|
||||
constexpr UInt8 unhex(char c)
|
||||
{
|
||||
return hex_char_to_digit_table[static_cast<UInt8>(c)];
|
||||
}
|
||||
|
||||
constexpr UInt8 unhex2(const char * data)
|
||||
{
|
||||
return static_cast<UInt8>(unhex(data[0])) * 0x10 + static_cast<UInt8>(unhex(data[1]));
|
||||
}
|
||||
|
||||
constexpr UInt16 unhex4(const char * data)
|
||||
{
|
||||
return static_cast<UInt16>(unhex(data[0])) * 0x1000 + static_cast<UInt16>(unhex(data[1])) * 0x100
|
||||
+ static_cast<UInt16>(unhex(data[2])) * 0x10 + static_cast<UInt16>(unhex(data[3]));
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
constexpr TUInt unhexUInt(const char * data)
|
||||
{
|
||||
TUInt res = 0;
|
||||
if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0))
|
||||
{
|
||||
for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data)
|
||||
{
|
||||
res <<= 4;
|
||||
res += unhex(*data);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16)
|
||||
{
|
||||
res <<= 64;
|
||||
res += unhexUInt<UInt64>(data);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
13
base/base/interpolate.h
Normal file
13
base/base/interpolate.h
Normal file
@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
|
||||
/** Linear interpolation in logarithmic coordinates.
|
||||
* Exponential interpolation is related to linear interpolation
|
||||
* exactly in same way as geometric mean is related to arithmetic mean.
|
||||
*/
|
||||
constexpr double interpolateExponential(double min, double max, double ratio)
|
||||
{
|
||||
assert(min > 0 && ratio >= 0 && ratio <= 1);
|
||||
return min * std::pow(max / min, ratio);
|
||||
}
|
@ -466,7 +466,7 @@ namespace Data
|
||||
bool extractManualImpl(std::size_t pos, T & val, SQLSMALLINT cType)
|
||||
{
|
||||
SQLRETURN rc = 0;
|
||||
T value = (T)0;
|
||||
T value;
|
||||
|
||||
resizeLengths(pos);
|
||||
|
||||
|
@ -105,6 +105,8 @@ public:
|
||||
const std::string & getText() const;
|
||||
/// Returns the text of the message.
|
||||
|
||||
void appendText(const std::string & text);
|
||||
|
||||
void setPriority(Priority prio);
|
||||
/// Sets the priority of the message.
|
||||
|
||||
|
@ -27,8 +27,7 @@ Message::Message():
|
||||
_tid(0),
|
||||
_file(0),
|
||||
_line(0),
|
||||
_pMap(0),
|
||||
_fmt_str(0)
|
||||
_pMap(0)
|
||||
{
|
||||
init();
|
||||
}
|
||||
@ -157,6 +156,12 @@ void Message::setText(const std::string& text)
|
||||
}
|
||||
|
||||
|
||||
void Message::appendText(const std::string & text)
|
||||
{
|
||||
_text.append(text);
|
||||
}
|
||||
|
||||
|
||||
void Message::setPriority(Priority prio)
|
||||
{
|
||||
_prio = prio;
|
||||
|
@ -90,6 +90,9 @@ namespace MongoDB
|
||||
|
||||
Poco::Net::SocketAddress address() const;
|
||||
/// Returns the address of the MongoDB server.
|
||||
|
||||
const std::string & uri() const;
|
||||
/// Returns the uri on which the connection was made.
|
||||
|
||||
void connect(const std::string & hostAndPort);
|
||||
/// Connects to the given MongoDB server.
|
||||
@ -148,6 +151,7 @@ namespace MongoDB
|
||||
private:
|
||||
Poco::Net::SocketAddress _address;
|
||||
Poco::Net::StreamSocket _socket;
|
||||
std::string _uri;
|
||||
};
|
||||
|
||||
|
||||
@ -158,6 +162,10 @@ namespace MongoDB
|
||||
{
|
||||
return _address;
|
||||
}
|
||||
inline const std::string & Connection::uri() const
|
||||
{
|
||||
return _uri;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -145,68 +145,155 @@ void Connection::connect(const Poco::Net::StreamSocket& socket)
|
||||
|
||||
void Connection::connect(const std::string& uri, SocketFactory& socketFactory)
|
||||
{
|
||||
Poco::URI theURI(uri);
|
||||
if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri);
|
||||
std::vector<std::string> strAddresses;
|
||||
std::string newURI;
|
||||
|
||||
std::string userInfo = theURI.getUserInfo();
|
||||
std::string host = theURI.getHost();
|
||||
Poco::UInt16 port = theURI.getPort();
|
||||
if (port == 0) port = 27017;
|
||||
if (uri.find(',') != std::string::npos)
|
||||
{
|
||||
size_t pos;
|
||||
size_t head = 0;
|
||||
if ((pos = uri.find("@")) != std::string::npos)
|
||||
{
|
||||
head = pos + 1;
|
||||
}
|
||||
else if ((pos = uri.find("://")) != std::string::npos)
|
||||
{
|
||||
head = pos + 3;
|
||||
}
|
||||
|
||||
std::string databaseName = theURI.getPath();
|
||||
if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1);
|
||||
if (databaseName.empty()) databaseName = "admin";
|
||||
std::string tempstr;
|
||||
std::string::const_iterator it = uri.begin();
|
||||
it += head;
|
||||
size_t tail = head;
|
||||
for (;it != uri.end() && *it != '?' && *it != '/'; ++it)
|
||||
{
|
||||
tempstr += *it;
|
||||
tail++;
|
||||
}
|
||||
|
||||
bool ssl = false;
|
||||
Poco::Timespan connectTimeout;
|
||||
Poco::Timespan socketTimeout;
|
||||
std::string authMechanism = Database::AUTH_SCRAM_SHA1;
|
||||
it = tempstr.begin();
|
||||
std::string token;
|
||||
for (;it != tempstr.end(); ++it)
|
||||
{
|
||||
if (*it == ',')
|
||||
{
|
||||
newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
|
||||
strAddresses.push_back(newURI);
|
||||
token = "";
|
||||
}
|
||||
else
|
||||
{
|
||||
token += *it;
|
||||
}
|
||||
}
|
||||
newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
|
||||
strAddresses.push_back(newURI);
|
||||
}
|
||||
else
|
||||
{
|
||||
strAddresses.push_back(uri);
|
||||
}
|
||||
|
||||
Poco::URI::QueryParameters params = theURI.getQueryParameters();
|
||||
for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
|
||||
{
|
||||
if (it->first == "ssl")
|
||||
{
|
||||
ssl = (it->second == "true");
|
||||
}
|
||||
else if (it->first == "connectTimeoutMS")
|
||||
{
|
||||
connectTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
|
||||
}
|
||||
else if (it->first == "socketTimeoutMS")
|
||||
{
|
||||
socketTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
|
||||
}
|
||||
else if (it->first == "authMechanism")
|
||||
{
|
||||
authMechanism = it->second;
|
||||
}
|
||||
}
|
||||
newURI = strAddresses.front();
|
||||
Poco::URI theURI(newURI);
|
||||
if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri);
|
||||
|
||||
connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
|
||||
std::string userInfo = theURI.getUserInfo();
|
||||
std::string databaseName = theURI.getPath();
|
||||
if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1);
|
||||
if (databaseName.empty()) databaseName = "admin";
|
||||
|
||||
if (socketTimeout > 0)
|
||||
{
|
||||
_socket.setSendTimeout(socketTimeout);
|
||||
_socket.setReceiveTimeout(socketTimeout);
|
||||
}
|
||||
bool ssl = false;
|
||||
Poco::Timespan connectTimeout;
|
||||
Poco::Timespan socketTimeout;
|
||||
std::string authMechanism = Database::AUTH_SCRAM_SHA1;
|
||||
std::string readPreference="primary";
|
||||
|
||||
if (!userInfo.empty())
|
||||
{
|
||||
std::string username;
|
||||
std::string password;
|
||||
std::string::size_type pos = userInfo.find(':');
|
||||
if (pos != std::string::npos)
|
||||
{
|
||||
username.assign(userInfo, 0, pos++);
|
||||
password.assign(userInfo, pos, userInfo.size() - pos);
|
||||
}
|
||||
else username = userInfo;
|
||||
Poco::URI::QueryParameters params = theURI.getQueryParameters();
|
||||
for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
|
||||
{
|
||||
if (it->first == "ssl")
|
||||
{
|
||||
ssl = (it->second == "true");
|
||||
}
|
||||
else if (it->first == "connectTimeoutMS")
|
||||
{
|
||||
connectTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
|
||||
}
|
||||
else if (it->first == "socketTimeoutMS")
|
||||
{
|
||||
socketTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
|
||||
}
|
||||
else if (it->first == "authMechanism")
|
||||
{
|
||||
authMechanism = it->second;
|
||||
}
|
||||
else if (it->first == "readPreference")
|
||||
{
|
||||
readPreference= it->second;
|
||||
}
|
||||
}
|
||||
|
||||
Database database(databaseName);
|
||||
if (!database.authenticate(*this, username, password, authMechanism))
|
||||
throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username));
|
||||
}
|
||||
for (std::vector<std::string>::const_iterator it = strAddresses.cbegin();it != strAddresses.cend(); ++it)
|
||||
{
|
||||
newURI = *it;
|
||||
theURI = Poco::URI(newURI);
|
||||
|
||||
std::string host = theURI.getHost();
|
||||
Poco::UInt16 port = theURI.getPort();
|
||||
if (port == 0) port = 27017;
|
||||
|
||||
connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
|
||||
_uri = newURI;
|
||||
if (socketTimeout > 0)
|
||||
{
|
||||
_socket.setSendTimeout(socketTimeout);
|
||||
_socket.setReceiveTimeout(socketTimeout);
|
||||
}
|
||||
if (strAddresses.size() > 1)
|
||||
{
|
||||
Poco::MongoDB::QueryRequest request("admin.$cmd");
|
||||
request.setNumberToReturn(1);
|
||||
request.selector().add("isMaster", 1);
|
||||
Poco::MongoDB::ResponseMessage response;
|
||||
|
||||
sendRequest(request, response);
|
||||
_uri = newURI;
|
||||
if (!response.documents().empty())
|
||||
{
|
||||
Poco::MongoDB::Document::Ptr doc = response.documents()[0];
|
||||
if (doc->get<bool>("ismaster") && readPreference == "primary")
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if (!doc->get<bool>("ismaster") && readPreference == "secondary")
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if (it + 1 == strAddresses.cend())
|
||||
{
|
||||
throw Poco::URISyntaxException(uri);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!userInfo.empty())
|
||||
{
|
||||
std::string username;
|
||||
std::string password;
|
||||
std::string::size_type pos = userInfo.find(':');
|
||||
if (pos != std::string::npos)
|
||||
{
|
||||
username.assign(userInfo, 0, pos++);
|
||||
password.assign(userInfo, pos, userInfo.size() - pos);
|
||||
}
|
||||
else username = userInfo;
|
||||
|
||||
Database database(databaseName);
|
||||
|
||||
if (!database.authenticate(*this, username, password, authMechanism))
|
||||
throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -50,15 +50,18 @@ endif ()
|
||||
string (REGEX MATCHALL "[0-9]+" COMPILER_VERSION_LIST ${CMAKE_CXX_COMPILER_VERSION})
|
||||
list (GET COMPILER_VERSION_LIST 0 COMPILER_VERSION_MAJOR)
|
||||
|
||||
# Example values: `lld-10`, `gold`.
|
||||
# Example values: `lld-10`
|
||||
option (LINKER_NAME "Linker name or full path")
|
||||
|
||||
if (LINKER_NAME MATCHES "gold")
|
||||
message (FATAL_ERROR "Linking with gold is unsupported. Please use lld.")
|
||||
endif ()
|
||||
|
||||
# s390x doesnt support lld
|
||||
if (NOT ARCH_S390X)
|
||||
if (NOT LINKER_NAME)
|
||||
if (COMPILER_GCC)
|
||||
find_program (LLD_PATH NAMES "ld.lld")
|
||||
find_program (GOLD_PATH NAMES "ld.gold")
|
||||
elseif (COMPILER_CLANG)
|
||||
# llvm lld is a generic driver.
|
||||
# Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld (WebAssembly) instead
|
||||
@ -67,13 +70,11 @@ if (NOT ARCH_S390X)
|
||||
elseif (OS_DARWIN)
|
||||
find_program (LLD_PATH NAMES "ld64.lld-${COMPILER_VERSION_MAJOR}" "ld64.lld")
|
||||
endif ()
|
||||
find_program (GOLD_PATH NAMES "ld.gold" "gold")
|
||||
endif ()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if ((OS_LINUX OR OS_DARWIN) AND NOT LINKER_NAME)
|
||||
# prefer lld linker over gold or ld on linux and macos
|
||||
if (LLD_PATH)
|
||||
if (COMPILER_GCC)
|
||||
# GCC driver requires one of supported linker names like "lld".
|
||||
@ -83,17 +84,6 @@ if ((OS_LINUX OR OS_DARWIN) AND NOT LINKER_NAME)
|
||||
set (LINKER_NAME ${LLD_PATH})
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT LINKER_NAME)
|
||||
if (GOLD_PATH)
|
||||
message (FATAL_ERROR "Linking with gold is unsupported. Please use lld.")
|
||||
if (COMPILER_GCC)
|
||||
set (LINKER_NAME "gold")
|
||||
else ()
|
||||
set (LINKER_NAME ${GOLD_PATH})
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
# TODO: allow different linker on != OS_LINUX
|
||||
|
||||
|
@ -115,6 +115,13 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A
|
||||
|
||||
# ARROW_ORC + adapters/orc/CMakefiles
|
||||
set(ORC_SRCS
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/Literal.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/sargs/TruthValue.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Exceptions.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/OrcFile.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Reader.cc"
|
||||
@ -129,13 +136,20 @@ set(ORC_SRCS
|
||||
"${ORC_SOURCE_SRC_DIR}/MemoryPool.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RLE.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RLEv1.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RLEv2.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RleDecoderV2.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RleEncoderV2.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/RLEV2Util.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Statistics.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/StripeStream.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Timezone.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/TypeImpl.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Vector.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Writer.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Adaptor.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/BloomFilter.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/Murmur3.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/BlockBuffer.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/wrap/orc-proto-wrapper.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/io/InputStream.cc"
|
||||
"${ORC_SOURCE_SRC_DIR}/io/OutputStream.cc"
|
||||
"${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc"
|
||||
@ -358,6 +372,9 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})
|
||||
add_definitions(-DARROW_WITH_ZSTD)
|
||||
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
|
||||
|
||||
add_definitions(-DARROW_WITH_BROTLI)
|
||||
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS})
|
||||
|
||||
|
||||
add_library(_arrow ${ARROW_SRCS})
|
||||
|
||||
@ -372,6 +389,7 @@ target_link_libraries(_arrow PRIVATE
|
||||
ch_contrib::snappy
|
||||
ch_contrib::zlib
|
||||
ch_contrib::zstd
|
||||
ch_contrib::brotli
|
||||
)
|
||||
target_link_libraries(_arrow PUBLIC _orc)
|
||||
|
||||
|
@ -48,6 +48,9 @@ set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
# We don't want to build C# extensions.
|
||||
set(gRPC_BUILD_CSHARP_EXT OFF)
|
||||
|
||||
# TODO: Remove this. We generally like to compile with C++23 but grpc isn't ready yet.
|
||||
set (CMAKE_CXX_STANDARD 20)
|
||||
|
||||
set(_gRPC_CARES_LIBRARIES ch_contrib::c-ares)
|
||||
set(gRPC_CARES_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}")
|
||||
|
2
contrib/krb5
vendored
2
contrib/krb5
vendored
@ -1 +1 @@
|
||||
Subproject commit f8262a1b548eb29d97e059260042036255d07f8d
|
||||
Subproject commit 9453aec0d50e5aff9b189051611b321b40935d02
|
@ -160,6 +160,8 @@ set(ALL_SRCS
|
||||
|
||||
# "${KRB5_SOURCE_DIR}/lib/gssapi/spnego/negoex_trace.c"
|
||||
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/kdf.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/cmac.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/prng.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/enc_dk_cmac.c"
|
||||
# "${KRB5_SOURCE_DIR}/lib/crypto/krb/crc32.c"
|
||||
@ -183,7 +185,6 @@ set(ALL_SRCS
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/block_size.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/string_to_key.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/verify_checksum.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/crypto_libinit.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/derive.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/random_to_key.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/verify_checksum_iov.c"
|
||||
@ -217,9 +218,7 @@ set(ALL_SRCS
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/s2k_rc4.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/valid_cksumtype.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/nfold.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/prng_fortuna.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/encrypt_length.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/cmac.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/keyblocks.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/prf_rc4.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/krb/s2k_pbkdf2.c"
|
||||
@ -228,11 +227,11 @@ set(ALL_SRCS
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/rc4.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des3.c"
|
||||
#"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/cmac.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/sha256.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/hmac.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/kdf.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/pbkdf2.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/init.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/stubs.c"
|
||||
# "${KRB5_SOURCE_DIR}/lib/crypto/openssl/hash_provider/hash_crc32.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/hash_provider/hash_evp.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/des/des_keys.c"
|
||||
@ -312,7 +311,6 @@ set(ALL_SRCS
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/allow_weak.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/mk_rep.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/mk_priv.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/s4u_authdata.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/preauth_otp.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/init_keyblock.c"
|
||||
"${KRB5_SOURCE_DIR}/lib/krb5/krb/ser_addr.c"
|
||||
@ -688,6 +686,7 @@ target_include_directories(_krb5 PRIVATE
|
||||
|
||||
target_compile_definitions(_krb5 PRIVATE
|
||||
KRB5_PRIVATE
|
||||
CRYPTO_OPENSSL
|
||||
_GSS_STATIC_LINK=1
|
||||
KRB5_DEPRECATED=1
|
||||
LOCALEDIR="/usr/local/share/locale"
|
||||
|
@ -6,6 +6,10 @@ if (MSVC)
|
||||
target_compile_definitions (_farmhash PRIVATE FARMHASH_NO_BUILTIN_EXPECT=1)
|
||||
endif ()
|
||||
|
||||
if (ARCH_S390X)
|
||||
add_compile_definitions(WORDS_BIGENDIAN)
|
||||
endif ()
|
||||
|
||||
target_include_directories (_farmhash BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
add_library(ch_contrib::farmhash ALIAS _farmhash)
|
||||
|
2
contrib/orc
vendored
2
contrib/orc
vendored
@ -1 +1 @@
|
||||
Subproject commit f9a393ed2433a60034795284f82d093b348f2102
|
||||
Subproject commit c5d7755ba0b9a95631c8daea4d094101f26ec761
|
@ -29,7 +29,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
esac
|
||||
|
||||
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
|
||||
ARG VERSION="23.2.1.2537"
|
||||
ARG VERSION="23.2.4.12"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="23.2.1.2537"
|
||||
ARG VERSION="23.2.4.12"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="23.2.1.2537"
|
||||
ARG VERSION="23.2.4.12"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
|
@ -44,6 +44,8 @@ if [ "$is_tsan_build" -eq "0" ]; then
|
||||
fi
|
||||
|
||||
export ZOOKEEPER_FAULT_INJECTION=1
|
||||
# Initial run without S3 to create system.*_log on local file system to make it
|
||||
# available for dump via clickhouse-local
|
||||
configure
|
||||
|
||||
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
|
||||
|
@ -49,19 +49,26 @@ echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_res
|
||||
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Make upgrade check more funny by forcing Ordinary engine for system database
|
||||
mkdir /var/lib/clickhouse/metadata
|
||||
mkdir -p /var/lib/clickhouse/metadata
|
||||
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
|
||||
|
||||
# Install previous release packages
|
||||
install_packages previous_release_package_folder
|
||||
|
||||
# Start server from previous release
|
||||
# Let's enable S3 storage by default
|
||||
export USE_S3_STORAGE_FOR_MERGE_TREE=1
|
||||
# Previous version may not be ready for fault injections
|
||||
export ZOOKEEPER_FAULT_INJECTION=0
|
||||
# Initial run without S3 to create system.*_log on local file system to make it
|
||||
# available for dump via clickhouse-local
|
||||
configure
|
||||
|
||||
start
|
||||
stop
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
|
||||
|
||||
# force_sync=false doesn't work correctly on some older versions
|
||||
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
|
||||
| sed "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" \
|
||||
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
|
||||
# But we still need default disk because some tables loaded only into it
|
||||
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
|
||||
| sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
|
||||
@ -69,6 +76,13 @@ sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
|
||||
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
|
||||
# Start server from previous release
|
||||
# Let's enable S3 storage by default
|
||||
export USE_S3_STORAGE_FOR_MERGE_TREE=1
|
||||
# Previous version may not be ready for fault injections
|
||||
export ZOOKEEPER_FAULT_INJECTION=0
|
||||
configure
|
||||
|
||||
start
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
@ -161,8 +175,10 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
|
||||
-e "Authentication failed" \
|
||||
-e "Cannot flush" \
|
||||
-e "Container already exists" \
|
||||
/var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
|
||||
&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \
|
||||
clickhouse-server.upgrade.log \
|
||||
| grep -av -e "_repl_01111_.*Mapping for table with UUID" \
|
||||
| zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
|
||||
&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv
|
||||
|
||||
@ -176,8 +192,6 @@ tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
|
||||
|
||||
collect_query_and_trace_logs
|
||||
|
||||
check_oom_in_dmesg
|
||||
|
||||
mv /var/log/clickhouse-server/stderr.log /test_output/
|
||||
|
||||
# Write check result into check_status.tsv
|
||||
|
55
docs/changelogs/v22.12.4.76-stable.md
Normal file
55
docs/changelogs/v22.12.4.76-stable.md
Normal file
@ -0,0 +1,55 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.12.4.76-stable (cb5772db805) FIXME as compared to v22.12.3.5-stable (893de538f02)
|
||||
|
||||
#### Performance Improvement
|
||||
* Backported in [#45704](https://github.com/ClickHouse/ClickHouse/issues/45704): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#46378](https://github.com/ClickHouse/ClickHouse/issues/46378): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
|
||||
#### Bug Fix
|
||||
* Backported in [#45672](https://github.com/ClickHouse/ClickHouse/issues/45672): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#45200](https://github.com/ClickHouse/ClickHouse/issues/45200): Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46116](https://github.com/ClickHouse/ClickHouse/issues/46116): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46035](https://github.com/ClickHouse/ClickHouse/issues/46035): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46484](https://github.com/ClickHouse/ClickHouse/issues/46484): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46509](https://github.com/ClickHouse/ClickHouse/issues/46509): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#47058](https://github.com/ClickHouse/ClickHouse/issues/47058): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#45904](https://github.com/ClickHouse/ClickHouse/issues/45904): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#45321](https://github.com/ClickHouse/ClickHouse/issues/45321): Fixed a bug in normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#45000](https://github.com/ClickHouse/ClickHouse/issues/45000): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#45553](https://github.com/ClickHouse/ClickHouse/issues/45553): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Backported in [#46226](https://github.com/ClickHouse/ClickHouse/issues/46226): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Backported in [#46218](https://github.com/ClickHouse/ClickHouse/issues/46218): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#46446](https://github.com/ClickHouse/ClickHouse/issues/46446): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#46678](https://github.com/ClickHouse/ClickHouse/issues/46678): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46872](https://github.com/ClickHouse/ClickHouse/issues/46872): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46954](https://github.com/ClickHouse/ClickHouse/issues/46954): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
29
docs/changelogs/v22.12.5.34-stable.md
Normal file
29
docs/changelogs/v22.12.5.34-stable.md
Normal file
@ -0,0 +1,29 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.12.5.34-stable (b82d6401ca1) FIXME as compared to v22.12.4.76-stable (cb5772db805)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#46983](https://github.com/ClickHouse/ClickHouse/issues/46983): - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#45729](https://github.com/ClickHouse/ClickHouse/issues/45729): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Backported in [#46398](https://github.com/ClickHouse/ClickHouse/issues/46398): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Backported in [#46903](https://github.com/ClickHouse/ClickHouse/issues/46903): - Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#47210](https://github.com/ClickHouse/ClickHouse/issues/47210): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to not working deduplication. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#47157](https://github.com/ClickHouse/ClickHouse/issues/47157): - Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Backported in [#46881](https://github.com/ClickHouse/ClickHouse/issues/46881): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#47359](https://github.com/ClickHouse/ClickHouse/issues/47359): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
40
docs/changelogs/v22.8.14.53-lts.md
Normal file
40
docs/changelogs/v22.8.14.53-lts.md
Normal file
@ -0,0 +1,40 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.8.14.53-lts (4ea67c40077) FIXME as compared to v22.8.13.20-lts (e4817946d18)
|
||||
|
||||
#### Performance Improvement
|
||||
* Backported in [#45845](https://github.com/ClickHouse/ClickHouse/issues/45845): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#46374](https://github.com/ClickHouse/ClickHouse/issues/46374): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#46358](https://github.com/ClickHouse/ClickHouse/issues/46358): Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#46112](https://github.com/ClickHouse/ClickHouse/issues/46112): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46482](https://github.com/ClickHouse/ClickHouse/issues/46482): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46505](https://github.com/ClickHouse/ClickHouse/issues/46505): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#45908](https://github.com/ClickHouse/ClickHouse/issues/45908): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#46238](https://github.com/ClickHouse/ClickHouse/issues/46238): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Backported in [#45727](https://github.com/ClickHouse/ClickHouse/issues/45727): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Backported in [#46394](https://github.com/ClickHouse/ClickHouse/issues/46394): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Backported in [#46442](https://github.com/ClickHouse/ClickHouse/issues/46442): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#46674](https://github.com/ClickHouse/ClickHouse/issues/46674): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46879](https://github.com/ClickHouse/ClickHouse/issues/46879): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46871](https://github.com/ClickHouse/ClickHouse/issues/46871): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
28
docs/changelogs/v22.8.15.23-lts.md
Normal file
28
docs/changelogs/v22.8.15.23-lts.md
Normal file
@ -0,0 +1,28 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.8.15.23-lts (d36fa168bbf) FIXME as compared to v22.8.14.53-lts (4ea67c40077)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#46981](https://github.com/ClickHouse/ClickHouse/issues/46981): - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)).
|
||||
|
||||
#### Bug Fix
|
||||
* Backported in [#47336](https://github.com/ClickHouse/ClickHouse/issues/47336): Sometimes after changing a role that could be not reflected on the access rights of a user who uses that role. This PR fixes that. [#46772](https://github.com/ClickHouse/ClickHouse/pull/46772) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46901](https://github.com/ClickHouse/ClickHouse/issues/46901): - Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#47156](https://github.com/ClickHouse/ClickHouse/issues/47156): - Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Backported in [#46987](https://github.com/ClickHouse/ClickHouse/issues/46987): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Backported in [#47357](https://github.com/ClickHouse/ClickHouse/issues/47357): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
47
docs/changelogs/v23.1.4.58-stable.md
Normal file
47
docs/changelogs/v23.1.4.58-stable.md
Normal file
@ -0,0 +1,47 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.1.4.58-stable (9ed562163a5) FIXME as compared to v23.1.3.5-stable (548b494bcce)
|
||||
|
||||
#### Performance Improvement
|
||||
* Backported in [#46380](https://github.com/ClickHouse/ClickHouse/issues/46380): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#46985](https://github.com/ClickHouse/ClickHouse/issues/46985): - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)).
|
||||
* Backported in [#46778](https://github.com/ClickHouse/ClickHouse/issues/46778): Backward compatibility for T64 codec support for IPv4. [#46747](https://github.com/ClickHouse/ClickHouse/pull/46747) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#47020](https://github.com/ClickHouse/ClickHouse/issues/47020): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#46031](https://github.com/ClickHouse/ClickHouse/issues/46031): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46477](https://github.com/ClickHouse/ClickHouse/issues/46477): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#46511](https://github.com/ClickHouse/ClickHouse/issues/46511): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46228](https://github.com/ClickHouse/ClickHouse/issues/46228): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Backported in [#46967](https://github.com/ClickHouse/ClickHouse/issues/46967): Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#46220](https://github.com/ClickHouse/ClickHouse/issues/46220): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#46751](https://github.com/ClickHouse/ClickHouse/issues/46751): Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#46448](https://github.com/ClickHouse/ClickHouse/issues/46448): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#46680](https://github.com/ClickHouse/ClickHouse/issues/46680): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46873](https://github.com/ClickHouse/ClickHouse/issues/46873): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46956](https://github.com/ClickHouse/ClickHouse/issues/46956): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Support DELETE ON CLUSTER [#45786](https://github.com/ClickHouse/ClickHouse/pull/45786) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Wait for background tasks in ~UploadHelper [#46334](https://github.com/ClickHouse/ClickHouse/pull/46334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
28
docs/changelogs/v23.1.5.24-stable.md
Normal file
28
docs/changelogs/v23.1.5.24-stable.md
Normal file
@ -0,0 +1,28 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.1.5.24-stable (0e51b53ba99) FIXME as compared to v23.1.4.58-stable (9ed562163a5)
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#47060](https://github.com/ClickHouse/ClickHouse/issues/47060): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46401](https://github.com/ClickHouse/ClickHouse/issues/46401): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Backported in [#46905](https://github.com/ClickHouse/ClickHouse/issues/46905): - Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#47211](https://github.com/ClickHouse/ClickHouse/issues/47211): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to not working deduplication. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#47118](https://github.com/ClickHouse/ClickHouse/issues/47118): - Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Backported in [#46883](https://github.com/ClickHouse/ClickHouse/issues/46883): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#47361](https://github.com/ClickHouse/ClickHouse/issues/47361): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
30
docs/changelogs/v23.2.2.20-stable.md
Normal file
30
docs/changelogs/v23.2.2.20-stable.md
Normal file
@ -0,0 +1,30 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.2.2.20-stable (f6c269c8df2) FIXME as compared to v23.2.1.2537-stable (52bf836e03a)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#46914](https://github.com/ClickHouse/ClickHouse/issues/46914): Allow PREWHERE for Merge with different DEFAULT expression for column. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#47022](https://github.com/ClickHouse/ClickHouse/issues/47022): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
|
||||
#### Bug Fix
|
||||
* Backported in [#46828](https://github.com/ClickHouse/ClickHouse/issues/46828): Combined PREWHERE column accumulated from multiple PREWHERE in some cases didn't contain 0's from previous steps. The fix is to apply final filter if we know that it wasn't applied from more than 1 last step. [#46785](https://github.com/ClickHouse/ClickHouse/pull/46785) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#47062](https://github.com/ClickHouse/ClickHouse/issues/47062): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46895](https://github.com/ClickHouse/ClickHouse/issues/46895): Fixed a bug in automatic retries of `DROP TABLE` query with `ReplicatedMergeTree` tables and `Atomic` databases. In rare cases it could lead to `Can't get data for node /zk_path/log_pointer` and `The specified key does not exist` errors if ZooKeeper session expired during DROP and a new replicated table with the same path in ZooKeeper was created in parallel. [#46384](https://github.com/ClickHouse/ClickHouse/pull/46384) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#46865](https://github.com/ClickHouse/ClickHouse/issues/46865): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#46958](https://github.com/ClickHouse/ClickHouse/issues/46958): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* More concise logging at trace level for PREWHERE steps [#46771](https://github.com/ClickHouse/ClickHouse/pull/46771) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
23
docs/changelogs/v23.2.3.17-stable.md
Normal file
23
docs/changelogs/v23.2.3.17-stable.md
Normal file
@ -0,0 +1,23 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.2.3.17-stable (dec18bf7281) FIXME as compared to v23.2.2.20-stable (f6c269c8df2)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46907](https://github.com/ClickHouse/ClickHouse/issues/46907): - Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#47091](https://github.com/ClickHouse/ClickHouse/issues/47091): - Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Backported in [#46885](https://github.com/ClickHouse/ClickHouse/issues/46885): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#47067](https://github.com/ClickHouse/ClickHouse/issues/47067): Fix typo in systemd service, which causes the systemd service start to fail. [#47051](https://github.com/ClickHouse/ClickHouse/pull/47051) ([Palash Goel](https://github.com/palash-goel)).
|
||||
* Backported in [#47259](https://github.com/ClickHouse/ClickHouse/issues/47259): Fix concrete columns PREWHERE support. [#47154](https://github.com/ClickHouse/ClickHouse/pull/47154) ([Azat Khuzhin](https://github.com/azat)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* do flushUntrackedMemory when context switches [#47102](https://github.com/ClickHouse/ClickHouse/pull/47102) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
20
docs/changelogs/v23.2.4.12-stable.md
Normal file
20
docs/changelogs/v23.2.4.12-stable.md
Normal file
@ -0,0 +1,20 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.2.4.12-stable (8fe866cb035) FIXME as compared to v23.2.3.17-stable (dec18bf7281)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#47277](https://github.com/ClickHouse/ClickHouse/issues/47277): Fix IPv4/IPv6 serialization/deserialization in binary formats that was broken in https://github.com/ClickHouse/ClickHouse/pull/43221. Closes [#46522](https://github.com/ClickHouse/ClickHouse/issues/46522). [#46616](https://github.com/ClickHouse/ClickHouse/pull/46616) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Backported in [#47212](https://github.com/ClickHouse/ClickHouse/issues/47212): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to not working deduplication. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#47363](https://github.com/ClickHouse/ClickHouse/issues/47363): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
@ -172,7 +172,7 @@ Global thread pool is `GlobalThreadPool` singleton class. To allocate thread fro
|
||||
|
||||
Global pool is universal and all pools described below are implemented on top of it. This can be thought of as a hierarchy of pools. Any specialized pool takes its threads from the global pool using `ThreadPool` class. So the main purpose of any specialized pool is to apply limit on the number of simultaneous jobs and do job scheduling. If there are more jobs scheduled than threads in a pool, `ThreadPool` accumulates jobs in a queue with priorities. Each job has an integer priority. Default priority is zero. All jobs with higher priority values are started before any job with lower priority value. But there is no difference between already executing jobs, thus priority matters only when the pool in overloaded.
|
||||
|
||||
IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU.
|
||||
IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. Backup to S3 does significant amount of IO operations and to avoid impact on interactive queries there is a separate `BackupsIOThreadPool` configured with `max_backups_io_thread_pool_size`, `max_backups_io_thread_pool_free_size` and `backups_io_thread_pool_queue_size` settings.
|
||||
|
||||
For periodic task execution there is `BackgroundSchedulePool` class. You can register tasks using `BackgroundSchedulePool::TaskHolder` objects and the pool ensures that no task runs two jobs at the same time. It also allows you to postpone task execution to a specific instant in the future or temporarily deactivate task. Global `Context` provides a few instances of this class for different purposes. For general purpose tasks `Context::getSchedulePool()` is used.
|
||||
|
||||
|
123
docs/en/development/build-cross-s390x.md
Normal file
123
docs/en/development/build-cross-s390x.md
Normal file
@ -0,0 +1,123 @@
|
||||
---
|
||||
slug: /en/development/build-cross-s390x
|
||||
sidebar_position: 69
|
||||
title: How to Build, Run and Debug ClickHouse on Linux for s390x (zLinux)
|
||||
sidebar_label: Build on Linux for s390x (zLinux)
|
||||
---
|
||||
|
||||
As of writing (2023/3/10) building for s390x considered to be experimental. Not all features can be enabled, has broken features and is currently under active development.
|
||||
|
||||
|
||||
## Building
|
||||
|
||||
As s390x does not support boringssl, it uses OpenSSL and has two related build options.
|
||||
- By default, the s390x build will dynamically link to OpenSSL libraries. It will build OpenSSL shared objects, so it's not necessary to install OpenSSL beforehand. (This option is recommended in all cases.)
|
||||
- Another option is to build OpenSSL in-tree. In this case two build flags need to be supplied to cmake
|
||||
```bash
|
||||
-DENABLE_OPENSSL_DYNAMIC=0 -DENABLE_OPENSSL=1
|
||||
```
|
||||
|
||||
These instructions assume that the host machine is x86_64 and has all the tooling required to build natively based on the [build instructions](../development/build.md). It also assumes that the host is Ubuntu 22.04 but the following instructions should also work on Ubuntu 20.04.
|
||||
|
||||
In addition to installing the tooling used to build natively, the following additional packages need to be installed:
|
||||
|
||||
```bash
|
||||
apt-get install binutils-s390x-linux-gnu libc6-dev-s390x-cross gcc-s390x-linux-gnu binfmt-support qemu-user-static
|
||||
```
|
||||
|
||||
If you wish to cross compile rust code install the rust cross compile target for s390x:
|
||||
```bash
|
||||
rustup target add s390x-unknown-linux-gnu
|
||||
```
|
||||
|
||||
To build for s390x:
|
||||
```bash
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-s390x.cmake ..
|
||||
ninja
|
||||
```
|
||||
|
||||
## Running
|
||||
|
||||
Once built, the binary can be run with, eg.:
|
||||
|
||||
```bash
|
||||
qemu-s390x-static -L /usr/s390x-linux-gnu ./clickhouse
|
||||
```
|
||||
|
||||
## Debugging
|
||||
|
||||
Install LLDB:
|
||||
|
||||
```bash
|
||||
apt-get install lldb-15
|
||||
```
|
||||
|
||||
To Debug a s390x executable, run clickhouse using QEMU in debug mode:
|
||||
|
||||
```bash
|
||||
qemu-s390x-static -g 31338 -L /usr/s390x-linux-gnu ./clickhouse
|
||||
```
|
||||
|
||||
In another shell run LLDB and attach, replace `<Clickhouse Parent Directory>` and `<build directory>` with the values corresponding to your environment.
|
||||
```bash
|
||||
lldb-15
|
||||
(lldb) target create ./clickhouse
|
||||
Current executable set to '/<Clickhouse Parent Directory>/ClickHouse/<build directory>/programs/clickhouse' (s390x).
|
||||
(lldb) settings set target.source-map <build directory> /<Clickhouse Parent Directory>/ClickHouse
|
||||
(lldb) gdb-remote 31338
|
||||
Process 1 stopped
|
||||
* thread #1, stop reason = signal SIGTRAP
|
||||
frame #0: 0x0000004020e74cd0
|
||||
-> 0x4020e74cd0: lgr %r2, %r15
|
||||
0x4020e74cd4: aghi %r15, -160
|
||||
0x4020e74cd8: xc 0(8,%r15), 0(%r15)
|
||||
0x4020e74cde: brasl %r14, 275429939040
|
||||
(lldb) b main
|
||||
Breakpoint 1: 9 locations.
|
||||
(lldb) c
|
||||
Process 1 resuming
|
||||
Process 1 stopped
|
||||
* thread #1, stop reason = breakpoint 1.1
|
||||
frame #0: 0x0000004005cd9fc0 clickhouse`main(argc_=1, argv_=0x0000004020e594a8) at main.cpp:450:17
|
||||
447 #if !defined(FUZZING_MODE)
|
||||
448 int main(int argc_, char ** argv_)
|
||||
449 {
|
||||
-> 450 inside_main = true;
|
||||
451 SCOPE_EXIT({ inside_main = false; });
|
||||
452
|
||||
453 /// PHDR cache is required for query profiler to work reliably
|
||||
```
|
||||
|
||||
## Visual Studio Code integration
|
||||
|
||||
- (CodeLLDB extension)[https://github.com/vadimcn/vscode-lldb] is required for visual debugging, the (Command Variable)[https://github.com/rioj7/command-variable] extension can help dynamic launches if using (cmake variants)[https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md].
|
||||
- Make sure to set the backend to your llvm installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
|
||||
- Launcher:
|
||||
```json
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Debug",
|
||||
"type": "lldb",
|
||||
"request": "custom",
|
||||
"targetCreateCommands": ["target create ${command:cmake.launchTargetDirectory}/clickhouse"],
|
||||
"processCreateCommands": ["settings set target.source-map ${input:targetdir} ${workspaceFolder}", "gdb-remote 31338"],
|
||||
"sourceMap": { "${input:targetdir}": "${workspaceFolder}" },
|
||||
}
|
||||
],
|
||||
"inputs": [
|
||||
{
|
||||
"id": "targetdir",
|
||||
"type": "command",
|
||||
"command": "extension.commandvariable.transform",
|
||||
"args": {
|
||||
"text": "${command:cmake.launchTargetDirectory}",
|
||||
"find": ".*/([^/]+)/[^/]+$",
|
||||
"replace": "$1"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /en/development/contrib
|
||||
sidebar_position: 71
|
||||
sidebar_position: 72
|
||||
sidebar_label: Third-Party Libraries
|
||||
description: A list of third-party libraries used
|
||||
---
|
||||
|
@ -67,7 +67,7 @@ It generally means that the SSH keys for connecting to GitHub are missing. These
|
||||
|
||||
You can also clone the repository via https protocol:
|
||||
|
||||
git clone --recursive--shallow-submodules https://github.com/ClickHouse/ClickHouse.git
|
||||
git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHouse.git
|
||||
|
||||
This, however, will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later replacing the remote address of the repository with `git remote` command.
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /en/development/style
|
||||
sidebar_position: 69
|
||||
sidebar_position: 70
|
||||
sidebar_label: C++ Guide
|
||||
description: A list of recommendations regarding coding style, naming convention, formatting and more
|
||||
---
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /en/development/tests
|
||||
sidebar_position: 70
|
||||
sidebar_position: 71
|
||||
sidebar_label: Testing
|
||||
title: ClickHouse Testing
|
||||
description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way.
|
||||
@ -31,6 +31,9 @@ folder and run the following command:
|
||||
PATH=$PATH:<path to clickhouse-client> tests/clickhouse-test 01428_hash_set_nan_key
|
||||
```
|
||||
|
||||
Test results (`stderr` and `stdout`) are written to files `01428_hash_set_nan_key.[stderr|stdout]` which
|
||||
are located near the test file itself (so for `queries/0_stateless/foo.sql` output will be in `queries/0_stateless/foo.stdout`).
|
||||
|
||||
For more options, see `tests/clickhouse-test --help`. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`. There are also options to run tests in parallel or in randomized order.
|
||||
|
||||
### Adding a New Test
|
||||
|
@ -19,8 +19,8 @@ Kafka lets you:
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
|
||||
name1 [type1],
|
||||
name2 [type2],
|
||||
...
|
||||
) ENGINE = Kafka()
|
||||
SETTINGS
|
||||
@ -113,6 +113,10 @@ Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
|
||||
|
||||
</details>
|
||||
|
||||
:::info
|
||||
The Kafka table engine doesn't support columns with [default value](../../../sql-reference/statements/create/table.md#default_value). If you need columns with default value, you can add them at materialized view level (see below).
|
||||
:::
|
||||
|
||||
## Description {#description}
|
||||
|
||||
The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name.
|
||||
|
@ -450,29 +450,32 @@ INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY
|
||||
|
||||
Conditions in the `WHERE` clause contains calls of the functions that operate with columns. If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes.
|
||||
|
||||
The `set` index can be used with all functions. Function subsets for other indexes are shown in the table below.
|
||||
Indexes of type `set` can be utilized by all functions. The other index types are supported as follows:
|
||||
|
||||
| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
|
||||
|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
|
||||
| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
|
||||
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
|
||||
| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
|------------------------------------------------------------------------------------------------------------|-------------|--------|------------|------------|--------------|
|
||||
| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
|
||||
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
|
||||
| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ |
|
||||
| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✗ | ✗ | ✔ |
|
||||
| [hasAll](/docs/en/sql-reference/functions/array-functions#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ |
|
||||
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
|
||||
Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization.
|
||||
|
||||
|
File diff suppressed because one or more lines are too long
@ -1232,50 +1232,52 @@ Each row is formatted as a single document and each column is formatted as a sin
|
||||
|
||||
For output it uses the following correspondence between ClickHouse types and BSON types:
|
||||
|
||||
| ClickHouse type | BSON Type |
|
||||
|-----------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------|
|
||||
| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
|
||||
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
|
||||
| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
|
||||
| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double |
|
||||
| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 |
|
||||
| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 |
|
||||
| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime |
|
||||
| [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 |
|
||||
| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 |
|
||||
| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
|
||||
| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 |
|
||||
| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
|
||||
| [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 |
|
||||
| ClickHouse type | BSON Type |
|
||||
|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------|
|
||||
| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
|
||||
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
|
||||
| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
|
||||
| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double |
|
||||
| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 |
|
||||
| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 |
|
||||
| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime |
|
||||
| [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 |
|
||||
| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 |
|
||||
| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
|
||||
| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 |
|
||||
| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
|
||||
| [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 |
|
||||
| [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled |
|
||||
| [UUID](/docs/en/sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 |
|
||||
| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array |
|
||||
| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array |
|
||||
| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document |
|
||||
| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document |
|
||||
| [UUID](/docs/en/sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 |
|
||||
| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array |
|
||||
| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array |
|
||||
| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document |
|
||||
| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document |
|
||||
| [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 |
|
||||
| [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype |
|
||||
|
||||
For input it uses the following correspondence between BSON types and ClickHouse types:
|
||||
|
||||
| BSON Type | ClickHouse Type |
|
||||
|------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
|
||||
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
|
||||
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
|
||||
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
|
||||
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) |
|
||||
| BSON Type | ClickHouse Type |
|
||||
|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
|
||||
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
|
||||
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
|
||||
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
|
||||
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
|
||||
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) |
|
||||
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
|
||||
|
||||
Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8).
|
||||
@ -1608,23 +1610,25 @@ See also [Format Schema](#formatschema).
|
||||
|
||||
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||
|
||||
| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|
||||
|--------------------------------|-----------------------------------------------------------|--------------------------------|
|
||||
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
||||
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
|
||||
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
||||
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
|
||||
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` |
|
||||
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
|
||||
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
|
||||
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
|
||||
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` |
|
||||
| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|
||||
|----------------------------------|------------------------------------------------------------------------------------------------------------------------|------------------------------|
|
||||
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
||||
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
|
||||
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
||||
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
|
||||
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` |
|
||||
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
|
||||
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
|
||||
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
|
||||
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` |
|
||||
| `ENUM` | [Enum(8\ |16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
|
||||
| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
|
||||
|
||||
For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting.
|
||||
|
||||
@ -1804,21 +1808,23 @@ ClickHouse Avro format supports reading and writing [Avro data files](https://av
|
||||
|
||||
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||
|
||||
| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` |
|
||||
|---------------------------------------------|----------------------------------------------------------------------------------------------------|------------------------------|
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` |
|
||||
| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* |
|
||||
| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` |
|
||||
| `enum` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `enum` |
|
||||
| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` |
|
||||
| `union(null, T)`, `union(T, null)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(null, T)` |
|
||||
| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` |
|
||||
| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** |
|
||||
| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* |
|
||||
| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* |
|
||||
| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` |
|
||||
|---------------------------------------------|-----------------------------------------------------------------------------------------------------------------|-------------------------------------------------|
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Int(8\ | 16\ |32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` |
|
||||
| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` |
|
||||
| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* |
|
||||
| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` |
|
||||
| `enum` | [Enum(8\ | 16)](/docs/en/sql-reference/data-types/enum.md) | `enum` |
|
||||
| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` |
|
||||
| `union(null, T)`, `union(T, null)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(null, T)` |
|
||||
| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` |
|
||||
| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** |
|
||||
| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* |
|
||||
| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* |
|
||||
| `int` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `int` |
|
||||
| `fixed(16)` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `fixed(16)` |
|
||||
|
||||
\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern)
|
||||
\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
|
||||
@ -1918,28 +1924,30 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml`
|
||||
|
||||
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||
|
||||
| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
|
||||
|-----------------------------------------------|-----------------------------------------------------------------|------------------------------|
|
||||
| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
|
||||
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
||||
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
|
||||
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
||||
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
|
||||
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
|
||||
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
|
||||
| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` |
|
||||
| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` |
|
||||
| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` |
|
||||
| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
|
||||
| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
|
||||
| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
|
||||
| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
|
||||
| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
|
||||
|----------------------------------------------------|-----------------------------------------------------------------|------------------------------|
|
||||
| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
|
||||
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
||||
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
|
||||
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
||||
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
|
||||
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
|
||||
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
|
||||
| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` |
|
||||
| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` |
|
||||
| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` |
|
||||
| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
|
||||
| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
|
||||
| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
|
||||
| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
|
||||
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
|
||||
| `FIXED_LENGTH_BYTE_ARRAY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` |
|
||||
|
||||
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
|
||||
|
||||
@ -1973,6 +1981,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
|
||||
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
|
||||
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
|
||||
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
|
||||
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
|
||||
|
||||
## Arrow {#data-format-arrow}
|
||||
|
||||
@ -2007,6 +2016,8 @@ The table below shows supported data types and how they match ClickHouse [data t
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
|
||||
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
|
||||
| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` |
|
||||
|
||||
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
|
||||
|
||||
@ -2041,6 +2052,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
|
||||
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
|
||||
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
|
||||
- [output_format_arrow_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_fixed_string_as_fixed_byte_array) - use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString columns. Default value - `true`.
|
||||
- [output_format_arrow_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_compression_method) - compression method used in output Arrow format. Default value - `none`.
|
||||
|
||||
## ArrowStream {#data-format-arrow-stream}
|
||||
|
||||
@ -2054,8 +2066,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
|
||||
|
||||
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||
|
||||
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|
||||
|---------------------------------------|---------------------------------------------------------|--------------------------|
|
||||
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|
||||
|---------------------------------------|---------------------------------------------------------------|--------------------------|
|
||||
| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
|
||||
| `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` |
|
||||
| `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` |
|
||||
@ -2070,6 +2082,7 @@ The table below shows supported data types and how they match ClickHouse [data t
|
||||
| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
|
||||
| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
|
||||
| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
|
||||
| `-` | [IPv4](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
|
||||
|
||||
Other types are not supported.
|
||||
|
||||
@ -2096,6 +2109,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.
|
||||
### Arrow format settings {#parquet-format-settings}
|
||||
|
||||
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
|
||||
- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`.
|
||||
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
|
||||
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
|
||||
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
|
||||
@ -2264,8 +2278,8 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data
|
||||
|
||||
### Data Types Matching {#data-types-matching-msgpack}
|
||||
|
||||
| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) |
|
||||
|--------------------------------------------------------------------|-----------------------------------------------------------|------------------------------------|
|
||||
| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) |
|
||||
|--------------------------------------------------------------------|-----------------------------------------------------------------|------------------------------------|
|
||||
| `uint N`, `positive fixint` | [UIntN](/docs/en/sql-reference/data-types/int-uint.md) | `uint N` |
|
||||
| `int N`, `negative fixint` | [IntN](/docs/en/sql-reference/data-types/int-uint.md) | `int N` |
|
||||
| `bool` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `uint 8` |
|
||||
@ -2278,6 +2292,8 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data
|
||||
| `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `uint 64` |
|
||||
| `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md) | `fixarray`, `array 16`, `array 32` |
|
||||
| `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` |
|
||||
| `uint 32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `uint 32` |
|
||||
| `bin 8` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8` |
|
||||
|
||||
Example:
|
||||
|
||||
|
@ -309,6 +309,7 @@ The HTTP interface allows passing external data (external temporary tables) for
|
||||
## Response Buffering {#response-buffering}
|
||||
|
||||
You can enable response buffering on the server-side. The `buffer_size` and `wait_end_of_query` URL parameters are provided for this purpose.
|
||||
Also settings `http_response_buffer_size` and `http_wait_end_of_query` can be used.
|
||||
|
||||
`buffer_size` determines the number of bytes in the result to buffer in the server memory. If a result body is larger than this threshold, the buffer is written to the HTTP channel, and the remaining data is sent directly to the HTTP channel.
|
||||
|
||||
|
@ -117,7 +117,7 @@ clickhouse-local --file='hobbies.jsonl' --table='hobbies' --query='SELECT * FROM
|
||||
4 47 Brayan ['movies','skydiving']
|
||||
```
|
||||
|
||||
# Using structure from insertion table {#using-structure-from-insertion-table}
|
||||
## Using structure from insertion table {#using-structure-from-insertion-table}
|
||||
|
||||
When table functions `file/s3/url/hdfs` are used to insert data into a table,
|
||||
there is an option to use the structure from the insertion table instead of extracting it from the data.
|
||||
@ -222,7 +222,7 @@ INSERT INTO hobbies4 SELECT id, empty(hobbies) ? NULL : hobbies[1] FROM file(hob
|
||||
|
||||
In this case, there are some operations performed on the column `hobbies` in the `SELECT` query to insert it into the table, so ClickHouse cannot use the structure from the insertion table, and schema inference will be used.
|
||||
|
||||
# Schema inference cache {#schema-inference-cache}
|
||||
## Schema inference cache {#schema-inference-cache}
|
||||
|
||||
For most input formats schema inference reads some data to determine its structure and this process can take some time.
|
||||
To prevent inferring the same schema every time ClickHouse read the data from the same file, the inferred schema is cached and when accessing the same file again, ClickHouse will use the schema from the cache.
|
||||
@ -326,14 +326,14 @@ SELECT count() FROM system.schema_inference_cache WHERE storage='S3'
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
# Text formats {#text-formats}
|
||||
## Text formats {#text-formats}
|
||||
|
||||
For text formats, ClickHouse reads the data row by row, extracts column values according to the format,
|
||||
and then uses some recursive parsers and heuristics to determine the type for each value. The maximum number of rows read from the data in schema inference
|
||||
is controlled by the setting `input_format_max_rows_to_read_for_schema_inference` with default value 25000.
|
||||
By default, all inferred types are [Nullable](../sql-reference/data-types/nullable.md), but you can change this by setting `schema_inference_make_columns_nullable` (see examples in the [settings](#settings-for-text-formats) section).
|
||||
|
||||
## JSON formats {#json-formats}
|
||||
### JSON formats {#json-formats}
|
||||
|
||||
In JSON formats ClickHouse parses values according to the JSON specification and then tries to find the most appropriate data type for them.
|
||||
|
||||
@ -464,9 +464,9 @@ most likely this column contains only Nulls or empty Arrays/Maps.
|
||||
...
|
||||
```
|
||||
|
||||
### JSON settings {#json-settings}
|
||||
#### JSON settings {#json-settings}
|
||||
|
||||
#### input_format_json_read_objects_as_strings
|
||||
##### input_format_json_read_objects_as_strings
|
||||
|
||||
Enabling this setting allows reading nested JSON objects as strings.
|
||||
This setting can be used to read nested JSON objects without using JSON object type.
|
||||
@ -486,7 +486,7 @@ DESC format(JSONEachRow, $$
|
||||
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
#### input_format_json_try_infer_numbers_from_strings
|
||||
##### input_format_json_try_infer_numbers_from_strings
|
||||
|
||||
Enabling this setting allows inferring numbers from string values.
|
||||
|
||||
@ -507,7 +507,7 @@ DESC format(JSONEachRow, $$
|
||||
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
#### input_format_json_read_numbers_as_strings
|
||||
##### input_format_json_read_numbers_as_strings
|
||||
|
||||
Enabling this setting allows reading numeric values as strings.
|
||||
|
||||
@ -528,7 +528,7 @@ DESC format(JSONEachRow, $$
|
||||
└───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
#### input_format_json_read_bools_as_numbers
|
||||
##### input_format_json_read_bools_as_numbers
|
||||
|
||||
Enabling this setting allows reading Bool values as numbers.
|
||||
|
||||
@ -549,7 +549,7 @@ DESC format(JSONEachRow, $$
|
||||
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## CSV {#csv}
|
||||
### CSV {#csv}
|
||||
|
||||
In CSV format ClickHouse extracts column values from the row according to delimiters. ClickHouse expects all types except numbers and strings to be enclosed in double quotes. If the value is in double quotes, ClickHouse tries to parse
|
||||
the data inside quotes using the recursive parser and then tries to find the most appropriate data type for it. If the value is not in double quotes, ClickHouse tries to parse it as a number,
|
||||
@ -726,7 +726,7 @@ $$)
|
||||
└──────────────┴───────────────┘
|
||||
```
|
||||
|
||||
## TSV/TSKV {#tsv-tskv}
|
||||
### TSV/TSKV {#tsv-tskv}
|
||||
|
||||
In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using
|
||||
the recursive parser to determine the most appropriate type. If the type cannot be determined, ClickHouse treats this value as String.
|
||||
@ -1019,7 +1019,7 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!')
|
||||
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## CustomSeparated {#custom-separated}
|
||||
### CustomSeparated {#custom-separated}
|
||||
|
||||
In CustomSeparated format ClickHouse first extracts all column values from the row according to specified delimiters and then tries to infer
|
||||
the data type for each value according to escaping rule.
|
||||
@ -1080,7 +1080,7 @@ $$)
|
||||
└────────┴───────────────┴────────────┘
|
||||
```
|
||||
|
||||
## Template {#template}
|
||||
### Template {#template}
|
||||
|
||||
In Template format ClickHouse first extracts all column values from the row according to the specified template and then tries to infer the
|
||||
data type for each value according to its escaping rule.
|
||||
@ -1120,7 +1120,7 @@ $$)
|
||||
└──────────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## Regexp {#regexp}
|
||||
### Regexp {#regexp}
|
||||
|
||||
Similar to Template, in Regexp format ClickHouse first extracts all column values from the row according to specified regular expression and then tries to infer
|
||||
data type for each value according to the specified escaping rule.
|
||||
@ -1142,9 +1142,9 @@ Line: value_1=2, value_2="Some string 2", value_3="[4, 5, NULL]"$$)
|
||||
└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## Settings for text formats {settings-for-text-formats}
|
||||
### Settings for text formats {#settings-for-text-formats}
|
||||
|
||||
### input_format_max_rows_to_read_for_schema_inference
|
||||
#### input_format_max_rows_to_read_for_schema_inference
|
||||
|
||||
This setting controls the maximum number of rows to be read while schema inference.
|
||||
The more rows are read, the more time is spent on schema inference, but the greater the chance to
|
||||
@ -1152,7 +1152,7 @@ correctly determine the types (especially when the data contains a lot of nulls)
|
||||
|
||||
Default value: `25000`.
|
||||
|
||||
### column_names_for_schema_inference
|
||||
#### column_names_for_schema_inference
|
||||
|
||||
The list of column names to use in schema inference for formats without explicit column names. Specified names will be used instead of default `c1,c2,c3,...`. The format: `column1,column2,column3,...`.
|
||||
|
||||
@ -1169,7 +1169,7 @@ DESC format(TSV, 'Hello, World! 42 [1, 2, 3]') settings column_names_for_schema_
|
||||
└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
### schema_inference_hints
|
||||
#### schema_inference_hints
|
||||
|
||||
The list of column names and types to use in schema inference instead of automatically determined types. The format: 'column_name1 column_type1, column_name2 column_type2, ...'.
|
||||
This setting can be used to specify the types of columns that could not be determined automatically or for optimizing the schema.
|
||||
@ -1189,7 +1189,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul
|
||||
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
### schema_inference_make_columns_nullable
|
||||
#### schema_inference_make_columns_nullable
|
||||
|
||||
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
|
||||
If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference.
|
||||
@ -1232,7 +1232,7 @@ DESC format(JSONEachRow, $$
|
||||
└─────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
### input_format_try_infer_integers
|
||||
#### input_format_try_infer_integers
|
||||
|
||||
If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats.
|
||||
If all numbers in the column from sample data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`.
|
||||
@ -1289,7 +1289,7 @@ DESC format(JSONEachRow, $$
|
||||
└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
### input_format_try_infer_datetimes
|
||||
#### input_format_try_infer_datetimes
|
||||
|
||||
If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats.
|
||||
If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`,
|
||||
@ -1337,7 +1337,7 @@ DESC format(JSONEachRow, $$
|
||||
|
||||
Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format)
|
||||
|
||||
### input_format_try_infer_dates
|
||||
#### input_format_try_infer_dates
|
||||
|
||||
If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats.
|
||||
If all fields from a column in sample data were successfully parsed as dates, the result type will be `Date`,
|
||||
@ -1383,14 +1383,14 @@ DESC format(JSONEachRow, $$
|
||||
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
# Self describing formats {#self-describing-formats}
|
||||
## Self describing formats {#self-describing-formats}
|
||||
|
||||
Self-describing formats contain information about the structure of the data in the data itself,
|
||||
it can be some header with a description, a binary type tree, or some kind of table.
|
||||
To automatically infer a schema from files in such formats, ClickHouse reads a part of the data containing
|
||||
information about the types and converts it into a schema of the ClickHouse table.
|
||||
|
||||
## Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types}
|
||||
### Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types}
|
||||
|
||||
ClickHouse supports some text formats with the suffix -WithNamesAndTypes. This suffix means that the data contains two additional rows with column names and types before the actual data.
|
||||
While schema inference for such formats, ClickHouse reads the first two rows and extracts column names and types.
|
||||
@ -1412,7 +1412,7 @@ $$)
|
||||
└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## JSON formats with metadata {#json-with-metadata}
|
||||
### JSON formats with metadata {#json-with-metadata}
|
||||
|
||||
Some JSON input formats ([JSON](formats.md#json), [JSONCompact](formats.md#json-compact), [JSONColumnsWithMetadata](formats.md#jsoncolumnswithmetadata)) contain metadata with column names and types.
|
||||
In schema inference for such formats, ClickHouse reads this metadata.
|
||||
@ -1465,7 +1465,7 @@ $$)
|
||||
└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## Avro {#avro}
|
||||
### Avro {#avro}
|
||||
|
||||
In Avro format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
|
||||
|
||||
@ -1485,7 +1485,7 @@ In Avro format ClickHouse reads its schema from the data and converts it to Clic
|
||||
|
||||
Other Avro types are not supported.
|
||||
|
||||
## Parquet {#parquet}
|
||||
### Parquet {#parquet}
|
||||
|
||||
In Parquet format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
|
||||
|
||||
@ -1513,7 +1513,7 @@ In Parquet format ClickHouse reads its schema from the data and converts it to C
|
||||
|
||||
Other Parquet types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
|
||||
|
||||
## Arrow {#arrow}
|
||||
### Arrow {#arrow}
|
||||
|
||||
In Arrow format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
|
||||
|
||||
@ -1541,7 +1541,7 @@ In Arrow format ClickHouse reads its schema from the data and converts it to Cli
|
||||
|
||||
Other Arrow types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
|
||||
|
||||
## ORC {#orc}
|
||||
### ORC {#orc}
|
||||
|
||||
In ORC format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
|
||||
|
||||
@ -1564,17 +1564,17 @@ In ORC format ClickHouse reads its schema from the data and converts it to Click
|
||||
|
||||
Other ORC types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
|
||||
|
||||
## Native {#native}
|
||||
### Native {#native}
|
||||
|
||||
Native format is used inside ClickHouse and contains the schema in the data.
|
||||
In schema inference, ClickHouse reads the schema from the data without any transformations.
|
||||
|
||||
# Formats with external schema {#formats-with-external-schema}
|
||||
## Formats with external schema {#formats-with-external-schema}
|
||||
|
||||
Such formats require a schema describing the data in a separate file in a specific schema language.
|
||||
To automatically infer a schema from files in such formats, ClickHouse reads external schema from a separate file and transforms it to a ClickHouse table schema.
|
||||
|
||||
# Protobuf {#protobuf}
|
||||
### Protobuf {#protobuf}
|
||||
|
||||
In schema inference for Protobuf format ClickHouse uses the following type matches:
|
||||
|
||||
@ -1592,7 +1592,7 @@ In schema inference for Protobuf format ClickHouse uses the following type match
|
||||
| `repeated T` | [Array(T)](../sql-reference/data-types/array.md) |
|
||||
| `message`, `group` | [Tuple](../sql-reference/data-types/tuple.md) |
|
||||
|
||||
# CapnProto {#capnproto}
|
||||
### CapnProto {#capnproto}
|
||||
|
||||
In schema inference for CapnProto format ClickHouse uses the following type matches:
|
||||
|
||||
@ -1615,13 +1615,13 @@ In schema inference for CapnProto format ClickHouse uses the following type matc
|
||||
| `struct` | [Tuple](../sql-reference/data-types/tuple.md) |
|
||||
| `union(T, Void)`, `union(Void, T)` | [Nullable(T)](../sql-reference/data-types/nullable.md) |
|
||||
|
||||
# Strong-typed binary formats {#strong-typed-binary-formats}
|
||||
## Strong-typed binary formats {#strong-typed-binary-formats}
|
||||
|
||||
In such formats, each serialized value contains information about its type (and possibly about its name), but there is no information about the whole table.
|
||||
In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows) and extracts
|
||||
the type (and possibly name) for each value from the data and then converts these types to ClickHouse types.
|
||||
|
||||
## MsgPack {msgpack}
|
||||
### MsgPack {#msgpack}
|
||||
|
||||
In MsgPack format there is no delimiter between rows, to use schema inference for this format you should specify the number of columns in the table
|
||||
using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the following type matches:
|
||||
@ -1641,7 +1641,7 @@ using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the
|
||||
|
||||
By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
|
||||
|
||||
## BSONEachRow {#bsoneachrow}
|
||||
### BSONEachRow {#bsoneachrow}
|
||||
|
||||
In BSONEachRow each row of data is presented as a BSON document. In schema inference ClickHouse reads BSON documents one by one and extracts
|
||||
values, names, and types from the data and then transforms these types to ClickHouse types using the following type matches:
|
||||
@ -1661,11 +1661,11 @@ values, names, and types from the data and then transforms these types to ClickH
|
||||
|
||||
By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
|
||||
|
||||
# Formats with constant schema {#formats-with-constant-schema}
|
||||
## Formats with constant schema {#formats-with-constant-schema}
|
||||
|
||||
Data in such formats always have the same schema.
|
||||
|
||||
## LineAsString {#line-as-string}
|
||||
### LineAsString {#line-as-string}
|
||||
|
||||
In this format, ClickHouse reads the whole line from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `line`.
|
||||
|
||||
@ -1680,7 +1680,7 @@ DESC format(LineAsString, 'Hello\nworld!')
|
||||
└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## JSONAsString {#json-as-string}
|
||||
### JSONAsString {#json-as-string}
|
||||
|
||||
In this format, ClickHouse reads the whole JSON object from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `json`.
|
||||
|
||||
@ -1695,7 +1695,7 @@ DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}')
|
||||
└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## JSONAsObject {#json-as-object}
|
||||
### JSONAsObject {#json-as-object}
|
||||
|
||||
In this format, ClickHouse reads the whole JSON object from the data into a single column with `Object('json')` data type. Inferred type for this format is always `String` and the column name is `json`.
|
||||
|
||||
|
@ -765,7 +765,7 @@ Default value: `0`.
|
||||
|
||||
## concurrent_threads_soft_limit_ratio_to_cores {#concurrent_threads_soft_limit_ratio_to_cores}
|
||||
The maximum number of query processing threads as multiple of number of logical cores.
|
||||
More details: [concurrent_threads_soft_limit_num](#concurrent-threads-soft-limit-num).
|
||||
More details: [concurrent_threads_soft_limit_num](#concurrent_threads_soft_limit_num).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -967,6 +967,7 @@ The maximum number of jobs that can be scheduled on the Global Thread pool. Incr
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `10000`.
|
||||
|
||||
@ -976,6 +977,69 @@ Default value: `10000`.
|
||||
<thread_pool_queue_size>12000</thread_pool_queue_size>
|
||||
```
|
||||
|
||||
## max_io_thread_pool_size {#max-io-thread-pool-size}
|
||||
|
||||
ClickHouse uses threads from the IO Thread pool to do some IO operations (e.g. to interact with S3). `max_io_thread_pool_size` limits the maximum number of threads in the pool.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
|
||||
Default value: `100`.
|
||||
|
||||
## max_io_thread_pool_free_size {#max-io-thread-pool-free-size}
|
||||
|
||||
If the number of **idle** threads in the IO Thread pool exceeds `max_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## io_thread_pool_queue_size {#io-thread-pool-queue-size}
|
||||
|
||||
The maximum number of jobs that can be scheduled on the IO Thread pool.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `10000`.
|
||||
|
||||
## max_backups_io_thread_pool_size {#max-backups-io-thread-pool-size}
|
||||
|
||||
ClickHouse uses threads from the Backups IO Thread pool to do S3 backup IO operations. `max_backups_io_thread_pool_size` limits the maximum number of threads in the pool.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
|
||||
Default value: `1000`.
|
||||
|
||||
## max_backups_io_thread_pool_free_size {#max-backups-io-thread-pool-free-size}
|
||||
|
||||
If the number of **idle** threads in the Backups IO Thread pool exceeds `max_backup_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- Zero.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## backups_io_thread_pool_queue_size {#backups-io-thread-pool-queue-size}
|
||||
|
||||
The maximum number of jobs that can be scheduled on the Backups IO Thread pool. It is recommended to keep this queue unlimited due to the current S3 backup logic.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## background_pool_size {#background_pool_size}
|
||||
|
||||
Sets the number of threads performing background merges and mutations for tables with MergeTree engines. This setting is also could be applied at server startup from the `default` profile configuration for backward compatibility at the ClickHouse server start. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
|
||||
@ -1254,12 +1318,12 @@ Settings:
|
||||
|
||||
``` xml
|
||||
<prometheus>
|
||||
<endpoint>/metrics</endpoint>
|
||||
<port>8001</port>
|
||||
<metrics>true</metrics>
|
||||
<events>true</events>
|
||||
<asynchronous_metrics>true</asynchronous_metrics>
|
||||
</prometheus>
|
||||
<endpoint>/metrics</endpoint>
|
||||
<port>9363</port>
|
||||
<metrics>true</metrics>
|
||||
<events>true</events>
|
||||
<asynchronous_metrics>true</asynchronous_metrics>
|
||||
</prometheus>
|
||||
```
|
||||
|
||||
## query_log {#server_configuration_parameters-query-log}
|
||||
|
@ -1014,6 +1014,12 @@ Use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString column
|
||||
|
||||
Enabled by default.
|
||||
|
||||
### output_format_arrow_compression_method {#output_format_arrow_compression_method}
|
||||
|
||||
Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed)
|
||||
|
||||
Default value: `none`.
|
||||
|
||||
## ORC format settings {#orc-format-settings}
|
||||
|
||||
### input_format_orc_import_nested {#input_format_orc_import_nested}
|
||||
@ -1057,6 +1063,12 @@ Use ORC String type instead of Binary for String columns.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
### output_format_orc_compression_method {#output_format_orc_compression_method}
|
||||
|
||||
Compression method used in output ORC format. Supported codecs: `lz4`, `snappy`, `zlib`, `zstd`, `none` (uncompressed)
|
||||
|
||||
Default value: `none`.
|
||||
|
||||
## Parquet format settings {#parquet-format-settings}
|
||||
|
||||
### input_format_parquet_import_nested {#input_format_parquet_import_nested}
|
||||
@ -1112,6 +1124,12 @@ The version of Parquet format used in output format. Supported versions: `1.0`,
|
||||
|
||||
Default value: `2.latest`.
|
||||
|
||||
### output_format_parquet_compression_method {#output_format_parquet_compression_method}
|
||||
|
||||
Compression method used in output Parquet format. Supported codecs: `snappy`, `lz4`, `brotli`, `zstd`, `gzip`, `none` (uncompressed)
|
||||
|
||||
Default value: `snappy`.
|
||||
|
||||
## Hive format settings {#hive-format-settings}
|
||||
|
||||
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}
|
||||
@ -1474,7 +1492,7 @@ Default value: `65505`.
|
||||
|
||||
The name of table that will be used in the output INSERT statement.
|
||||
|
||||
Default value: `'table''`.
|
||||
Default value: `table`.
|
||||
|
||||
### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names}
|
||||
|
||||
@ -1514,4 +1532,12 @@ Disabled by default.
|
||||
|
||||
The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit.
|
||||
|
||||
Default value: `1GiB`
|
||||
Default value: `1GiB`.
|
||||
|
||||
## Native format settings {#native-format-settings}
|
||||
|
||||
### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion}
|
||||
|
||||
Allow types conversion in Native input format between columns from input data and requested columns.
|
||||
|
||||
Enabled by default.
|
||||
|
@ -966,10 +966,10 @@ This is an expert-level setting, and you shouldn't change it if you're just gett
|
||||
|
||||
## max_query_size {#settings-max_query_size}
|
||||
|
||||
The maximum part of a query that can be taken to RAM for parsing with the SQL parser.
|
||||
The INSERT query also contains data for INSERT that is processed by a separate stream parser (that consumes O(1) RAM), which is not included in this restriction.
|
||||
The maximum number of bytes of a query string parsed by the SQL parser.
|
||||
Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and not affected by this restriction.
|
||||
|
||||
Default value: 256 KiB.
|
||||
Default value: 262144 (= 256 KiB).
|
||||
|
||||
## max_parser_depth {#max_parser_depth}
|
||||
|
||||
@ -1248,7 +1248,9 @@ Possible values:
|
||||
Default value: 1.
|
||||
|
||||
:::warning
|
||||
Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas).
|
||||
Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key).
|
||||
If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas.
|
||||
If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects.
|
||||
:::
|
||||
|
||||
## totals_mode {#totals-mode}
|
||||
@ -1273,16 +1275,47 @@ Default value: `1`.
|
||||
|
||||
**Additional Info**
|
||||
|
||||
This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
|
||||
This options will produce different results depending on the settings used.
|
||||
|
||||
:::warning
|
||||
This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
|
||||
:::
|
||||
|
||||
### Parallel processing using `SAMPLE` key
|
||||
|
||||
A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
|
||||
|
||||
- The position of the sampling key in the partitioning key does not allow efficient range scans.
|
||||
- Adding a sampling key to the table makes filtering by other columns less efficient.
|
||||
- The sampling key is an expression that is expensive to calculate.
|
||||
- The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency.
|
||||
|
||||
:::warning
|
||||
This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
|
||||
:::
|
||||
### Parallel processing using [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
|
||||
|
||||
This setting is useful for any replicated table.
|
||||
|
||||
## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key}
|
||||
|
||||
An arbitrary integer expression that can be used to split work between replicas for a specific table.
|
||||
The value can be any integer expression.
|
||||
A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
|
||||
and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type).
|
||||
|
||||
Simple expressions using primary keys are preferred.
|
||||
|
||||
If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards.
|
||||
Otherwise, it will behave same as for `SAMPLE` key, it will use multiple replicas of each shard.
|
||||
|
||||
## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type}
|
||||
|
||||
How to use `parallel_replicas_custom_key` expression for splitting work between replicas.
|
||||
|
||||
Possible values:
|
||||
|
||||
- `default` — Use the default implementation using modulo operation on the `parallel_replicas_custom_key`.
|
||||
- `range` — Split the entire value space of the expression in the ranges. This type of filtering is useful if values of `parallel_replicas_custom_key` are uniformly spread across the entire integer space, e.g. hash values.
|
||||
|
||||
Default value: `default`.
|
||||
|
||||
## compile_expressions {#compile-expressions}
|
||||
|
||||
@ -1515,7 +1548,7 @@ Enables or disables asynchronous inserts. This makes sense only for insertion ov
|
||||
|
||||
If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables.
|
||||
|
||||
The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query.
|
||||
The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query. Also the buffer will be flushed to disk if at least [async_insert_max_query_number](#async-insert-max-query-number) async insert queries per block were received. This last setting takes effect only if [async_insert_deduplicate](#async-insert-deduplicate) is enabled.
|
||||
|
||||
If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will wait for the data to be processed and flushed to the table. Otherwise, the query would be processed almost instantly, even if the data is not inserted.
|
||||
|
||||
|
@ -80,7 +80,7 @@ Required parameters:
|
||||
|
||||
- `type` — `encrypted`. Otherwise the encrypted disk is not created.
|
||||
- `disk` — Type of disk for data storage.
|
||||
- `key` — The key for encryption and decryption. Type: [Uint64](/docs/en/sql-reference/data-types/int-uint.md). You can use `key_hex` parameter to encrypt in hexadecimal form.
|
||||
- `key` — The key for encryption and decryption. Type: [Uint64](/docs/en/sql-reference/data-types/int-uint.md). You can use `key_hex` parameter to encode the key in hexadecimal form.
|
||||
You can specify multiple keys using the `id` attribute (see example above).
|
||||
|
||||
Optional parameters:
|
||||
@ -135,11 +135,13 @@ Example of configuration for versions later or equal to 22.8:
|
||||
</cache>
|
||||
</disks>
|
||||
<policies>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cache</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
<s3-cache>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cache</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3-cache>
|
||||
<policies>
|
||||
</storage_configuration>
|
||||
```
|
||||
@ -159,11 +161,13 @@ Example of configuration for versions earlier than 22.8:
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
<s3-cache>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3-cache>
|
||||
<policies>
|
||||
</storage_configuration>
|
||||
```
|
||||
|
@ -15,6 +15,13 @@ Columns:
|
||||
|
||||
- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation.
|
||||
|
||||
- `kind` ([Enum8](../../sql-reference/data-types/enum.md)) — The [SpanKind](https://opentelemetry.io/docs/reference/specification/trace/api/#spankind) of the span.
|
||||
- `INTERNAL` — Indicates that the span represents an internal operation within an application.
|
||||
- `SERVER` — Indicates that the span covers server-side handling of a synchronous RPC or other remote request.
|
||||
- `CLIENT` — Indicates that the span describes a request to some remote service.
|
||||
- `PRODUCER` — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts.
|
||||
- `CONSUMER` - Indicates that the span describes a child of an asynchronous PRODUCER request.
|
||||
|
||||
- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds).
|
||||
|
||||
- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds).
|
||||
@ -42,6 +49,7 @@ trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914
|
||||
span_id: 701487461015578150
|
||||
parent_span_id: 2991972114672045096
|
||||
operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl()
|
||||
kind: INTERNAL
|
||||
start_time_us: 1612374594529090
|
||||
finish_time_us: 1612374594529108
|
||||
finish_date: 2021-02-03
|
||||
|
@ -27,7 +27,7 @@ $ clickhouse-format --query "select number from numbers(10) where number%2 order
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```sql
|
||||
SELECT number
|
||||
FROM numbers(10)
|
||||
WHERE number % 2
|
||||
@ -54,7 +54,7 @@ $ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNIO
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```sql
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
@ -75,7 +75,7 @@ $ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWE
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```sql
|
||||
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
|
||||
```
|
||||
|
||||
@ -87,7 +87,7 @@ $ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWE
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```sql
|
||||
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
|
||||
```
|
||||
|
||||
@ -99,7 +99,7 @@ $ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELE
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
```sql
|
||||
SELECT * \
|
||||
FROM \
|
||||
( \
|
||||
|
@ -6,29 +6,26 @@ sidebar_label: clickhouse-local
|
||||
|
||||
# clickhouse-local
|
||||
|
||||
The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server.
|
||||
The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
|
||||
|
||||
Accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/).
|
||||
|
||||
`clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
|
||||
|
||||
By default `clickhouse-local` does not have access to data on the same host, but it supports loading server configuration using `--config-file` argument.
|
||||
|
||||
:::warning
|
||||
It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error.
|
||||
:::
|
||||
|
||||
For temporary data, a unique temporary data directory is created by default.
|
||||
By default `clickhouse-local` has access to data on the same host, and it does not depend on the server's configuration. It also supports loading server configuration using `--config-file` argument. For temporary data, a unique temporary data directory is created by default.
|
||||
|
||||
## Usage {#usage}
|
||||
|
||||
Basic usage:
|
||||
Basic usage (Linux):
|
||||
|
||||
``` bash
|
||||
$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" \
|
||||
--query "query"
|
||||
$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" --query "query"
|
||||
```
|
||||
|
||||
Basic usage (Mac):
|
||||
|
||||
``` bash
|
||||
$ ./clickhouse local --structure "table_structure" --input-format "format_of_incoming_data" --query "query"
|
||||
```
|
||||
|
||||
Also supported on Windows through WSL2.
|
||||
|
||||
Arguments:
|
||||
|
||||
- `-S`, `--structure` — table structure for input data.
|
||||
|
@ -11,15 +11,15 @@ sidebar_title: exponentialMovingAverage
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
exponentialMovingAverage(x)(value, timestamp)
|
||||
exponentialMovingAverage(x)(value, timeunit)
|
||||
```
|
||||
|
||||
Each `value` corresponds to the determinate `timestamp`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is considered to be.
|
||||
Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is considered to be.
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
- `timestamp` — Timestamp. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not timestamp (seconds), it's -- an index of the time interval. Can be calculated using [intDiv](../../functions/arithmetic-functions/#intdiva-b).
|
||||
|
||||
**Parameters**
|
||||
|
||||
@ -148,3 +148,58 @@ Result:
|
||||
│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│
|
||||
└───────┴──────┴──────────────────────┴────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE TABLE data
|
||||
ENGINE = Memory AS
|
||||
SELECT
|
||||
10 AS value,
|
||||
toDateTime('2020-01-01') + (3600 * number) AS time
|
||||
FROM numbers_mt(10);
|
||||
|
||||
|
||||
-- Calculate timeunit using intDiv
|
||||
SELECT
|
||||
value,
|
||||
time,
|
||||
exponentialMovingAverage(1)(value, intDiv(toUInt32(time), 3600)) OVER (ORDER BY time ASC) AS res,
|
||||
intDiv(toUInt32(time), 3600) AS timeunit
|
||||
FROM data
|
||||
ORDER BY time ASC;
|
||||
|
||||
┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐
|
||||
│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │
|
||||
│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │
|
||||
│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │
|
||||
│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │
|
||||
│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │
|
||||
│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │
|
||||
│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │
|
||||
│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │
|
||||
│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │
|
||||
│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │
|
||||
└───────┴─────────────────────┴─────────────┴──────────┘
|
||||
|
||||
|
||||
-- Calculate timeunit using toRelativeHourNum
|
||||
SELECT
|
||||
value,
|
||||
time,
|
||||
exponentialMovingAverage(1)(value, toRelativeHourNum(time)) OVER (ORDER BY time ASC) AS res,
|
||||
toRelativeHourNum(time) AS timeunit
|
||||
FROM data
|
||||
ORDER BY time ASC;
|
||||
|
||||
┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐
|
||||
│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │
|
||||
│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │
|
||||
│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │
|
||||
│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │
|
||||
│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │
|
||||
│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │
|
||||
│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │
|
||||
│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │
|
||||
│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │
|
||||
│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │
|
||||
└───────┴─────────────────────┴─────────────┴──────────┘
|
||||
```
|
||||
|
@ -1126,15 +1126,48 @@ Rounds the time to the half hour.
|
||||
|
||||
## toYYYYMM
|
||||
|
||||
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM).
|
||||
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
|
||||
|
||||
### example
|
||||
```sql
|
||||
SELECT
|
||||
toYYYYMM(now(), 'US/Eastern')
|
||||
```
|
||||
```response
|
||||
┌─toYYYYMM(now(), 'US/Eastern')─┐
|
||||
│ 202303 │
|
||||
└───────────────────────────────┘
|
||||
```
|
||||
|
||||
## toYYYYMMDD
|
||||
|
||||
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD).
|
||||
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
|
||||
|
||||
### example
|
||||
```sql
|
||||
SELECT
|
||||
toYYYYMMDD(now(), 'US/Eastern')
|
||||
```
|
||||
```response
|
||||
┌─toYYYYMMDD(now(), 'US/Eastern')─┐
|
||||
│ 20230302 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## toYYYYMMDDhhmmss
|
||||
|
||||
Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss).
|
||||
Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
|
||||
|
||||
### example
|
||||
```sql
|
||||
SELECT
|
||||
toYYYYMMDDhhmmss(now(), 'US/Eastern')
|
||||
```
|
||||
```response
|
||||
┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐
|
||||
│ 20230302112209 │
|
||||
└───────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters
|
||||
|
||||
|
@ -280,12 +280,20 @@ SELECT
|
||||
|
||||
## toIPv4OrDefault(string)
|
||||
|
||||
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns 0.
|
||||
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns `0.0.0.0` (0 IPv4).
|
||||
|
||||
## toIPv4OrNull(string)
|
||||
|
||||
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null.
|
||||
|
||||
## toIPv6OrDefault(string)
|
||||
|
||||
Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns `::` (0 IPv6).
|
||||
|
||||
## toIPv6OrNull(string)
|
||||
|
||||
Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null.
|
||||
|
||||
## toIPv6
|
||||
|
||||
Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value.
|
||||
|
@ -330,7 +330,7 @@ repeat(s, n)
|
||||
**Arguments**
|
||||
|
||||
- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md).
|
||||
- `n` — The number of times to repeat the string. [UInt](../../sql-reference/data-types/int-uint.md).
|
||||
- `n` — The number of times to repeat the string. [UInt or Int](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -233,8 +233,9 @@ If `some_predicate` is not selective enough, it will return large amount of data
|
||||
|
||||
### Distributed Subqueries and max_parallel_replicas
|
||||
|
||||
When max_parallel_replicas is greater than 1, distributed queries are further transformed. For example, the following:
|
||||
When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed.
|
||||
|
||||
For example, the following:
|
||||
```sql
|
||||
SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
|
||||
SETTINGS max_parallel_replicas=3
|
||||
@ -247,8 +248,12 @@ SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM
|
||||
SETTINGS parallel_replicas_count=3, parallel_replicas_offset=M
|
||||
```
|
||||
|
||||
where M is between 1 and 3 depending on which replica the local query is executing on. These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
|
||||
where M is between 1 and 3 depending on which replica the local query is executing on.
|
||||
|
||||
Therefore adding the max_parallel_replicas setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN.
|
||||
These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
|
||||
|
||||
Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN.
|
||||
|
||||
One workaround if local_table_2 does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`.
|
||||
|
||||
If a table doesn't have a sampling key, more flexible options for [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) can be used that can produce different and more optimal behaviour.
|
||||
|
@ -16,7 +16,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY COMMENT 'Comment'
|
||||
|
||||
**Examples**
|
||||
|
||||
Creating a table with comment (for more information, see the [COMMENT] clause(../../../sql-reference/statements/create/table.md#comment-table)):
|
||||
Creating a table with comment (for more information, see the [COMMENT](../../../sql-reference/statements/create/table.md#comment-table) clause):
|
||||
|
||||
``` sql
|
||||
CREATE TABLE table_with_comment
|
||||
|
@ -110,25 +110,23 @@ If the type is not `Nullable` and if `NULL` is specified, it will be treated as
|
||||
|
||||
See also [data_type_default_nullable](../../../operations/settings/settings.md#data_type_default_nullable) setting.
|
||||
|
||||
## Default Values
|
||||
## Default Values {#default_values}
|
||||
|
||||
The column description can specify an expression for a default value, in one of the following ways: `DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`.
|
||||
The column description can specify a default value expression in the form of `DEFAULT expr`, `MATERIALIZED expr`, or `ALIAS expr`. Example: `URLDomain String DEFAULT domain(URL)`.
|
||||
|
||||
Example: `URLDomain String DEFAULT domain(URL)`.
|
||||
The expression `expr` is optional. If it is omitted, the column type must be specified explicitly and the default value will be `0` for numeric columns, `''` (the empty string) for string columns, `[]` (the empty array) for array columns, `1970-01-01` for date columns, or `NULL` for nullable columns.
|
||||
|
||||
If an expression for the default value is not defined, the default values will be set to zeros for numbers, empty strings for strings, empty arrays for arrays, and `1970-01-01` for dates or zero unix timestamp for DateTime, NULL for Nullable.
|
||||
The column type of a default value column can be omitted in which case it is infered from `expr`'s type. For example the type of column `EventDate DEFAULT toDate(EventTime)` will be date.
|
||||
|
||||
If the default expression is defined, the column type is optional. If there isn’t an explicitly defined type, the default expression type is used. Example: `EventDate DEFAULT toDate(EventTime)` – the ‘Date’ type will be used for the ‘EventDate’ column.
|
||||
If both a data type and a default value expression are specified, an implicit type casting function inserted which converts the expression to the specified type. Example: `Hits UInt32 DEFAULT 0` is internally represented as `Hits UInt32 DEFAULT toUInt32(0)`.
|
||||
|
||||
If the data type and default expression are defined explicitly, this expression will be cast to the specified type using type casting functions. Example: `Hits UInt32 DEFAULT 0` means the same thing as `Hits UInt32 DEFAULT toUInt32(0)`.
|
||||
|
||||
Default expressions may be defined as an arbitrary expression from table constants and columns. When creating and changing the table structure, it checks that expressions do not contain loops. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed.
|
||||
A default value expression `expr` may reference arbitrary table columns and constants. ClickHouse checks that changes of the table structure do not introduce loops in the expression calculation. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed.
|
||||
|
||||
### DEFAULT
|
||||
|
||||
`DEFAULT expr`
|
||||
|
||||
Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression.
|
||||
Normal default value. If the value of such a column is not specified in an INSERT query, it is computed from `expr`.
|
||||
|
||||
Example:
|
||||
|
||||
@ -154,9 +152,9 @@ SELECT * FROM test;
|
||||
|
||||
`MATERIALIZED expr`
|
||||
|
||||
Materialized expression. Such a column can’t be specified for INSERT, because it is always calculated.
|
||||
For an INSERT without a list of columns, these columns are not considered.
|
||||
In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
|
||||
Materialized expression. Values of such columns are always calculated, they cannot be specified in INSERT queries.
|
||||
|
||||
Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`.
|
||||
|
||||
Example:
|
||||
|
||||
@ -192,8 +190,9 @@ SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1;
|
||||
|
||||
`EPHEMERAL [expr]`
|
||||
|
||||
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required.
|
||||
INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
|
||||
Ephemeral column. Columns of this type are not stored in the table and it is not possible to SELECT from them. The only purpose of ephemeral columns is to build default value expressions of other columns from them.
|
||||
|
||||
An insert without explicitly specified columns will skip columns of this type. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`.
|
||||
|
||||
Example:
|
||||
|
||||
@ -205,7 +204,7 @@ CREATE OR REPLACE TABLE test
|
||||
hexed FixedString(4) DEFAULT unhex(unhexed)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
ORDER BY id;
|
||||
|
||||
INSERT INTO test (id, unhexed) Values (1, '5a90b714');
|
||||
|
||||
@ -227,9 +226,9 @@ hex(hexed): 5A90B714
|
||||
|
||||
`ALIAS expr`
|
||||
|
||||
Synonym. Such a column isn’t stored in the table at all.
|
||||
Its values can’t be inserted in a table, and it is not substituted when using an asterisk in a SELECT query.
|
||||
It can be used in SELECTs if the alias is expanded during query parsing.
|
||||
Calculated columns (synonym). Column of this type are not stored in the table and it is not possible to INSERT values into them.
|
||||
|
||||
When SELECT queries explicitly reference columns of this type, the value is computed at query time from `expr`. By default, `SELECT *` excludes ALIAS columns. This behavior can be disabled with setting `asteriks_include_alias_columns`.
|
||||
|
||||
When using the ALTER query to add new columns, old data for these columns is not written. Instead, when reading old data that does not have values for the new columns, expressions are computed on the fly by default. However, if running the expressions requires different columns that are not indicated in the query, these columns will additionally be read, but only for the blocks of data that need it.
|
||||
|
||||
@ -394,15 +393,15 @@ These codecs are designed to make compression more effective by using specific f
|
||||
|
||||
#### DoubleDelta
|
||||
|
||||
`DoubleDelta` — Calculates delta of deltas and writes it in compact binary form. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-byte deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
|
||||
`DoubleDelta(bytes_size)` — Calculates delta of deltas and writes it in compact binary form. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
|
||||
|
||||
#### Gorilla
|
||||
|
||||
`Gorilla` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series changes, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078).
|
||||
`Gorilla(bytes_size)` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series changes, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078).
|
||||
|
||||
#### FPC
|
||||
|
||||
`FPC` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf).
|
||||
`FPC(level, float_size)` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. Possible `level` values: 1-28, the default value is 12. Possible `float_size` values: 4, 8, the default value is `sizeof(type)` if type is Float. In all other cases, it’s 4. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf).
|
||||
|
||||
#### T64
|
||||
|
||||
@ -474,7 +473,7 @@ ENGINE = MergeTree ORDER BY x;
|
||||
ClickHouse supports temporary tables which have the following characteristics:
|
||||
|
||||
- Temporary tables disappear when the session ends, including if the connection is lost.
|
||||
- A temporary table uses the Memory engine only.
|
||||
- A temporary table uses the Memory table engine when engine is not specified and it may use any table engine except Replicated and `KeeperMap` engines.
|
||||
- The DB can’t be specified for a temporary table. It is created outside of databases.
|
||||
- Impossible to create a temporary table with distributed DDL query on all cluster servers (by using `ON CLUSTER`): this table exists only in the current session.
|
||||
- If a temporary table has the same name as another one and a query specifies the table name without specifying the DB, the temporary table will be used.
|
||||
@ -488,7 +487,7 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
|
||||
...
|
||||
)
|
||||
) [ENGINE = engine]
|
||||
```
|
||||
|
||||
In most cases, temporary tables are not created manually, but when using external data for a query, or for distributed `(GLOBAL) IN`. For more information, see the appropriate sections
|
||||
@ -576,7 +575,7 @@ SELECT * FROM base.t1;
|
||||
You can add a comment to the table when you creating it.
|
||||
|
||||
:::note
|
||||
The comment is supported for all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md).
|
||||
The comment clause is supported by all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md).
|
||||
:::
|
||||
|
||||
|
||||
|
@ -70,6 +70,12 @@ A materialized view is implemented as follows: when inserting data to the table
|
||||
Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views.
|
||||
|
||||
Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view.
|
||||
|
||||
Materialized views in ClickHouse do not have deterministic behaviour in case of errors. This means that blocks that had been already written will be preserved in the destination table, but all blocks after error will not.
|
||||
|
||||
By default if pushing to one of views fails, then the INSERT query will fail too, and some blocks may not be written to the destination table. This can be changed using `materialized_views_ignore_errors` setting (you should set it for `INSERT` query), if you will set `materialized_views_ignore_errors=true`, then any errors while pushing to views will be ignored and all blocks will be written to the destination table.
|
||||
|
||||
Also note, that `materialized_views_ignore_errors` set to `true` by default for `system.*_log` tables.
|
||||
:::
|
||||
|
||||
If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it.
|
||||
|
@ -105,7 +105,8 @@ Hierarchy of privileges:
|
||||
- [CREATE](#grant-create)
|
||||
- `CREATE DATABASE`
|
||||
- `CREATE TABLE`
|
||||
- `CREATE TEMPORARY TABLE`
|
||||
- `CREATE ARBITRARY TEMPORARY TABLE`
|
||||
- `CREATE TEMPORARY TABLE`
|
||||
- `CREATE VIEW`
|
||||
- `CREATE DICTIONARY`
|
||||
- `CREATE FUNCTION`
|
||||
@ -313,7 +314,8 @@ Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [A
|
||||
- `CREATE`. Level: `GROUP`
|
||||
- `CREATE DATABASE`. Level: `DATABASE`
|
||||
- `CREATE TABLE`. Level: `TABLE`
|
||||
- `CREATE TEMPORARY TABLE`. Level: `GLOBAL`
|
||||
- `CREATE ARBITRARY TEMPORARY TABLE`. Level: `GLOBAL`
|
||||
- `CREATE TEMPORARY TABLE`. Level: `GLOBAL`
|
||||
- `CREATE VIEW`. Level: `VIEW`
|
||||
- `CREATE DICTIONARY`. Level: `DICTIONARY`
|
||||
|
||||
|
@ -91,6 +91,13 @@ INSERT INTO t FORMAT TabSeparated
|
||||
|
||||
You can insert data separately from the query by using the command-line client or the HTTP interface. For more information, see the section “[Interfaces](../../interfaces)”.
|
||||
|
||||
:::note
|
||||
If you want to specify `SETTINGS` for `INSERT` query then you have to do it _before_ `FORMAT` clause since everything after `FORMAT format_name` is treated as data. For example:
|
||||
```sql
|
||||
INSERT INTO table SETTINGS ... FORMAT format_name data_set
|
||||
```
|
||||
:::
|
||||
|
||||
## Constraints
|
||||
|
||||
If table has [constraints](../../sql-reference/statements/create/table.md#constraints), their expressions will be checked for each row of inserted data. If any of those constraints is not satisfied — server will raise an exception containing constraint name and expression, the query will be stopped.
|
||||
|
@ -6,21 +6,22 @@ sidebar_label: file
|
||||
|
||||
# file
|
||||
|
||||
Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones.
|
||||
Creates a table from a file. This table function is similar to [url](/docs/en/sql-reference/table-functions/url.md) and [hdfs](/docs/en/sql-reference/table-functions/hdfs.md) ones.
|
||||
|
||||
`file` function can be used in `SELECT` and `INSERT` queries on data in [File](../../engines/table-engines/special/file.md) tables.
|
||||
`file` function can be used in `SELECT` and `INSERT` queries on data in [File](/docs/en/engines/table-engines/special/file.md) tables.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
file(path [,format] [,structure])
|
||||
file(path [,format] [,structure] [,compression])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
|
||||
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
|
||||
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -53,7 +54,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
Getting the first 10 lines of a table that contains 3 columns of [UInt32](../../sql-reference/data-types/int-uint.md) type from a CSV file:
|
||||
Getting the first 10 lines of a table that contains 3 columns of [UInt32](/docs/en/sql-reference/data-types/int-uint.md) type from a CSV file:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
|
||||
@ -143,4 +144,4 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
|
@ -89,7 +89,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
└─────────────────────┴───────────┴──────────┴──────┘
|
||||
```
|
||||
|
||||
Первая строка отменяет предыдущее состояние объекта (пользователя). Она должен повторять все поля из ключа сортировки для отменённого состояния за исключением `Sign`.
|
||||
Первая строка отменяет предыдущее состояние объекта (пользователя). Она должна повторять все поля из ключа сортировки для отменённого состояния за исключением `Sign`.
|
||||
|
||||
Вторая строка содержит текущее состояние.
|
||||
|
||||
|
@ -584,7 +584,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
|
||||
|
||||
Данные с истекшим `TTL` удаляются, когда ClickHouse мёржит куски данных.
|
||||
|
||||
Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управление частотой подобных мёржей, можно задать настройку `merge_with_ttl_timeout`. Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера.
|
||||
Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управления частотой подобных мёржей, можно задать настройку `merge_with_ttl_timeout`. Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера.
|
||||
|
||||
Если вы выполните запрос `SELECT` между слияниями вы можете получить устаревшие данные. Чтобы избежать этого используйте запрос [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) перед `SELECT`.
|
||||
|
||||
@ -679,7 +679,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
|
||||
- `policy_name_N` — название политики. Названия политик должны быть уникальны.
|
||||
- `volume_name_N` — название тома. Названия томов должны быть уникальны.
|
||||
- `disk` — диск, находящийся внутри тома.
|
||||
- `max_data_part_size_bytes` — максимальный размер куска данных, который может находится на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том.
|
||||
- `max_data_part_size_bytes` — максимальный размер куска данных, который может находиться на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том.
|
||||
- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`, если совокупный размер всех партов недостаточен, будут перемещены все парты.
|
||||
- `prefer_not_to_merge` — Отключает слияние кусков данных, хранящихся на данном томе. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками.
|
||||
|
||||
@ -730,7 +730,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
|
||||
|
||||
В приведенном примере, политика `hdd_in_order` реализует прицип [round-robin](https://ru.wikipedia.org/wiki/Round-robin_(%D0%B0%D0%BB%D0%B3%D0%BE%D1%80%D0%B8%D1%82%D0%BC)). Так как в политике есть всего один том (`single`), то все записи производятся на его диски по круговому циклу. Такая политика может быть полезна при наличии в системе нескольких похожих дисков, но при этом не сконфигурирован RAID. Учтите, что каждый отдельный диск ненадёжен и чтобы не потерять важные данные это необходимо скомпенсировать за счет хранения данных в трёх копиях.
|
||||
|
||||
Если система содержит диски различных типов, то может пригодиться политика `moving_from_ssd_to_hdd`. В томе `hot` находится один SSD-диск (`fast_ssd`), а также задается ограничение на максимальный размер куска, который может храниться на этом томе (1GB). Все куски такой таблицы больше 1GB будут записываться сразу на том `cold`, в котором содержится один HDD-диск `disk1`. Также, при заполнении диска `fast_ssd` более чем на 80% данные будут переносится на диск `disk1` фоновым процессом.
|
||||
Если система содержит диски различных типов, то может пригодиться политика `moving_from_ssd_to_hdd`. В томе `hot` находится один SSD-диск (`fast_ssd`), а также задается ограничение на максимальный размер куска, который может храниться на этом томе (1GB). Все куски такой таблицы больше 1GB будут записываться сразу на том `cold`, в котором содержится один HDD-диск `disk1`. Также при заполнении диска `fast_ssd` более чем на 80% данные будут переноситься на диск `disk1` фоновым процессом.
|
||||
|
||||
Порядок томов в политиках хранения важен, при достижении условий на переполнение тома данные переносятся на следующий. Порядок дисков в томах так же важен, данные пишутся по очереди на каждый из них.
|
||||
|
||||
|
@ -8,6 +8,7 @@ sidebar_label: "Клиентские библиотеки от сторонни
|
||||
|
||||
:::danger "Disclaimer"
|
||||
Яндекс не поддерживает перечисленные ниже библиотеки и не проводит тщательного тестирования для проверки их качества.
|
||||
:::
|
||||
|
||||
- Python:
|
||||
- [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)
|
||||
|
9
docs/ru/interfaces/third-party/gui.md
vendored
9
docs/ru/interfaces/third-party/gui.md
vendored
@ -177,19 +177,20 @@ sidebar_label: "Визуальные интерфейсы от сторонни
|
||||
|
||||
### Yandex DataLens {#yandex-datalens}
|
||||
|
||||
[Yandex DataLens](https://cloud.yandex.ru/services/datalens) — cервис визуализации и анализа данных.
|
||||
[Yandex DataLens](https://datalens.yandex.ru) — cервис визуализации и анализа данных.
|
||||
|
||||
Основные возможности:
|
||||
|
||||
- Широкий выбор инструментов визуализации, от простых столбчатых диаграмм до сложных дашбордов.
|
||||
- Возможность опубликовать дашборды на широкую аудиторию.
|
||||
- Поддержка множества источников данных, включая ClickHouse.
|
||||
- Хранение материализованных данных в кластере ClickHouse DataLens.
|
||||
|
||||
Для небольших проектов DataLens [доступен бесплатно](https://cloud.yandex.ru/docs/datalens/pricing), в том числе и для коммерческого использования.
|
||||
DataLens [доступен бесплатно](https://cloud.yandex.ru/docs/datalens/pricing), в том числе и для коммерческого использования.
|
||||
|
||||
- [Знакомство с DataLens]((https://youtu.be/57ngi_6BINE).
|
||||
- [Чат сообщества DataLens](https://t.me/YandexDataLens)
|
||||
- [Документация DataLens](https://cloud.yandex.ru/docs/datalens/).
|
||||
- [Пособие по визуализации данных из ClickHouse](https://cloud.yandex.ru/docs/solutions/datalens/data-from-ch-visualization).
|
||||
- [Сценарий по визуализации данных из ClickHouse](https://cloud.yandex.ru/docs/solutions/datalens/data-from-ch-visualization).
|
||||
|
||||
### Holistics Software {#holistics-software}
|
||||
|
||||
|
@ -325,21 +325,21 @@ clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 --
|
||||
Например, для кластера из 3 нод, алгоритм кворума продолжает работать при отказе не более чем одной ноды.
|
||||
|
||||
Конфигурация кластера может быть изменена динамически с некоторыми ограничениями.
|
||||
Переконфигурация также использует Raft, поэтому для добавление новой ноды кластера или исключения старой ноды из него требуется достижения кворума в рамках текущей конфигурации кластера.
|
||||
Переконфигурация также использует Raft, поэтому для добавления новой ноды кластера или исключения старой ноды требуется достижение кворума в рамках текущей конфигурации кластера.
|
||||
Если в вашем кластере произошел отказ большего числа нод, чем допускает Raft для вашей текущей конфигурации и у вас нет возможности восстановить их работоспособность, Raft перестанет работать и не позволит изменить конфигурацию стандартным механизмом.
|
||||
|
||||
Тем не менее ClickHousr Keeper имеет возможность запуститься в режиме восстановления, который позволяет переконфигурировать класте используя только одну ноду кластера.
|
||||
Тем не менее ClickHouse Keeper имеет возможность запуститься в режиме восстановления, который позволяет переконфигурировать кластер используя только одну ноду кластера.
|
||||
Этот механизм может использоваться только как крайняя мера, когда вы не можете восстановить существующие ноды кластера или запустить новый сервер с тем же идентификатором.
|
||||
|
||||
Важно:
|
||||
- Удостоверьтесь, что отказавшие ноды не смогут в дальнейшем подключиться к кластеру в будущем.
|
||||
- Не запускайте новые ноды, пока не завешите процедуру ниже.
|
||||
- Не запускайте новые ноды, пока не завершите процедуру ниже.
|
||||
|
||||
После того, как выполнили действия выше выполните следующие шаги.
|
||||
1. Выберете одну ноду Keeper, которая станет новым лидером. Учтите, что данные которые с этой ноды будут испольщзованы всем кластером, поэтому рекомендуется выбрать ноду с наиболее актуальным состоянием.
|
||||
1. Выберете одну ноду Keeper, которая станет новым лидером. Учтите, что данные с этой ноды будут использованы всем кластером, поэтому рекомендуется выбрать ноду с наиболее актуальным состоянием.
|
||||
2. Перед дальнейшими действиям сделайте резервную копию данных из директорий `log_storage_path` и `snapshot_storage_path`.
|
||||
3. Измените настройки на всех нодах кластера, которые вы собираетесь использовать.
|
||||
4. Отправьте команду `rcvr` на ноду, которую вы выбрали или остановите ее и запустите заново с аргументом `--force-recovery`. Это переведет ноду в режим восстановления.
|
||||
4. Отправьте команду `rcvr` на ноду, которую вы выбрали, или остановите ее и запустите заново с аргументом `--force-recovery`. Это переведет ноду в режим восстановления.
|
||||
5. Запускайте остальные ноды кластера по одной и проверяйте, что команда `mntr` возвращает `follower` в выводе состояния `zk_server_state` перед тем, как запустить следующую ноду.
|
||||
6. Пока нода работает в режиме восстановления, лидер будет возвращать ошибку на запрос `mntr` пока кворум не будет достигнут с помощью новых нод. Любые запросы от клиентов и постедователей будут возвращать ошибку.
|
||||
6. Пока нода работает в режиме восстановления, лидер будет возвращать ошибку на запрос `mntr` пока кворум не будет достигнут с помощью новых нод. Любые запросы от клиентов и последователей будут возвращать ошибку.
|
||||
7. После достижения кворума лидер перейдет в нормальный режим работы и станет обрабатывать все запросы через Raft. Удостоверьтесь, что запрос `mntr` возвращает `leader` в выводе состояния `zk_server_state`.
|
||||
|
@ -10,6 +10,7 @@ ClickHouse поддерживает [OpenTelemetry](https://opentelemetry.io/)
|
||||
|
||||
:::danger "Предупреждение"
|
||||
Поддержка стандарта экспериментальная и будет со временем меняться.
|
||||
:::
|
||||
|
||||
## Обеспечение поддержки контекста трассировки в ClickHouse
|
||||
|
||||
|
@ -26,6 +26,7 @@ ClickHouse перезагружает встроенные словари с з
|
||||
|
||||
:::danger "Внимание"
|
||||
Лучше не использовать, если вы только начали работать с ClickHouse.
|
||||
:::
|
||||
|
||||
Общий вид конфигурации:
|
||||
|
||||
@ -1064,6 +1065,7 @@ ClickHouse использует потоки из глобального пул
|
||||
|
||||
:::danger "Обратите внимание"
|
||||
Завершающий слеш обязателен.
|
||||
:::
|
||||
|
||||
**Пример**
|
||||
|
||||
@ -1330,6 +1332,7 @@ TCP порт для защищённого обмена данными с кли
|
||||
|
||||
:::danger "Обратите внимание"
|
||||
Завершающий слеш обязателен.
|
||||
:::
|
||||
|
||||
**Пример**
|
||||
|
||||
|
@ -82,7 +82,7 @@ sidebar_label: "Хранение данных на внешних дисках"
|
||||
|
||||
- `type` — `encrypted`. Иначе зашифрованный диск создан не будет.
|
||||
- `disk` — тип диска для хранения данных.
|
||||
- `key` — ключ для шифрования и расшифровки. Тип: [Uint64](../sql-reference/data-types/int-uint.md). Вы можете использовать параметр `key_hex` для шифрования в шестнадцатеричной форме.
|
||||
- `key` — ключ для шифрования и расшифровки. Тип: [UInt64](../sql-reference/data-types/int-uint.md). Вы можете использовать параметр `key_hex` для шифрования в шестнадцатеричной форме.
|
||||
Вы можете указать несколько ключей, используя атрибут `id` (смотрите пример выше).
|
||||
|
||||
Необязательные параметры:
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: AggregateFunction
|
||||
|
||||
# AggregateFunction {#data-type-aggregatefunction}
|
||||
|
||||
Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления] (../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`.
|
||||
Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`.
|
||||
|
||||
`AggregateFunction(name, types_of_arguments…)` — параметрический тип данных.
|
||||
|
||||
|
@ -10,6 +10,7 @@ ClickHouse поддерживает типы данных для отображ
|
||||
|
||||
:::danger "Предупреждение"
|
||||
Сейчас использование типов данных для работы с географическими структурами является экспериментальной возможностью. Чтобы использовать эти типы данных, включите настройку `allow_experimental_geo_types = 1`.
|
||||
:::
|
||||
|
||||
**См. также**
|
||||
- [Хранение географических структур данных](https://ru.wikipedia.org/wiki/GeoJSON).
|
||||
|
@ -10,6 +10,7 @@ sidebar_label: Interval
|
||||
|
||||
:::danger "Внимание"
|
||||
Нельзя использовать типы данных `Interval` для хранения данных в таблице.
|
||||
:::
|
||||
|
||||
Структура:
|
||||
|
||||
|
@ -34,7 +34,7 @@ SELECT tuple(1,'a') AS x, toTypeName(x)
|
||||
|
||||
## Особенности работы с типами данных {#osobennosti-raboty-s-tipami-dannykh}
|
||||
|
||||
При создании кортежа «на лету» ClickHouse автоматически определяет тип каждого аргументов как минимальный из типов, который может сохранить значение аргумента. Если аргумент — [NULL](../../sql-reference/data-types/tuple.md#null-literal), то тип элемента кортежа — [Nullable](nullable.md).
|
||||
При создании кортежа «на лету» ClickHouse автоматически определяет тип всех аргументов как минимальный из типов, который может сохранить значение аргумента. Если аргумент — [NULL](../../sql-reference/data-types/tuple.md#null-literal), то тип элемента кортежа — [Nullable](nullable.md).
|
||||
|
||||
Пример автоматического определения типа данных:
|
||||
|
||||
|
@ -61,7 +61,7 @@ LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
|
||||
- Мультиполигон. Представляет из себя массив полигонов. Каждый полигон задается двумерным массивом точек — первый элемент этого массива задает внешнюю границу полигона,
|
||||
последующие элементы могут задавать дырки, вырезаемые из него.
|
||||
|
||||
Точки могут задаваться массивом или кортежем из своих координат. В текущей реализации поддерживается только двумерные точки.
|
||||
Точки могут задаваться массивом или кортежем из своих координат. В текущей реализации поддерживаются только двумерные точки.
|
||||
|
||||
Пользователь может [загружать свои собственные данные](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) во всех поддерживаемых ClickHouse форматах.
|
||||
|
||||
@ -80,7 +80,7 @@ LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
|
||||
- `POLYGON`. Синоним к `POLYGON_INDEX_CELL`.
|
||||
|
||||
Запросы к словарю осуществляются с помощью стандартных [функций](../../../sql-reference/functions/ext-dict-functions.md) для работы со внешними словарями.
|
||||
Важным отличием является то, что здесь ключами будут являются точки, для которых хочется найти содержащий их полигон.
|
||||
Важным отличием является то, что здесь ключами являются точки, для которых хочется найти содержащий их полигон.
|
||||
|
||||
**Пример**
|
||||
|
||||
|
@ -59,6 +59,7 @@ ClickHouse поддерживает следующие виды ключей:
|
||||
|
||||
:::danger "Обратите внимание"
|
||||
Ключ не надо дополнительно описывать в атрибутах.
|
||||
:::
|
||||
|
||||
### Числовой ключ {#ext_dict-numeric-key}
|
||||
|
||||
|
@ -14,7 +14,7 @@ ClickHouse:
|
||||
- Периодически обновляет их и динамически подгружает отсутствующие значения.
|
||||
- Позволяет создавать внешние словари с помощью xml-файлов или [DDL-запросов](../../statements/create/dictionary.md#create-dictionary-query).
|
||||
|
||||
Конфигурация внешних словарей может находится в одном или нескольких xml-файлах. Путь к конфигурации указывается в параметре [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config).
|
||||
Конфигурация внешних словарей может находиться в одном или нескольких xml-файлах. Путь к конфигурации указывается в параметре [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config).
|
||||
|
||||
Словари могут загружаться при старте сервера или при первом использовании, в зависимости от настройки [dictionaries_lazy_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load).
|
||||
|
||||
|
@ -22,7 +22,7 @@ sidebar_label: "Функции интроспекции"
|
||||
|
||||
ClickHouse сохраняет отчеты профилировщика в [журнал трассировки](../../operations/system-tables/trace_log.md#system_tables-trace_log) в системной таблице. Убедитесь, что таблица и профилировщик настроены правильно.
|
||||
|
||||
## addresssToLine {#addresstoline}
|
||||
## addressToLine {#addresstoline}
|
||||
|
||||
Преобразует адрес виртуальной памяти внутри процесса сервера ClickHouse в имя файла и номер строки в исходном коде ClickHouse.
|
||||
|
||||
|
@ -8,7 +8,8 @@ slug: /ru/sql-reference/operators/exists
|
||||
`EXISTS` может быть использован в секции [WHERE](../../sql-reference/statements/select/where.md).
|
||||
|
||||
:::danger "Предупреждение"
|
||||
Ссылки на таблицы или столбцы основного запроса не поддерживаются в подзапросе.
|
||||
Ссылки на таблицы или столбцы основного запроса не поддерживаются в подзапросе.
|
||||
:::
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
|
@ -38,9 +38,9 @@ SELECT '1' IN (SELECT 1);
|
||||
└──────────────────────┘
|
||||
```
|
||||
|
||||
Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. Это используется при работе с внешними данными, отправляемым вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию.
|
||||
Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. Это используется при работе с внешними данными, отправляемыми вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию.
|
||||
|
||||
Если в качестве правой части оператора, указано имя таблицы, имеющий движок Set (подготовленное множество, постоянно находящееся в оперативке), то множество не будет создаваться заново при каждом запросе.
|
||||
Если в качестве правой части оператора, указано имя таблицы, имеющей движок Set (подготовленное множество, постоянно находящееся в оперативке), то множество не будет создаваться заново при каждом запросе.
|
||||
|
||||
В подзапросе может быть указано более одного столбца для фильтрации кортежей.
|
||||
Пример:
|
||||
@ -49,9 +49,9 @@ SELECT '1' IN (SELECT 1);
|
||||
SELECT (CounterID, UserID) IN (SELECT CounterID, UserID FROM ...) FROM ...
|
||||
```
|
||||
|
||||
Типы столбцов слева и справа оператора IN, должны совпадать.
|
||||
Типы столбцов слева и справа оператора IN должны совпадать.
|
||||
|
||||
Оператор IN и подзапрос могут встречаться в любой части запроса, в том числе в агрегатных и лямбда функциях.
|
||||
Оператор IN и подзапрос могут встречаться в любой части запроса, в том числе в агрегатных и лямбда-функциях.
|
||||
Пример:
|
||||
|
||||
``` sql
|
||||
@ -122,7 +122,7 @@ FROM t_null
|
||||
|
||||
Существует два варианта IN-ов с подзапросами (аналогично для JOIN-ов): обычный `IN` / `JOIN` и `GLOBAL IN` / `GLOBAL JOIN`. Они отличаются способом выполнения при распределённой обработке запроса.
|
||||
|
||||
:::note "Attention"
|
||||
:::note "Внимание"
|
||||
Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`.
|
||||
:::
|
||||
При использовании обычного IN-а, запрос отправляется на удалённые серверы, и на каждом из них выполняются подзапросы в секциях `IN` / `JOIN`.
|
||||
@ -228,7 +228,7 @@ SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserI
|
||||
SETTINGS max_parallel_replicas=3
|
||||
```
|
||||
|
||||
преобразуются на каждом сервере в
|
||||
преобразуется на каждом сервере в
|
||||
|
||||
```sql
|
||||
SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
|
||||
|
@ -263,6 +263,7 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6
|
||||
│ 2014-10-26 00:00:00 │ 2014-10-26 23:00:00 │ 2014-10-27 00:00:00 │
|
||||
└─────────────────────┴─────────────────────┴─────────────────────┘
|
||||
```
|
||||
:::
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: VIEW
|
||||
|
||||
# Выражение ALTER TABLE … MODIFY QUERY {#alter-modify-query}
|
||||
|
||||
Вы можеие изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE … MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена.
|
||||
Вы можете изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE … MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена.
|
||||
|
||||
Если при создании материализованного представления использовалась конструкция `TO [db.]name`, то для изменения отсоедините представление с помощью [DETACH](../detach.md), измените таблицу с помощью [ALTER TABLE](index.md), а затем снова присоедините запрос с помощью [ATTACH](../attach.md).
|
||||
|
||||
|
@ -260,8 +260,8 @@ ENGINE = MergeTree()
|
||||
|
||||
Кодеки шифрования:
|
||||
|
||||
- `CODEC('AES-128-GCM-SIV')` — Зашифровывает данные с помощью AES-128 в режиме [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV.
|
||||
- `CODEC('AES-256-GCM-SIV')` — Зашифровывает данные с помощью AES-256 в режиме GCM-SIV.
|
||||
- `CODEC('AES-128-GCM-SIV')` — Зашифровывает данные с помощью AES-128 в режиме [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV.
|
||||
- `CODEC('AES-256-GCM-SIV')` — Зашифровывает данные с помощью AES-256 в режиме GCM-SIV.
|
||||
|
||||
Эти кодеки используют фиксированный одноразовый ключ шифрования. Таким образом, это детерминированное шифрование. Оно совместимо с поддерживающими дедупликацию движками, в частности, [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md). Однако у шифрования имеется недостаток: если дважды зашифровать один и тот же блок данных, текст на выходе получится одинаковым, и злоумышленник, у которого есть доступ к диску, заметит эту эквивалентность (при этом доступа к содержимому он не получит).
|
||||
|
||||
@ -274,10 +274,10 @@ ENGINE = MergeTree()
|
||||
**Пример**
|
||||
|
||||
```sql
|
||||
CREATE TABLE mytable
|
||||
CREATE TABLE mytable
|
||||
(
|
||||
x String Codec(AES_128_GCM_SIV)
|
||||
)
|
||||
)
|
||||
ENGINE = MergeTree ORDER BY x;
|
||||
```
|
||||
|
||||
@ -287,10 +287,10 @@ ENGINE = MergeTree ORDER BY x;
|
||||
**Пример**
|
||||
|
||||
```sql
|
||||
CREATE TABLE mytable
|
||||
CREATE TABLE mytable
|
||||
(
|
||||
x String Codec(Delta, LZ4, AES_128_GCM_SIV)
|
||||
)
|
||||
)
|
||||
ENGINE = MergeTree ORDER BY x;
|
||||
```
|
||||
|
||||
@ -299,7 +299,7 @@ ENGINE = MergeTree ORDER BY x;
|
||||
ClickHouse поддерживает временные таблицы со следующими характеристиками:
|
||||
|
||||
- Временные таблицы исчезают после завершения сессии, в том числе при обрыве соединения.
|
||||
- Временная таблица использует только модуль памяти.
|
||||
- Временная таблица использует движок таблиц Memory когда движок не указан и она может использовать любой движок таблиц за исключением движков Replicated и `KeeperMap`.
|
||||
- Невозможно указать базу данных для временной таблицы. Она создается вне баз данных.
|
||||
- Невозможно создать временную таблицу распределённым DDL запросом на всех серверах кластера (с опцией `ON CLUSTER`): такая таблица существует только в рамках существующей сессии.
|
||||
- Если временная таблица имеет то же имя, что и некоторая другая, то, при упоминании в запросе без указания БД, будет использована временная таблица.
|
||||
@ -313,7 +313,7 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
|
||||
...
|
||||
)
|
||||
) [ENGINE = engine]
|
||||
```
|
||||
|
||||
В большинстве случаев, временные таблицы создаются не вручную, а при использовании внешних данных для запроса, или при распределённом `(GLOBAL) IN`. Подробнее см. соответствующие разделы
|
||||
|
@ -107,7 +107,8 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION
|
||||
- [CREATE](#grant-create)
|
||||
- `CREATE DATABASE`
|
||||
- `CREATE TABLE`
|
||||
- `CREATE TEMPORARY TABLE`
|
||||
- `CREATE ARBITRARY TEMPORARY TABLE`
|
||||
- `CREATE TEMPORARY TABLE`
|
||||
- `CREATE VIEW`
|
||||
- `CREATE DICTIONARY`
|
||||
- `CREATE FUNCTION`
|
||||
@ -314,7 +315,8 @@ GRANT INSERT(x,y) ON db.table TO john
|
||||
- `CREATE`. Уровень: `GROUP`
|
||||
- `CREATE DATABASE`. Уровень: `DATABASE`
|
||||
- `CREATE TABLE`. Уровень: `TABLE`
|
||||
- `CREATE TEMPORARY TABLE`. Уровень: `GLOBAL`
|
||||
- `CREATE ARBITRARY TEMPORARY TABLE`. Уровень: `GLOBAL`
|
||||
- `CREATE TEMPORARY TABLE`. Уровень: `GLOBAL`
|
||||
- `CREATE VIEW`. Уровень: `VIEW`
|
||||
- `CREATE DICTIONARY`. Уровень: `DICTIONARY`
|
||||
|
||||
|
@ -10,6 +10,7 @@ sidebar_label: OPTIMIZE
|
||||
|
||||
:::danger "Внимание"
|
||||
`OPTIMIZE` не устраняет причину появления ошибки `Too many parts`.
|
||||
:::
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
|
@ -19,7 +19,6 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na
|
||||
- `shard_name` — 分片的名字。数据库副本按`shard_name`分组到分片中。
|
||||
- `replica_name` — 副本的名字。同一分片的所有副本的副本名称必须不同。
|
||||
|
||||
!!! note "警告"
|
||||
对于[ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication)表,如果没有提供参数,则使用默认参数:`/clickhouse/tables/{uuid}/{shard}`和`{replica}`。这些可以在服务器设置[default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path)和[default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name)中更改。宏`{uuid}`被展开到表的uuid, `{shard}`和`{replica}`被展开到服务器配置的值,而不是数据库引擎参数。但是在将来,可以使用Replicated数据库的`shard_name`和`replica_name`。
|
||||
|
||||
## 使用方式 {#specifics-and-recommendations}
|
||||
@ -52,8 +51,8 @@ CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
|
||||
│ shard1|replica1 │ 0 │ │ 2 │ 0 │
|
||||
┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
|
||||
│ shard1|replica1 │ 0 │ │ 2 │ 0 │
|
||||
│ shard1|other_replica │ 0 │ │ 1 │ 0 │
|
||||
│ other_shard|r1 │ 0 │ │ 0 │ 0 │
|
||||
└──────────────────────┴─────────┴───────┴─────────────────────┴──────────────────┘
|
||||
@ -62,13 +61,13 @@ CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;
|
||||
显示系统表:
|
||||
|
||||
``` sql
|
||||
SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
|
||||
SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
|
||||
FROM system.clusters WHERE cluster='r';
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
|
||||
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
|
||||
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
|
||||
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
|
||||
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
|
||||
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
|
||||
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
|
||||
@ -83,9 +82,9 @@ node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─hosts─┬─groupArray(n)─┐
|
||||
│ node1 │ [1,3,5,7,9] │
|
||||
│ node2 │ [0,2,4,6,8] │
|
||||
┌─hosts─┬─groupArray(n)─┐
|
||||
│ node1 │ [1,3,5,7,9] │
|
||||
│ node2 │ [0,2,4,6,8] │
|
||||
└───────┴───────────────┘
|
||||
```
|
||||
|
||||
@ -98,8 +97,8 @@ node4 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','r2');
|
||||
集群配置如下所示:
|
||||
|
||||
``` text
|
||||
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
|
||||
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
|
||||
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
|
||||
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
|
||||
│ r │ 1 │ 2 │ node4 │ 127.0.0.1 │ 9003 │ 0 │
|
||||
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
|
||||
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
|
||||
@ -113,8 +112,8 @@ node2 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY
|
||||
```
|
||||
|
||||
```text
|
||||
┌─hosts─┬─groupArray(n)─┐
|
||||
│ node2 │ [1,3,5,7,9] │
|
||||
│ node4 │ [0,2,4,6,8] │
|
||||
┌─hosts─┬─groupArray(n)─┐
|
||||
│ node2 │ [1,3,5,7,9] │
|
||||
│ node4 │ [0,2,4,6,8] │
|
||||
└───────┴───────────────┘
|
||||
```
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /zh/engines/table-engines/mergetree-family/mergetree
|
||||
---
|
||||
---
|
||||
slug: /zh/engines/table-engines/mergetree-family/mergetree
|
||||
---
|
||||
# MergeTree {#table_engines-mergetree}
|
||||
|
||||
Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及该系列(`*MergeTree`)中的其他引擎。
|
||||
@ -25,8 +25,9 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及
|
||||
|
||||
需要的话,您可以给表设置一个采样方法。
|
||||
|
||||
!!! note "注意"
|
||||
[合并](../special/merge.md#merge) 引擎并不属于 `*MergeTree` 系列。
|
||||
:::info
|
||||
[合并](../special/merge.md#merge) 引擎并不属于 `*MergeTree` 系列。
|
||||
:::
|
||||
|
||||
## 建表 {#table_engine-mergetree-creating-a-table}
|
||||
|
||||
@ -364,7 +365,7 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达
|
||||
|
||||
常量参数小于 ngram 大小的函数不能使用 `ngrambf_v1` 进行查询优化。
|
||||
|
||||
!!! note "注意"
|
||||
:::note
|
||||
布隆过滤器可能会包含不符合条件的匹配,所以 `ngrambf_v1`, `tokenbf_v1` 和 `bloom_filter` 索引不能用于结果返回为假的函数,例如:
|
||||
|
||||
- 可以用来优化的场景
|
||||
@ -379,6 +380,7 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达
|
||||
- `NOT s = 1`
|
||||
- `s != 1`
|
||||
- `NOT startsWith(s, 'test')`
|
||||
:::
|
||||
|
||||
## 并发数据访问 {#concurrent-data-access}
|
||||
|
||||
|
@ -45,7 +45,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
|
||||
|
||||
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) 设置
|
||||
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) 查看示例
|
||||
|
||||
|
||||
**分布式设置**
|
||||
|
||||
- `fsync_after_insert` - 对异步插入到分布式的文件数据执行`fsync`。确保操作系统将所有插入的数据刷新到启动节点**磁盘上的一个文件**中。
|
||||
@ -66,19 +66,20 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
|
||||
|
||||
- `monitor_max_sleep_time_ms` - 等同于 [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms)
|
||||
|
||||
!!! note "备注"
|
||||
::note
|
||||
**稳定性设置** (`fsync_...`):
|
||||
|
||||
**稳定性设置** (`fsync_...`):
|
||||
- 只影响异步插入(例如:`insert_distributed_sync=false`), 当数据首先存储在启动节点磁盘上,然后再异步发送到shard。
|
||||
— 可能会显著降低`insert`的性能
|
||||
- 影响将存储在分布式表文件夹中的数据写入 **接受您插入的节点** 。如果你需要保证写入数据到底层的MergeTree表中,请参阅 `system.merge_tree_settings` 中的持久性设置(`...fsync...`)
|
||||
|
||||
- 只影响异步插入(例如:`insert_distributed_sync=false`), 当数据首先存储在启动节点磁盘上,然后再异步发送到shard。
|
||||
— 可能会显著降低`insert`的性能
|
||||
- 影响将存储在分布式表文件夹中的数据写入 **接受您插入的节点** 。如果你需要保证写入数据到底层的MergeTree表中,请参阅 `system.merge_tree_settings` 中的持久性设置(`...fsync...`)
|
||||
**插入限制设置** (`..._insert`) 请见:
|
||||
|
||||
**插入限制设置** (`..._insert`) 请见:
|
||||
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) 设置
|
||||
- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) 设置
|
||||
- `bytes_to_throw_insert` 在 `bytes_to_delay_insert` 之前处理,所以你不应该设置它的值小于 `bytes_to_delay_insert`
|
||||
:::
|
||||
|
||||
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) 设置
|
||||
- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) 设置
|
||||
- `bytes_to_throw_insert` 在 `bytes_to_delay_insert` 之前处理,所以你不应该设置它的值小于 `bytes_to_delay_insert`
|
||||
**示例**
|
||||
|
||||
``` sql
|
||||
@ -214,7 +215,7 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何
|
||||
|
||||
|
||||
## 读取数据 {#distributed-reading-data}
|
||||
|
||||
|
||||
当查询一个`Distributed`表时,`SELECT`查询被发送到所有的分片,不管数据是如何分布在分片上的(它们可以完全随机分布)。当您添加一个新分片时,您不必将旧数据传输到它。相反,您可以使用更重的权重向其写入新数据——数据的分布会稍微不均匀,但查询将正确有效地工作。
|
||||
|
||||
当启用`max_parallel_replicas`选项时,查询处理将在单个分片中的所有副本之间并行化。更多信息,请参见[max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas)。
|
||||
@ -225,8 +226,9 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何
|
||||
|
||||
- `_shard_num` — 表`system.clusters` 中的 `shard_num` 值 . 数据类型: [UInt32](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
!!! note "备注"
|
||||
因为 [remote](../../../sql-reference/table-functions/remote.md) 和 [cluster](../../../sql-reference/table-functions/cluster.mdx) 表方法内部创建了分布式表, `_shard_num` 对他们都有效.
|
||||
:::note
|
||||
因为 [remote](../../../sql-reference/table-functions/remote.md) 和 [cluster](../../../sql-reference/table-functions/cluster.mdx) 表方法内部创建了分布式表, `_shard_num` 对他们都有效.
|
||||
:::
|
||||
|
||||
**详见**
|
||||
- [虚拟列](../../../engines/table-engines/index.md#table_engines-virtual_columns) 描述
|
||||
|
@ -617,8 +617,9 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
|
||||
|
||||
启用本机复制[Zookeeper](http://zookeeper.apache.org/)是必需的。 ClickHouse负责所有副本的数据一致性,并在失败后自动运行恢复过程。建议将ZooKeeper集群部署在单独的服务器上(其中没有其他进程,包括运行的ClickHouse)。
|
||||
|
||||
!!! note "注意"
|
||||
ZooKeeper不是一个严格的要求:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是**不**建议的,在这种情况下,ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。
|
||||
:::note
|
||||
ZooKeeper不是一个严格的要求:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是**不**建议的,在这种情况下,ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。
|
||||
:::
|
||||
|
||||
ZooKeeper位置在配置文件中指定:
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /zh/guides/improving-query-performance/skipping-indexes
|
||||
sidebar_label: Data Skipping Indexes
|
||||
sidebar_label: 跳数索引
|
||||
sidebar_position: 2
|
||||
---
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /zh/guides/improving-query-performance/sparse-primary-indexes
|
||||
sidebar_label: Sparse Primary Indexes
|
||||
sidebar_label: 主键稀疏索引
|
||||
sidebar_position: 20
|
||||
---
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user