Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-22 15:42:02 +00:00)

Merge branch 'master' into vdimir/http_wait_end_of_query_settings

Commit 1b264ad51e

.github/workflows/backport_branches.yml (vendored, 43 changed lines)
@@ -79,7 +79,7 @@ jobs:
with:
name: changed_images
path: ${{ runner.temp }}/changed_images.json
-CompatibilityCheck:
+CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@@ -98,12 +98,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
-- name: CompatibilityCheck
+- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
+cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
+CompatibilityCheckAarch64:
+needs: [BuilderDebAarch64]
+runs-on: [self-hosted, style-checker]
+steps:
+- name: Set envs
+run: |
+cat >> "$GITHUB_ENV" << 'EOF'
+TEMP_PATH=${{runner.temp}}/compatibility_check
+REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
+REPORTS_PATH=${{runner.temp}}/reports_dir
+EOF
+- name: Check out repository code
+uses: ClickHouse/checkout@v1
+with:
+clear-repository: true
+- name: Download json reports
+uses: actions/download-artifact@v3
+with:
+path: ${{ env.REPORTS_PATH }}
+- name: CompatibilityCheckAarch64
+run: |
+sudo rm -fr "$TEMP_PATH"
+mkdir -p "$TEMP_PATH"
+cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
+- name: Cleanup
+if: always()
+run: |
@@ -421,7 +452,8 @@ jobs:
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
-python3 docker_server.py --release-type head --no-push
+python3 docker_server.py --release-type head --no-push \
+    --image-repo clickhouse/clickhouse-server --image-path docker/server
python3 docker_server.py --release-type head --no-push --no-ubuntu \
    --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
@@ -741,7 +773,8 @@ jobs:
- FunctionalStatefulTestDebug
- StressTestTsan
- IntegrationTestsRelease
-- CompatibilityCheck
+- CompatibilityCheckX86
+- CompatibilityCheckAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
.github/workflows/cherry_pick.yml (vendored, 1 changed line)
@@ -35,7 +35,6 @@ jobs:
fetch-depth: 0
- name: Cherry pick
run: |
-sudo pip install GitPython
cd "$GITHUB_WORKSPACE/tests/ci"
python3 cherry_pick.py
- name: Cleanup
.github/workflows/master.yml (vendored, 43 changed lines)
@@ -110,7 +110,7 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
-CompatibilityCheck:
+CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@@ -129,12 +129,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
-- name: CompatibilityCheck
+- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
+cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
+CompatibilityCheckAarch64:
+needs: [BuilderDebAarch64]
+runs-on: [self-hosted, style-checker]
+steps:
+- name: Set envs
+run: |
+cat >> "$GITHUB_ENV" << 'EOF'
+TEMP_PATH=${{runner.temp}}/compatibility_check
+REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
+REPORTS_PATH=${{runner.temp}}/reports_dir
+EOF
+- name: Check out repository code
+uses: ClickHouse/checkout@v1
+with:
+clear-repository: true
+- name: Download json reports
+uses: actions/download-artifact@v3
+with:
+path: ${{ env.REPORTS_PATH }}
+- name: CompatibilityCheckAarch64
+run: |
+sudo rm -fr "$TEMP_PATH"
+mkdir -p "$TEMP_PATH"
+cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
+- name: Cleanup
+if: always()
+run: |
@@ -829,7 +860,8 @@ jobs:
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
-python3 docker_server.py --release-type head
+python3 docker_server.py --release-type head \
+    --image-repo clickhouse/clickhouse-server --image-path docker/server
python3 docker_server.py --release-type head --no-ubuntu \
    --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
@@ -3124,7 +3156,8 @@ jobs:
- PerformanceComparisonX86-1
- PerformanceComparisonX86-2
- PerformanceComparisonX86-3
-- CompatibilityCheck
+- CompatibilityCheckX86
+- CompatibilityCheckAarch64
- ASTFuzzerTestDebug
- ASTFuzzerTestAsan
- ASTFuzzerTestTsan
.github/workflows/pull_request.yml (vendored, 44 changed lines)
@@ -37,7 +37,6 @@ jobs:
cd "$GITHUB_WORKSPACE/tests/ci"
python3 run_check.py
PythonUnitTests:
-needs: CheckLabels
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
@@ -174,7 +173,7 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
-CompatibilityCheck:
+CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@@ -193,12 +192,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
-- name: CompatibilityCheck
+- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
+cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
+CompatibilityCheckAarch64:
+needs: [BuilderDebAarch64]
+runs-on: [self-hosted, style-checker]
+steps:
+- name: Set envs
+run: |
+cat >> "$GITHUB_ENV" << 'EOF'
+TEMP_PATH=${{runner.temp}}/compatibility_check
+REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
+REPORTS_PATH=${{runner.temp}}/reports_dir
+EOF
+- name: Check out repository code
+uses: ClickHouse/checkout@v1
+with:
+clear-repository: true
+- name: Download json reports
+uses: actions/download-artifact@v3
+with:
+path: ${{ env.REPORTS_PATH }}
+- name: CompatibilityCheckAarch64
+run: |
+sudo rm -fr "$TEMP_PATH"
+mkdir -p "$TEMP_PATH"
+cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
+- name: Cleanup
+if: always()
+run: |
@@ -886,7 +916,8 @@ jobs:
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
-python3 docker_server.py --release-type head --no-push
+python3 docker_server.py --release-type head --no-push \
+    --image-repo clickhouse/clickhouse-server --image-path docker/server
python3 docker_server.py --release-type head --no-push --no-ubuntu \
    --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
@@ -4792,7 +4823,8 @@ jobs:
- UnitTestsMsan
- UnitTestsUBsan
- UnitTestsReleaseClang
-- CompatibilityCheck
+- CompatibilityCheckX86
+- CompatibilityCheckAarch64
- IntegrationTestsFlakyCheck
- SQLancerTestRelease
- SQLancerTestDebug
.github/workflows/release.yml (vendored, 31 changed lines)
@@ -7,15 +7,28 @@ on: # yamllint disable-line rule:truthy
release:
types:
- published
+workflow_dispatch:
+inputs:
+tag:
+description: 'Release tag'
+required: true
+type: string

jobs:
ReleasePublish:
runs-on: [self-hosted, style-checker]
steps:
+- name: Set tag from input
+if: github.event_name == 'workflow_dispatch'
+run: |
+echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
+- name: Set tag from REF
+if: github.event_name == 'release'
+run: |
+echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
- name: Deploy packages and assets
run: |
-GITHUB_TAG="${GITHUB_REF#refs/tags/}"
-curl --silent --data '' \
+curl --silent --data '' --no-buffer \
'${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true'
############################################################################################
##################################### Docker images #######################################
@@ -23,16 +36,26 @@ jobs:
DockerServerImages:
runs-on: [self-hosted, style-checker]
steps:
+- name: Set tag from input
+if: github.event_name == 'workflow_dispatch'
+run: |
+echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
+- name: Set tag from REF
+if: github.event_name == 'release'
+run: |
+echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # otherwise we will have no version info
+ref: ${{ env.GITHUB_TAG }}
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
-python3 docker_server.py --release-type auto --version "${{ github.ref }}"
-python3 docker_server.py --release-type auto --version "${{ github.ref }}" --no-ubuntu \
+python3 docker_server.py --release-type auto --version "$GITHUB_TAG" \
+    --image-repo clickhouse/clickhouse-server --image-path docker/server
+python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --no-ubuntu \
    --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
if: always()
.github/workflows/release_branches.yml (vendored, 43 changed lines)
@@ -71,7 +71,7 @@ jobs:
with:
name: changed_images
path: ${{ runner.temp }}/changed_images.json
-CompatibilityCheck:
+CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@@ -90,12 +90,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
-- name: CompatibilityCheck
+- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
+cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
+CompatibilityCheckAarch64:
+needs: [BuilderDebAarch64]
+runs-on: [self-hosted, style-checker]
+steps:
+- name: Set envs
+run: |
+cat >> "$GITHUB_ENV" << 'EOF'
+TEMP_PATH=${{runner.temp}}/compatibility_check
+REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
+REPORTS_PATH=${{runner.temp}}/reports_dir
+EOF
+- name: Check out repository code
+uses: ClickHouse/checkout@v1
+with:
+clear-repository: true
+- name: Download json reports
+uses: actions/download-artifact@v3
+with:
+path: ${{ env.REPORTS_PATH }}
+- name: CompatibilityCheckAarch64
+run: |
+sudo rm -fr "$TEMP_PATH"
+mkdir -p "$TEMP_PATH"
+cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
+- name: Cleanup
+if: always()
+run: |
@@ -494,7 +525,8 @@ jobs:
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
-python3 docker_server.py --release-type head --no-push
+python3 docker_server.py --release-type head --no-push \
+    --image-repo clickhouse/clickhouse-server --image-path docker/server
python3 docker_server.py --release-type head --no-push --no-ubuntu \
    --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
@@ -1947,7 +1979,8 @@ jobs:
- IntegrationTestsTsan1
- IntegrationTestsTsan2
- IntegrationTestsTsan3
-- CompatibilityCheck
+- CompatibilityCheckX86
+- CompatibilityCheckAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
@@ -4,7 +4,6 @@
#include <type_traits>
#include <utility>
#include "defines.h"
-#include "TypePair.h"

/// General-purpose typelist. Easy on compilation times as it does not use recursion.
template <typename ...Args>
@@ -28,7 +27,7 @@ namespace TypeListUtils /// In some contexts it's more handy to use functions in
constexpr Root<Args...> changeRoot(TypeList<Args...>) { return {}; }

template <typename F, typename ...Args>
-constexpr void forEach(TypeList<Args...>, F && f) { (std::forward<F>(f)(Id<Args>{}), ...); }
+constexpr void forEach(TypeList<Args...>, F && f) { (std::forward<F>(f)(TypeList<Args>{}), ...); }
}

template <typename TypeListLeft, typename TypeListRight>

@@ -1,4 +0,0 @@
-#pragma once
-
-template <typename T, typename V> struct TypePair {};
-template <typename T> struct Id {};
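The `forEach` change above tags each element with `TypeList<T>` itself instead of the `Id<T>` wrapper, which is why `TypePair.h` (the header that defined `Id`) can be deleted. A minimal standalone sketch of the new calling convention, assuming only a bare variadic `TypeList` (the real header has more machinery):

```cpp
#include <cstdio>
#include <utility>

template <typename... Args> struct TypeList {};

template <typename F, typename... Args>
constexpr void forEach(TypeList<Args...>, F && f) { (std::forward<F>(f)(TypeList<Args>{}), ...); }

int main()
{
    // The callback now receives TypeList<T> rather than Id<T>, so no Id helper is needed.
    forEach(TypeList<int, double, char>{}, []<typename T>(TypeList<T>)
    {
        printf("sizeof = %zu\n", sizeof(T));
    });
}
```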
base/base/hex.h (new file, 214 lines)
@@ -0,0 +1,214 @@
#pragma once

#include <cstring>
#include "types.h"

/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.

constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF";
constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef";

constexpr char hexDigitUppercase(unsigned char c)
{
    return hex_digit_to_char_uppercase_table[c];
}
constexpr char hexDigitLowercase(unsigned char c)
{
    return hex_digit_to_char_lowercase_table[c];
}

/// Maps 0..255 to 00..FF or 00..ff correspondingly

constexpr inline std::string_view hex_byte_to_char_uppercase_table = //
    "000102030405060708090A0B0C0D0E0F"
    "101112131415161718191A1B1C1D1E1F"
    "202122232425262728292A2B2C2D2E2F"
    "303132333435363738393A3B3C3D3E3F"
    "404142434445464748494A4B4C4D4E4F"
    "505152535455565758595A5B5C5D5E5F"
    "606162636465666768696A6B6C6D6E6F"
    "707172737475767778797A7B7C7D7E7F"
    "808182838485868788898A8B8C8D8E8F"
    "909192939495969798999A9B9C9D9E9F"
    "A0A1A2A3A4A5A6A7A8A9AAABACADAEAF"
    "B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF"
    "C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF"
    "D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF"
    "E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF"
    "F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF";

constexpr inline std::string_view hex_byte_to_char_lowercase_table = //
    "000102030405060708090a0b0c0d0e0f"
    "101112131415161718191a1b1c1d1e1f"
    "202122232425262728292a2b2c2d2e2f"
    "303132333435363738393a3b3c3d3e3f"
    "404142434445464748494a4b4c4d4e4f"
    "505152535455565758595a5b5c5d5e5f"
    "606162636465666768696a6b6c6d6e6f"
    "707172737475767778797a7b7c7d7e7f"
    "808182838485868788898a8b8c8d8e8f"
    "909192939495969798999a9b9c9d9e9f"
    "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
    "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
    "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
    "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
    "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
    "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";

inline void writeHexByteUppercase(UInt8 byte, void * out)
{
    memcpy(out, &hex_byte_to_char_uppercase_table[static_cast<size_t>(byte) * 2], 2);
}

inline void writeHexByteLowercase(UInt8 byte, void * out)
{
    memcpy(out, &hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
}

constexpr inline std::string_view bin_byte_to_char_table = //
    "0000000000000001000000100000001100000100000001010000011000000111"
    "0000100000001001000010100000101100001100000011010000111000001111"
    "0001000000010001000100100001001100010100000101010001011000010111"
    "0001100000011001000110100001101100011100000111010001111000011111"
    "0010000000100001001000100010001100100100001001010010011000100111"
    "0010100000101001001010100010101100101100001011010010111000101111"
    "0011000000110001001100100011001100110100001101010011011000110111"
    "0011100000111001001110100011101100111100001111010011111000111111"
    "0100000001000001010000100100001101000100010001010100011001000111"
    "0100100001001001010010100100101101001100010011010100111001001111"
    "0101000001010001010100100101001101010100010101010101011001010111"
    "0101100001011001010110100101101101011100010111010101111001011111"
    "0110000001100001011000100110001101100100011001010110011001100111"
    "0110100001101001011010100110101101101100011011010110111001101111"
    "0111000001110001011100100111001101110100011101010111011001110111"
    "0111100001111001011110100111101101111100011111010111111001111111"
    "1000000010000001100000101000001110000100100001011000011010000111"
    "1000100010001001100010101000101110001100100011011000111010001111"
    "1001000010010001100100101001001110010100100101011001011010010111"
    "1001100010011001100110101001101110011100100111011001111010011111"
    "1010000010100001101000101010001110100100101001011010011010100111"
    "1010100010101001101010101010101110101100101011011010111010101111"
    "1011000010110001101100101011001110110100101101011011011010110111"
    "1011100010111001101110101011101110111100101111011011111010111111"
    "1100000011000001110000101100001111000100110001011100011011000111"
    "1100100011001001110010101100101111001100110011011100111011001111"
    "1101000011010001110100101101001111010100110101011101011011010111"
    "1101100011011001110110101101101111011100110111011101111011011111"
    "1110000011100001111000101110001111100100111001011110011011100111"
    "1110100011101001111010101110101111101100111011011110111011101111"
    "1111000011110001111100101111001111110100111101011111011011110111"
    "1111100011111001111110101111101111111100111111011111111011111111";

inline void writeBinByte(UInt8 byte, void * out)
{
    memcpy(out, &bin_byte_to_char_table[static_cast<size_t>(byte) * 8], 8);
}

/// Produces hex representation of an unsigned int with leading zeros (for checksums)
template <typename TUInt>
inline void writeHexUIntImpl(TUInt uint_, char * out, std::string_view table)
{
    union
    {
        TUInt value;
        UInt8 uint8[sizeof(TUInt)];
    };

    value = uint_;

    for (size_t i = 0; i < sizeof(TUInt); ++i)
    {
        if constexpr (std::endian::native == std::endian::little)
            memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
        else
            memcpy(out + i * 2, &table[static_cast<size_t>(uint8[i]) * 2], 2);
    }
}

template <typename TUInt>
inline void writeHexUIntUppercase(TUInt uint_, char * out)
{
    writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table);
}

template <typename TUInt>
inline void writeHexUIntLowercase(TUInt uint_, char * out)
{
    writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table);
}

template <typename TUInt>
std::string getHexUIntUppercase(TUInt uint_)
{
    std::string res(sizeof(TUInt) * 2, '\0');
    writeHexUIntUppercase(uint_, res.data());
    return res;
}

template <typename TUInt>
std::string getHexUIntLowercase(TUInt uint_)
{
    std::string res(sizeof(TUInt) * 2, '\0');
    writeHexUIntLowercase(uint_, res.data());
    return res;
}

/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value.

constexpr inline std::string_view hex_char_to_digit_table
    = {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9
       "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
       "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
       256};

constexpr UInt8 unhex(char c)
{
    return hex_char_to_digit_table[static_cast<UInt8>(c)];
}

constexpr UInt8 unhex2(const char * data)
{
    return static_cast<UInt8>(unhex(data[0])) * 0x10 + static_cast<UInt8>(unhex(data[1]));
}

constexpr UInt16 unhex4(const char * data)
{
    return static_cast<UInt16>(unhex(data[0])) * 0x1000 + static_cast<UInt16>(unhex(data[1])) * 0x100
        + static_cast<UInt16>(unhex(data[2])) * 0x10 + static_cast<UInt16>(unhex(data[3]));
}

template <typename TUInt>
constexpr TUInt unhexUInt(const char * data)
{
    TUInt res = 0;
    if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0))
    {
        for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data)
        {
            res <<= 4;
            res += unhex(*data);
        }
    }
    else
    {
        for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16)
        {
            res <<= 64;
            res += unhexUInt<UInt64>(data);
        }
    }
    return res;
}
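A short usage sketch for the new header (a hypothetical test program; `UInt32` comes from `base/types.h`, and the include path is assumed):

```cpp
#include <cassert>
#include <cstdio>
#include <string>
#include <base/hex.h> // assumed include path for the new header

int main()
{
    UInt32 x = 0xDEADBEEF;
    std::string upper = getHexUIntUppercase(x); // "DEADBEEF", fixed width with leading zeros
    std::string lower = getHexUIntLowercase(x); // "deadbeef"
    assert(unhexUInt<UInt32>(upper.data()) == x); // round trip through the lookup tables
    assert(unhex('A') == 10 && unhex2("ff") == 0xFF && unhex4("beef") == 0xBEEF);

    char bits[9] = {};
    writeBinByte(0xA5, bits); // writes "10100101" (8 chars, no terminator)
    printf("%s %s %s\n", upper.c_str(), lower.c_str(), bits);
}
```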
base/base/interpolate.h (new file, 13 lines)
@@ -0,0 +1,13 @@
#pragma once
#include <cassert>
#include <cmath>

/** Linear interpolation in logarithmic coordinates.
  * Exponential interpolation is related to linear interpolation
  * exactly in the same way as the geometric mean is related to the arithmetic mean.
  */
constexpr double interpolateExponential(double min, double max, double ratio)
{
    assert(min > 0 && ratio >= 0 && ratio <= 1);
    return min * std::pow(max / min, ratio);
}
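For intuition: at `ratio = 0.5` the function returns the geometric mean of the endpoints, and at `ratio = 0` and `ratio = 1` it reproduces `min` and `max` exactly. A small check (hypothetical test program, include path assumed):

```cpp
#include <cstdio>
#include <base/interpolate.h> // assumed include path

int main()
{
    printf("%g\n", interpolateExponential(1.0, 100.0, 0.5)); // 10, the geometric mean of 1 and 100
    printf("%g %g\n",
           interpolateExponential(2.0, 32.0, 0.0),  // 2
           interpolateExponential(2.0, 32.0, 1.0)); // 32
}
```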
@@ -90,6 +90,9 @@ namespace MongoDB

Poco::Net::SocketAddress address() const;
/// Returns the address of the MongoDB server.

+const std::string & uri() const;
+/// Returns the URI on which the connection was made.

void connect(const std::string & hostAndPort);
/// Connects to the given MongoDB server.
@@ -148,6 +151,7 @@ namespace MongoDB
private:
Poco::Net::SocketAddress _address;
Poco::Net::StreamSocket _socket;
+std::string _uri;
};

@@ -158,6 +162,10 @@ namespace MongoDB
{
return _address;
}
+inline const std::string & Connection::uri() const
+{
+    return _uri;
+}

}
@@ -145,68 +145,155 @@ void Connection::connect(const Poco::Net::StreamSocket& socket)

void Connection::connect(const std::string& uri, SocketFactory& socketFactory)
{
Poco::URI theURI(uri);
if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri);
std::vector<std::string> strAddresses;
std::string newURI;

std::string userInfo = theURI.getUserInfo();
std::string host = theURI.getHost();
Poco::UInt16 port = theURI.getPort();
if (port == 0) port = 27017;
if (uri.find(',') != std::string::npos)
{
size_t pos;
size_t head = 0;
if ((pos = uri.find("@")) != std::string::npos)
{
head = pos + 1;
}
else if ((pos = uri.find("://")) != std::string::npos)
{
head = pos + 3;
}

std::string databaseName = theURI.getPath();
if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1);
if (databaseName.empty()) databaseName = "admin";
std::string tempstr;
std::string::const_iterator it = uri.begin();
it += head;
size_t tail = head;
for (;it != uri.end() && *it != '?' && *it != '/'; ++it)
{
tempstr += *it;
tail++;
}

bool ssl = false;
Poco::Timespan connectTimeout;
Poco::Timespan socketTimeout;
std::string authMechanism = Database::AUTH_SCRAM_SHA1;
it = tempstr.begin();
std::string token;
for (;it != tempstr.end(); ++it)
{
if (*it == ',')
{
newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
strAddresses.push_back(newURI);
token = "";
}
else
{
token += *it;
}
}
newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
strAddresses.push_back(newURI);
}
else
{
strAddresses.push_back(uri);
}

Poco::URI::QueryParameters params = theURI.getQueryParameters();
for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
{
if (it->first == "ssl")
{
ssl = (it->second == "true");
}
else if (it->first == "connectTimeoutMS")
{
connectTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
}
else if (it->first == "socketTimeoutMS")
{
socketTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
}
else if (it->first == "authMechanism")
{
authMechanism = it->second;
}
}
newURI = strAddresses.front();
Poco::URI theURI(newURI);
if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri);

connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
std::string userInfo = theURI.getUserInfo();
std::string databaseName = theURI.getPath();
if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1);
if (databaseName.empty()) databaseName = "admin";

if (socketTimeout > 0)
{
_socket.setSendTimeout(socketTimeout);
_socket.setReceiveTimeout(socketTimeout);
}
bool ssl = false;
Poco::Timespan connectTimeout;
Poco::Timespan socketTimeout;
std::string authMechanism = Database::AUTH_SCRAM_SHA1;
std::string readPreference = "primary";

if (!userInfo.empty())
{
std::string username;
std::string password;
std::string::size_type pos = userInfo.find(':');
if (pos != std::string::npos)
{
username.assign(userInfo, 0, pos++);
password.assign(userInfo, pos, userInfo.size() - pos);
}
else username = userInfo;
Poco::URI::QueryParameters params = theURI.getQueryParameters();
for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
{
if (it->first == "ssl")
{
ssl = (it->second == "true");
}
else if (it->first == "connectTimeoutMS")
{
connectTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
}
else if (it->first == "socketTimeoutMS")
{
socketTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
}
else if (it->first == "authMechanism")
{
authMechanism = it->second;
}
else if (it->first == "readPreference")
{
readPreference = it->second;
}
}

Database database(databaseName);
if (!database.authenticate(*this, username, password, authMechanism))
throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username));
}
for (std::vector<std::string>::const_iterator it = strAddresses.cbegin(); it != strAddresses.cend(); ++it)
{
newURI = *it;
theURI = Poco::URI(newURI);

std::string host = theURI.getHost();
Poco::UInt16 port = theURI.getPort();
if (port == 0) port = 27017;

connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
_uri = newURI;
if (socketTimeout > 0)
{
_socket.setSendTimeout(socketTimeout);
_socket.setReceiveTimeout(socketTimeout);
}
if (strAddresses.size() > 1)
{
Poco::MongoDB::QueryRequest request("admin.$cmd");
request.setNumberToReturn(1);
request.selector().add("isMaster", 1);
Poco::MongoDB::ResponseMessage response;

sendRequest(request, response);
_uri = newURI;
if (!response.documents().empty())
{
Poco::MongoDB::Document::Ptr doc = response.documents()[0];
if (doc->get<bool>("ismaster") && readPreference == "primary")
{
break;
}
else if (!doc->get<bool>("ismaster") && readPreference == "secondary")
{
break;
}
else if (it + 1 == strAddresses.cend())
{
throw Poco::URISyntaxException(uri);
}
}
}
}
if (!userInfo.empty())
{
std::string username;
std::string password;
std::string::size_type pos = userInfo.find(':');
if (pos != std::string::npos)
{
username.assign(userInfo, 0, pos++);
password.assign(userInfo, pos, userInfo.size() - pos);
}
else username = userInfo;

Database database(databaseName);

if (!database.authenticate(*this, username, password, authMechanism))
throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username));
}
}
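What the rewritten `connect()` enables, in a hedged usage sketch (host names and credentials below are placeholders): a comma-separated host list is split into one candidate URI per host, each candidate is probed with `isMaster`, and the first host matching `readPreference` (default `primary`) wins; the new `uri()` accessor then reports which candidate was chosen.

```cpp
#include <iostream>
#include <Poco/MongoDB/Connection.h>

int main()
{
    Poco::MongoDB::Connection connection;
    Poco::MongoDB::Connection::SocketFactory socketFactory;

    // Multi-host URI: each host is tried in turn until one matches readPreference.
    connection.connect("mongodb://user:pass@host1:27017,host2:27017/db?readPreference=primary",
                       socketFactory);

    std::cout << "connected via " << connection.uri() << std::endl; // uri() is new in this patch
}
```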
@@ -115,6 +115,13 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A

# ARROW_ORC + adapters/orc/CMakefiles
set(ORC_SRCS
    "${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h"
+    "${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.cc"
+    "${ORC_SOURCE_SRC_DIR}/sargs/Literal.cc"
+    "${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.cc"
+    "${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.cc"
+    "${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.cc"
+    "${ORC_SOURCE_SRC_DIR}/sargs/TruthValue.cc"
    "${ORC_SOURCE_SRC_DIR}/Exceptions.cc"
    "${ORC_SOURCE_SRC_DIR}/OrcFile.cc"
    "${ORC_SOURCE_SRC_DIR}/Reader.cc"
@@ -129,13 +136,20 @@ set(ORC_SRCS
    "${ORC_SOURCE_SRC_DIR}/MemoryPool.cc"
    "${ORC_SOURCE_SRC_DIR}/RLE.cc"
    "${ORC_SOURCE_SRC_DIR}/RLEv1.cc"
-    "${ORC_SOURCE_SRC_DIR}/RLEv2.cc"
+    "${ORC_SOURCE_SRC_DIR}/RleDecoderV2.cc"
+    "${ORC_SOURCE_SRC_DIR}/RleEncoderV2.cc"
+    "${ORC_SOURCE_SRC_DIR}/RLEV2Util.cc"
    "${ORC_SOURCE_SRC_DIR}/Statistics.cc"
    "${ORC_SOURCE_SRC_DIR}/StripeStream.cc"
    "${ORC_SOURCE_SRC_DIR}/Timezone.cc"
    "${ORC_SOURCE_SRC_DIR}/TypeImpl.cc"
    "${ORC_SOURCE_SRC_DIR}/Vector.cc"
    "${ORC_SOURCE_SRC_DIR}/Writer.cc"
    "${ORC_SOURCE_SRC_DIR}/Adaptor.cc"
    "${ORC_SOURCE_SRC_DIR}/BloomFilter.cc"
    "${ORC_SOURCE_SRC_DIR}/Murmur3.cc"
    "${ORC_SOURCE_SRC_DIR}/BlockBuffer.cc"
    "${ORC_SOURCE_SRC_DIR}/wrap/orc-proto-wrapper.cc"
    "${ORC_SOURCE_SRC_DIR}/io/InputStream.cc"
    "${ORC_SOURCE_SRC_DIR}/io/OutputStream.cc"
    "${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc"
@@ -358,6 +372,9 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})
add_definitions(-DARROW_WITH_ZSTD)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})

+add_definitions(-DARROW_WITH_BROTLI)
+SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS})

add_library(_arrow ${ARROW_SRCS})

@@ -372,6 +389,7 @@ target_link_libraries(_arrow PRIVATE
    ch_contrib::snappy
    ch_contrib::zlib
    ch_contrib::zstd
+    ch_contrib::brotli
)
target_link_libraries(_arrow PUBLIC _orc)
@@ -6,6 +6,10 @@ if (MSVC)
target_compile_definitions (_farmhash PRIVATE FARMHASH_NO_BUILTIN_EXPECT=1)
endif ()

+if (ARCH_S390X)
+    add_compile_definitions(WORDS_BIGENDIAN)
+endif ()

target_include_directories (_farmhash BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

add_library(ch_contrib::farmhash ALIAS _farmhash)
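The farmhash change defines `WORDS_BIGENDIAN` on s390x, the only big-endian platform in the build matrix. The same property can be checked in code with C++20 `std::endian`; a minimal sketch:

```cpp
#include <bit>
#include <cstdio>

int main()
{
    // On s390x this prints "big-endian", matching the WORDS_BIGENDIAN define above.
    if constexpr (std::endian::native == std::endian::big)
        printf("big-endian\n");
    else
        printf("little-endian\n");
}
```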
contrib/orc (vendored, 2 changed lines)
@@ -1 +1 @@
-Subproject commit f9a393ed2433a60034795284f82d093b348f2102
+Subproject commit c5d7755ba0b9a95631c8daea4d094101f26ec761
@@ -29,7 +29,7 @@ RUN arch=${TARGETARCH:-amd64} \
esac

ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
-ARG VERSION="23.2.1.2537"
+ARG VERSION="23.2.3.17"
ARG PACKAGES="clickhouse-keeper"

# user/group precreated explicitly with fixed uid/gid on purpose.
@@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.2.1.2537"
+ARG VERSION="23.2.3.17"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# user/group precreated explicitly with fixed uid/gid on purpose.
@@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.2.1.2537"
+ARG VERSION="23.2.3.17"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# set non-empty deb_location_url url to create a docker image
@@ -162,7 +162,7 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
    -e "Cannot flush" \
    -e "Container already exists" \
    /var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
-    && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \
+    && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \
    >> /test_output/test_results.tsv \
    || echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv
@@ -85,9 +85,9 @@ echo
echo "Successfully downloaded the ClickHouse binary, you can run it as:
    ./${clickhouse}"

-#if [ "${OS}" = "Linux" ]
-#then
-#echo
-#echo "You can also install it:
-#sudo ./${clickhouse} install"
-#fi
+if [ "${OS}" = "Linux" ]
+then
+    echo
+    echo "You can also install it:
+    sudo ./${clickhouse} install"
+fi
docs/changelogs/v22.12.4.76-stable.md (new file, 55 lines)
@@ -0,0 +1,55 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v22.12.4.76-stable (cb5772db805) FIXME as compared to v22.12.3.5-stable (893de538f02)

#### Performance Improvement
* Backported in [#45704](https://github.com/ClickHouse/ClickHouse/issues/45704): Fixed performance of short `SELECT` queries that read from tables with a large number of `Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46378](https://github.com/ClickHouse/ClickHouse/issues/46378): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).

#### Bug Fix
* Backported in [#45672](https://github.com/ClickHouse/ClickHouse/issues/45672): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)).

#### Build/Testing/Packaging Improvement
* Backported in [#45200](https://github.com/ClickHouse/ClickHouse/issues/45200): Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46116](https://github.com/ClickHouse/ClickHouse/issues/46116): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46035](https://github.com/ClickHouse/ClickHouse/issues/46035): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46484](https://github.com/ClickHouse/ClickHouse/issues/46484): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46509](https://github.com/ClickHouse/ClickHouse/issues/46509): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#47058](https://github.com/ClickHouse/ClickHouse/issues/47058): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).

#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#45904](https://github.com/ClickHouse/ClickHouse/issues/45904): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#45321](https://github.com/ClickHouse/ClickHouse/issues/45321): Fixed a bug in normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#45000](https://github.com/ClickHouse/ClickHouse/issues/45000): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#45553](https://github.com/ClickHouse/ClickHouse/issues/45553): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#46226](https://github.com/ClickHouse/ClickHouse/issues/46226): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unordered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has occurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyway because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#46218](https://github.com/ClickHouse/ClickHouse/issues/46218): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#46446](https://github.com/ClickHouse/ClickHouse/issues/46446): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46678](https://github.com/ClickHouse/ClickHouse/issues/46678): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46872](https://github.com/ClickHouse/ClickHouse/issues/46872): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46954](https://github.com/ClickHouse/ClickHouse/issues/46954): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
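The c-ares entry above describes treating any negative `poll()` return as fatal, including `EINTR`. The standard remedy is to retry on `EINTR`; a minimal POSIX sketch (not the actual patch):

```cpp
#include <cerrno>
#include <poll.h>

// Retry poll() when a signal interrupts it, so EINTR is not mistaken for failure.
int pollRetryOnEintr(pollfd * fds, nfds_t nfds, int timeout_ms)
{
    int rc;
    do
        rc = poll(fds, nfds, timeout_ms);
    while (rc < 0 && errno == EINTR);
    return rc; // negative only for genuine errors
}
```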
docs/changelogs/v22.8.14.53-lts.md (new file, 40 lines)
@@ -0,0 +1,40 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v22.8.14.53-lts (4ea67c40077) FIXME as compared to v22.8.13.20-lts (e4817946d18)

#### Performance Improvement
* Backported in [#45845](https://github.com/ClickHouse/ClickHouse/issues/45845): Fixed performance of short `SELECT` queries that read from tables with a large number of `Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46374](https://github.com/ClickHouse/ClickHouse/issues/46374): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#46358](https://github.com/ClickHouse/ClickHouse/issues/46358): Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)).

#### Build/Testing/Packaging Improvement
* Backported in [#46112](https://github.com/ClickHouse/ClickHouse/issues/46112): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46482](https://github.com/ClickHouse/ClickHouse/issues/46482): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46505](https://github.com/ClickHouse/ClickHouse/issues/46505): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#45908](https://github.com/ClickHouse/ClickHouse/issues/45908): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#46238](https://github.com/ClickHouse/ClickHouse/issues/46238): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unordered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has occurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyway because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#45727](https://github.com/ClickHouse/ClickHouse/issues/45727): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#46394](https://github.com/ClickHouse/ClickHouse/issues/46394): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#46442](https://github.com/ClickHouse/ClickHouse/issues/46442): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46674](https://github.com/ClickHouse/ClickHouse/issues/46674): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46879](https://github.com/ClickHouse/ClickHouse/issues/46879): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46871](https://github.com/ClickHouse/ClickHouse/issues/46871): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
47
docs/changelogs/v23.1.4.58-stable.md
Normal file
@ -0,0 +1,47 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.1.4.58-stable (9ed562163a5) FIXME as compared to v23.1.3.5-stable (548b494bcce)

#### Performance Improvement

* Backported in [#46380](https://github.com/ClickHouse/ClickHouse/issues/46380): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### Improvement

* Backported in [#46985](https://github.com/ClickHouse/ClickHouse/issues/46985): Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas, because `ALTER TABLE t MOVE` is not replicated (see the example sketch after this section). [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)).
* Backported in [#46778](https://github.com/ClickHouse/ClickHouse/issues/46778): Backward compatibility for T64 codec support for IPv4. [#46747](https://github.com/ClickHouse/ClickHouse/pull/46747) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#47020](https://github.com/ClickHouse/ClickHouse/issues/47020): Allow IPv4 in `range()`. [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
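For illustration, a minimal sketch of the replicated move described above; the table, cluster, partition and disk names are hypothetical:

```sql
-- Assumes a table `events` on cluster `my_cluster` whose storage policy
-- includes a disk named 'cold_disk'; after this change the move is applied
-- on every replica rather than only the one that received the query.
ALTER TABLE events ON CLUSTER my_cluster
    MOVE PARTITION ID '202302' TO DISK 'cold_disk';
```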
#### Build/Testing/Packaging Improvement

* Backported in [#46031](https://github.com/ClickHouse/ClickHouse/issues/46031): Add a systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46477](https://github.com/ClickHouse/ClickHouse/issues/46477): Get rid of an unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46511](https://github.com/ClickHouse/ClickHouse/issues/46511): Some time ago the ccache compression was changed to `zst`, but `gz` archives were still downloaded by default. This fixes it by prioritizing the zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#46228](https://github.com/ClickHouse/ClickHouse/issues/46228): A couple of segfaults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unordered_set<>`. I believe I have found the root cause of this; it seems to be unprocessed queries. Prior to this PR, ClickHouse calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has occurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyway because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address, causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#46967](https://github.com/ClickHouse/ClickHouse/issues/46967): Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for `INSERT ... VALUES ...` expressions (see the example sketch after this section). [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#46220](https://github.com/ClickHouse/ClickHouse/issues/46220): Fix reading of non-existing nested columns with multiple levels in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#46751](https://github.com/ClickHouse/ClickHouse/issues/46751): Follow-up fix for the replacement of domain IP types (IPv4, IPv6) with native types, https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#46448](https://github.com/ClickHouse/ClickHouse/issues/46448): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46680](https://github.com/ClickHouse/ClickHouse/issues/46680): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release builds, and a logical error `Bad cast` in debug builds. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46873](https://github.com/ClickHouse/ClickHouse/issues/46873): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46956](https://github.com/ClickHouse/ClickHouse/issues/46956): Fix the result of LIKE predicates that translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
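A minimal sketch of the narrowing conversion mentioned above, assuming a hypothetical `ip_log` table; `3232235521` is the numeric form of `192.168.0.1`:

```sql
CREATE TABLE ip_log (ip IPv4) ENGINE = Memory;
-- The UInt64 literal is implicitly narrowed to IPv4 on insert.
INSERT INTO ip_log VALUES (3232235521);
SELECT ip FROM ip_log;  -- expected: 192.168.0.1
```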
#### NOT FOR CHANGELOG / INSIGNIFICANT

* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Support DELETE ON CLUSTER [#45786](https://github.com/ClickHouse/ClickHouse/pull/45786) ([Alexander Gololobov](https://github.com/davenger)).
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Wait for background tasks in ~UploadHelper [#46334](https://github.com/ClickHouse/ClickHouse/pull/46334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
30
docs/changelogs/v23.2.2.20-stable.md
Normal file
@ -0,0 +1,30 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.2.2.20-stable (f6c269c8df2) FIXME as compared to v23.2.1.2537-stable (52bf836e03a)
#### Improvement

* Backported in [#46914](https://github.com/ClickHouse/ClickHouse/issues/46914): Allow PREWHERE for Merge tables with different DEFAULT expressions for a column. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#47022](https://github.com/ClickHouse/ClickHouse/issues/47022): Allow IPv4 in `range()` (see the example sketch after this section). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
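A hedged sketch of the `range()` change above, assuming IPv4 start/end arguments are now accepted directly:

```sql
-- Before this change, IPv4 arguments to range() were rejected.
SELECT range(toIPv4('10.0.0.1'), toIPv4('10.0.0.4'));
```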
#### Bug Fix

* Backported in [#46828](https://github.com/ClickHouse/ClickHouse/issues/46828): The combined PREWHERE column accumulated from multiple PREWHERE steps in some cases didn't contain 0's from previous steps. The fix is to apply the final filter if we know that it wasn't applied for more than the last step. [#46785](https://github.com/ClickHouse/ClickHouse/pull/46785) ([Alexander Gololobov](https://github.com/davenger)).
#### Build/Testing/Packaging Improvement

* Backported in [#47062](https://github.com/ClickHouse/ClickHouse/issues/47062): Fix an error during server startup on old distros (e.g. Amazon Linux 2) and on ARM, where glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#46895](https://github.com/ClickHouse/ClickHouse/issues/46895): Fixed a bug in automatic retries of `DROP TABLE` queries with `ReplicatedMergeTree` tables and `Atomic` databases. In rare cases it could lead to `Can't get data for node /zk_path/log_pointer` and `The specified key does not exist` errors if the ZooKeeper session expired during DROP and a new replicated table with the same path in ZooKeeper was created in parallel. [#46384](https://github.com/ClickHouse/ClickHouse/pull/46384) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#46865](https://github.com/ClickHouse/ClickHouse/issues/46865): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46958](https://github.com/ClickHouse/ClickHouse/issues/46958): Fix the result of LIKE predicates that translate to substring searches and contain quoted non-LIKE metacharacters (see the example sketch after this section). [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
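To illustrate the LIKE fix above, a minimal sketch; `.` is a regex metacharacter but not a LIKE metacharacter, so this pattern is internally translated to a plain substring search:

```sql
SELECT 'version 1.2.3' LIKE '%1.2.3%' AS matched;  -- expected: 1
```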
#### NOT FOR CHANGELOG / INSIGNIFICANT

* More concise logging at trace level for PREWHERE steps [#46771](https://github.com/ClickHouse/ClickHouse/pull/46771) ([Alexander Gololobov](https://github.com/davenger)).
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
23
docs/changelogs/v23.2.3.17-stable.md
Normal file
@ -0,0 +1,23 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.2.3.17-stable (dec18bf7281) FIXME as compared to v23.2.2.20-stable (f6c269c8df2)
#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#46907](https://github.com/ClickHouse/ClickHouse/issues/46907): Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#47091](https://github.com/ClickHouse/ClickHouse/issues/47091): Fix arithmetic operations in aggregate optimization with `min` and `max` (see the example sketch after this section). [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#46885](https://github.com/ClickHouse/ClickHouse/issues/46885): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#47067](https://github.com/ClickHouse/ClickHouse/issues/47067): Fix a typo in the systemd service definition, which caused the systemd service start to fail. [#47051](https://github.com/ClickHouse/ClickHouse/pull/47051) ([Palash Goel](https://github.com/palash-goel)).
* Backported in [#47259](https://github.com/ClickHouse/ClickHouse/issues/47259): Fix concrete columns PREWHERE support. [#47154](https://github.com/ClickHouse/ClickHouse/pull/47154) ([Azat Khuzhin](https://github.com/azat)).
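A hedged sketch of the kind of rewrite the min/max fix above concerns; the optimizer may pull constant arithmetic out of the aggregate (e.g. `min(x + 1)` into `min(x) + 1`), and `numbers(10)` is used as a stand-in input:

```sql
SELECT min(number + 1) AS lo, max(number * 2) AS hi FROM numbers(10);
-- expected: lo = 1, hi = 18, with or without the rewrite
```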
#### NOT FOR CHANGELOG / INSIGNIFICANT

* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Do flushUntrackedMemory when context switches [#47102](https://github.com/ClickHouse/ClickHouse/pull/47102) ([Sema Checherinda](https://github.com/CheSema)).
* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
@ -172,7 +172,7 @@ Global thread pool is `GlobalThreadPool` singleton class. To allocate thread fro

Global pool is universal and all pools described below are implemented on top of it. This can be thought of as a hierarchy of pools. Any specialized pool takes its threads from the global pool using the `ThreadPool` class. So the main purpose of any specialized pool is to apply a limit on the number of simultaneous jobs and to do job scheduling. If there are more jobs scheduled than threads in a pool, `ThreadPool` accumulates jobs in a queue with priorities. Each job has an integer priority. The default priority is zero. All jobs with higher priority values are started before any job with a lower priority value. But there is no difference between already executing jobs, thus priority matters only when the pool is overloaded.

IO thread pool is implemented as a plain `ThreadPool` accessible via the `IOThreadPool::get()` method. It is configured in the same way as the global pool, with the `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of the IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU.
IO thread pool is implemented as a plain `ThreadPool` accessible via the `IOThreadPool::get()` method. It is configured in the same way as the global pool, with the `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of the IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. Backups to S3 perform a significant amount of IO operations, so to avoid impact on interactive queries there is a separate `BackupsIOThreadPool` configured with the `max_backups_io_thread_pool_size`, `max_backups_io_thread_pool_free_size` and `backups_io_thread_pool_queue_size` settings.

For periodic task execution there is the `BackgroundSchedulePool` class. You can register tasks using `BackgroundSchedulePool::TaskHolder` objects and the pool ensures that no task runs two jobs at the same time. It also allows you to postpone task execution to a specific instant in the future or temporarily deactivate a task. Global `Context` provides a few instances of this class for different purposes. For general purpose tasks `Context::getSchedulePool()` is used.
@ -67,7 +67,7 @@ It generally means that the SSH keys for connecting to GitHub are missing. These

You can also clone the repository via the HTTPS protocol:

git clone --recursive--shallow-submodules https://github.com/ClickHouse/ClickHouse.git
git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHouse.git

This, however, will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later, replacing the remote address of the repository with the `git remote` command.
@ -19,8 +19,8 @@ Kafka lets you:

``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
    name1 [type1],
    name2 [type2],
    ...
) ENGINE = Kafka()
SETTINGS
```

@ -113,6 +113,10 @@ Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format

</details>

:::info
The Kafka table engine doesn't support columns with a [default value](../../../sql-reference/statements/create/table.md#default_value). If you need columns with a default value, you can add them at the materialized view level (see below).
:::

## Description {#description}

The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name.
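A minimal sketch of the workaround from the note above (all names hypothetical): keep the Kafka table free of DEFAULT columns and add the computed column in the materialized view instead.

```sql
-- Kafka source table: plain columns only.
CREATE TABLE queue (id UInt64, payload String) ENGINE = Kafka()
SETTINGS kafka_broker_list = 'localhost:9092',
         kafka_topic_list = 'events',
         kafka_group_name = 'g1',
         kafka_format = 'JSONEachRow';

-- The "default" column lives in the target table and is filled by the view.
CREATE TABLE events (id UInt64, payload String, received DateTime)
ENGINE = MergeTree ORDER BY id;

CREATE MATERIALIZED VIEW events_mv TO events AS
SELECT id, payload, now() AS received FROM queue;
```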
@ -450,29 +450,32 @@ INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY

Conditions in the `WHERE` clause contain calls of functions that operate on columns. If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes.

The `set` index can be used with all functions. Function subsets for other indexes are shown in the table below.
Indexes of type `set` can be utilized by all functions. The other index types are supported as follows:

| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
|-----------------------------|-------------|--------|------------|------------|--------------|
| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |

| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
|-----------------------------|-------------|--------|------------|------------|--------------|
| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ |
| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✗ | ✗ | ✔ |
| [hasAll](/docs/en/sql-reference/functions/array-functions#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ |
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |

Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization.
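As a hedged sketch of how the tokenbf_v1 column of the table above is exercised (table and index names hypothetical):

```sql
CREATE TABLE logs
(
    ts DateTime,
    message String,
    -- tokenbf_v1(bloom filter size in bytes, number of hash functions, seed)
    INDEX msg_tokens message TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4
)
ENGINE = MergeTree ORDER BY ts;

-- hasToken is marked ✔ for tokenbf_v1, so this predicate can skip granules.
SELECT count() FROM logs WHERE hasToken(message, 'timeout');
```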
@ -1232,50 +1232,52 @@ Each row is formatted as a single document and each column is formatted as a sin

For output it uses the following correspondence between ClickHouse types and BSON types:

| ClickHouse type | BSON Type |
|-----------------|-----------|
| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double |
| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 |
| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 |
| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime |
| [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 |
| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 |
| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 |
| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
| [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 |

| ClickHouse type | BSON Type |
|-----------------|-----------|
| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double |
| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 |
| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 |
| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime |
| [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 |
| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 |
| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 |
| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 |
| [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 |
| [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` binary subtype or `\x02` string if setting `output_format_bson_string_as_string` is enabled |
| [UUID](/docs/en/sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 |
| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array |
| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array |
| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document |
| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document |
| [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 |
| [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype |

For input it uses the following correspondence between BSON types and ClickHouse types:

| BSON Type | ClickHouse Type |
|-----------|-----------------|
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) |

| BSON Type | ClickHouse Type |
|-----------|-----------------|
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) |
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |

Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert a BSON int32 value into ClickHouse UInt8).
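A quick hedged way to eyeball the output mapping above from `clickhouse-client`, assuming the `BSONEachRow` format name this section describes:

```sql
-- IPv4 should be written as `\x10` int32 and DateTime as `\x12` int64.
SELECT toIPv4('127.0.0.1') AS ip, now() AS ts FORMAT BSONEachRow;
```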
@ -1608,23 +1610,25 @@ See also [Format Schema](#formatschema).

The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.

| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|--------------------------------|----------------------|--------------------------------|
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` |
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md) | `union(T, Void), union(Void, T)` |
| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |

| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|--------------------------------|----------------------|--------------------------------|
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` |
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md) | `union(T, Void), union(Void, T)` |
| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |

For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting.
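A minimal sketch of reading CapnProto data per the table above; the file and schema names are assumptions:

```sql
-- schema.capnp is expected to define a struct named Message.
SELECT * FROM file('data.bin', 'CapnProto')
SETTINGS format_schema = 'schema.capnp:Message';
```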
@ -1804,21 +1808,23 @@ ClickHouse Avro format supports reading and writing [Avro data files](https://av

The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.

| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` |
|-------------------------|----------------------|-------------------------|
| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` |
| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` |
| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` |
| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` |
| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* |
| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` |
| `enum` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `enum` |
| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` |
| `union(null, T)`, `union(T, null)` | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md) | `union(null, T)` |
| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` |
| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** |
| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* |
| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* |

| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` |
|-------------------------|----------------------|-------------------------|
| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` |
| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` |
| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` |
| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` |
| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* |
| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` |
| `enum` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `enum` |
| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` |
| `union(null, T)`, `union(T, null)` | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md) | `union(null, T)` |
| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` |
| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** |
| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* |
| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* |
| `int` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `int` |
| `fixed(16)` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `fixed(16)` |

\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern)
\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
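A hedged sketch of the new IPv4/IPv6 rows above, written from `clickhouse-client` (the file name is an assumption):

```sql
-- IPv4 should map to Avro `int`, IPv6 to `fixed(16)`.
SELECT toIPv4('192.168.0.1') AS ip4, toIPv6('::1') AS ip6
INTO OUTFILE 'ips.avro' FORMAT Avro;
```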
@ -1918,28 +1924,30 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml`

The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.

| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
|------------------------------|----------------------|------------------------------|
| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` |
| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` |
| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` |
| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |

| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
|------------------------------|----------------------|------------------------------|
| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` |
| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` |
| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` |
| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `FIXED_LENGTH_BYTE_ARRAY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` |

Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types can also be nested.
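A minimal sketch of importing Parquet per the table above (table and file names hypothetical):

```sql
-- UINT32 columns can land in IPv4, FIXED_LENGTH_BYTE_ARRAY in IPv6.
INSERT INTO ips_table FROM INFILE 'addrs.parquet' FORMAT Parquet;
```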
@ -1973,6 +1981,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
## Arrow {#data-format-arrow}

@ -2007,6 +2016,8 @@ The table below shows supported data types and how they match ClickHouse [data t
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` |

Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types can also be nested.
@ -2041,6 +2052,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
- [output_format_arrow_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_fixed_string_as_fixed_byte_array) - use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_arrow_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_compression_method) - compression method used in output Arrow format. Default value - `none`.
## ArrowStream {#data-format-arrow-stream}

@ -2054,8 +2066,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam

The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.

| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|--------------------------|----------------------|--------------------------|
| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
| `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` |
| `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` |

@ -2070,6 +2082,7 @@ The table below shows supported data types and how they match ClickHouse [data t
| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
| `-` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `Int` |

Other types are not supported.
@ -2096,6 +2109,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.
### ORC format settings {#orc-format-settings}

- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`.
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
@ -2264,8 +2278,8 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data

### Data Types Matching {#data-types-matching-msgpack}

| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) |
|----------------------------------|----------------------|----------------------------------|
| `uint N`, `positive fixint` | [UIntN](/docs/en/sql-reference/data-types/int-uint.md) | `uint N` |
| `int N`, `negative fixint` | [IntN](/docs/en/sql-reference/data-types/int-uint.md) | `int N` |
| `bool` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `uint 8` |

@ -2278,6 +2292,8 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data
| `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `uint 64` |
| `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md) | `fixarray`, `array 16`, `array 32` |
| `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` |
| `uint 32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `uint 32` |
| `bin 8` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8` |

Example:
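The page's own example is elided by this diff; as a separate hedged sketch of the new IPv4 row above:

```sql
-- IPv4 travels as MessagePack `uint 32` per the mapping table.
SELECT toIPv4('10.1.2.3') AS ip FORMAT MsgPack;
```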
|
@ -26,6 +26,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasn’t don
|
||||
- [one-ck](https://github.com/lizhichao/one-ck)
|
||||
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
|
||||
- [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php)
|
||||
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
|
||||
- Go
|
||||
- [clickhouse](https://github.com/kshvakov/clickhouse/)
|
||||
- [go-clickhouse](https://github.com/roistat/go-clickhouse)
|
||||
|
@ -967,6 +967,7 @@ The maximum number of jobs that can be scheduled on the Global Thread pool. Incr
Possible values:

- Positive integer.
- 0 — No limit.

Default value: `10000`.

@ -976,6 +977,69 @@ Default value: `10000`.

<thread_pool_queue_size>12000</thread_pool_queue_size>
```

## max_io_thread_pool_size {#max-io-thread-pool-size}

ClickHouse uses threads from the IO Thread pool to do some IO operations (e.g. to interact with S3). `max_io_thread_pool_size` limits the maximum number of threads in the pool.

Possible values:

- Positive integer.

Default value: `100`.

## max_io_thread_pool_free_size {#max-io-thread-pool-free-size}

If the number of **idle** threads in the IO Thread pool exceeds `max_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary.

Possible values:

- Positive integer.

Default value: `0`.

## io_thread_pool_queue_size {#io-thread-pool-queue-size}

The maximum number of jobs that can be scheduled on the IO Thread pool.

Possible values:

- Positive integer.
- 0 — No limit.

Default value: `10000`.
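The three IO pool settings above live in the main server configuration. A minimal sketch with purely illustrative values (not tuning recommendations):

```xml
<clickhouse>
    <!-- cap the IO Thread pool at 200 threads -->
    <max_io_thread_pool_size>200</max_io_thread_pool_size>
    <!-- shrink the pool once more than 20 threads sit idle -->
    <max_io_thread_pool_free_size>20</max_io_thread_pool_free_size>
    <!-- allow at most 10000 queued IO jobs -->
    <io_thread_pool_queue_size>10000</io_thread_pool_queue_size>
</clickhouse>
```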
## max_backups_io_thread_pool_size {#max-backups-io-thread-pool-size}
ClickHouse uses threads from the Backups IO Thread pool to do S3 backup IO operations. `max_backups_io_thread_pool_size` limits the maximum number of threads in the pool.

Possible values:

- Positive integer.

Default value: `1000`.

## max_backups_io_thread_pool_free_size {#max-backups-io-thread-pool-free-size}

If the number of **idle** threads in the Backups IO Thread pool exceeds `max_backups_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary.

Possible values:

- Positive integer.
- Zero.

Default value: `0`.

## backups_io_thread_pool_queue_size {#backups-io-thread-pool-queue-size}

The maximum number of jobs that can be scheduled on the Backups IO Thread pool. It is recommended to keep this queue unlimited due to the current S3 backup logic.

Possible values:

- Positive integer.
- 0 — No limit.

Default value: `0`.
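An analogous server-config sketch for the backups pool (values illustrative):

```xml
<clickhouse>
    <max_backups_io_thread_pool_size>1000</max_backups_io_thread_pool_size>
    <max_backups_io_thread_pool_free_size>0</max_backups_io_thread_pool_free_size>
    <!-- 0 keeps the queue unlimited, as recommended above -->
    <backups_io_thread_pool_queue_size>0</backups_io_thread_pool_queue_size>
</clickhouse>
```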
## background_pool_size {#background_pool_size}
Sets the number of threads performing background merges and mutations for tables with MergeTree engines. For backward compatibility, this setting can also be applied at server startup from the `default` profile configuration. You can only increase the number of threads at runtime; to lower it, you have to restart the server. By adjusting this setting you manage CPU and disk load. A smaller pool size uses less CPU and disk resources, but background processes advance more slowly, which might eventually impact query performance. A config sketch follows.
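As a one-line config sketch (the value is stated here only as an illustration):

```xml
<background_pool_size>16</background_pool_size>
```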
@ -1014,6 +1014,12 @@ Use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString column
Enabled by default.

### output_format_arrow_compression_method {#output_format_arrow_compression_method}

Compression method used in the output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed).

Default value: `none`.
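A hedged usage sketch (table and file names are made up; `INTO OUTFILE` assumes clickhouse-client):

```sql
SET output_format_arrow_compression_method = 'zstd';
SELECT * FROM events INTO OUTFILE 'events.arrow' FORMAT Arrow;
```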
## ORC format settings {#orc-format-settings}

### input_format_orc_import_nested {#input_format_orc_import_nested}

@ -1057,6 +1063,12 @@ Use ORC String type instead of Binary for String columns.

Disabled by default.

### output_format_orc_compression_method {#output_format_orc_compression_method}

Compression method used in the output ORC format. Supported codecs: `lz4`, `snappy`, `zlib`, `zstd`, `none` (uncompressed).

Default value: `none`.

## Parquet format settings {#parquet-format-settings}

### input_format_parquet_import_nested {#input_format_parquet_import_nested}

@ -1112,6 +1124,12 @@ The version of Parquet format used in output format. Supported versions: `1.0`,

Default value: `2.latest`.

### output_format_parquet_compression_method {#output_format_parquet_compression_method}

Compression method used in the output Parquet format. Supported codecs: `snappy`, `lz4`, `brotli`, `zstd`, `gzip`, `none` (uncompressed).

Default value: `snappy`.
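The same pattern applies to Parquet; again a sketch with made-up names:

```sql
SET output_format_parquet_compression_method = 'zstd';
SELECT * FROM events INTO OUTFILE 'events.parquet' FORMAT Parquet;
```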
## Hive format settings {#hive-format-settings}

### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}
@ -1248,7 +1248,9 @@ Possible values:
Default value: 1.

:::warning
Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key).
If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas.
If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects.
:::

## totals_mode {#totals-mode}
@ -1273,16 +1275,47 @@ Default value: `1`.
**Additional Info**

This setting will produce different results depending on the other settings used.

:::warning
This setting will produce incorrect results when joins or subqueries are involved and not all tables meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
:::

### Parallel processing using `SAMPLE` key

A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:

- The position of the sampling key in the partitioning key does not allow efficient range scans.
- Adding a sampling key to the table makes filtering by other columns less efficient.
- The sampling key is an expression that is expensive to calculate.
- The cluster latency distribution has a long tail, so that querying more servers increases the overall query latency.

### Parallel processing using [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)

This setting is useful for any replicated table.

## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key}

An arbitrary integer expression that can be used to split work between replicas for a specific table.
The value can be any integer expression.
A query may be processed faster if it is executed on several servers in parallel, but this depends on the [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type) used.

Simple expressions using primary keys are preferred.

If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards.
Otherwise, it behaves the same as with a `SAMPLE` key: it uses multiple replicas of each shard. A usage sketch is shown below.
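A hedged sketch of supplying the custom key per query (the table name and key expression are made up, and a single-shard cluster with several replicas is assumed):

```sql
SELECT count()
FROM dist_events  -- a Distributed table over the cluster
SETTINGS
    max_parallel_replicas = 3,
    parallel_replicas_custom_key = 'cityHash64(UserID)',
    parallel_replicas_custom_key_filter_type = 'default';
```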
## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type}
How to use the `parallel_replicas_custom_key` expression for splitting work between replicas.

Possible values:

- `default` — Use the default implementation based on a modulo operation on `parallel_replicas_custom_key`.
- `range` — Split the entire value space of the expression into ranges. This type of filtering is useful if the values of `parallel_replicas_custom_key` are uniformly spread across the entire integer space, e.g. hash values.

Default value: `default`.

## compile_expressions {#compile-expressions}
@ -135,11 +135,13 @@ Example of configuration for versions later or equal to 22.8:
        </cache>
    </disks>
    <policies>
        <s3-cache>
            <volumes>
                <main>
                    <disk>cache</disk>
                </main>
            </volumes>
        </s3-cache>
    </policies>
</storage_configuration>
```
@ -159,11 +161,13 @@ Example of configuration for versions earlier than 22.8:
        </s3>
    </disks>
    <policies>
        <s3-cache>
            <volumes>
                <main>
                    <disk>s3</disk>
                </main>
            </volumes>
        </s3-cache>
    </policies>
</storage_configuration>
```
@ -1126,15 +1126,48 @@ Rounds the time to the half hour.
## toYYYYMM

Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.

### Example

```sql
SELECT
    toYYYYMM(now(), 'US/Eastern')
```

```response
┌─toYYYYMM(now(), 'US/Eastern')─┐
│                        202303 │
└───────────────────────────────┘
```

## toYYYYMMDD

Converts a date or date with time to a UInt32 number containing the year, month and day number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.

### Example

```sql
SELECT
    toYYYYMMDD(now(), 'US/Eastern')
```

```response
┌─toYYYYMMDD(now(), 'US/Eastern')─┐
│                        20230302 │
└─────────────────────────────────┘
```

## toYYYYMMDDhhmmss

Converts a date or date with time to a UInt64 number containing the year, month, day, hour, minute and second (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.

### Example

```sql
SELECT
    toYYYYMMDDhhmmss(now(), 'US/Eastern')
```

```response
┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐
│                        20230302112209 │
└───────────────────────────────────────┘
```

## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters
@ -1231,8 +1264,8 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %e | day of the month, space-padded (1-31) |  2 |
| %f | fractional second from the fractional part of DateTime64 | 1234560 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %h | hour in 12h format (01-12) | 09 |
| %H | hour in 24h format (00-23) | 22 |
| %i | minute (00-59) | 33 |
@ -280,12 +280,20 @@ SELECT
## toIPv4OrDefault(string)

Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns `0.0.0.0` (0 IPv4).

## toIPv4OrNull(string)

Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null.

## toIPv6OrDefault(string)

Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns `::` (0 IPv6).

## toIPv6OrNull(string)

Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null.
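A quick illustration of the fallback values (comments show the expected results):

```sql
SELECT
    toIPv4OrDefault('not-an-ip'),  -- 0.0.0.0
    toIPv4OrNull('not-an-ip'),     -- NULL
    toIPv6OrDefault('not-an-ip');  -- ::
```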
## toIPv6

Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value.
@ -330,7 +330,7 @@ repeat(s, n)
**Arguments**

- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md).
- `n` — The number of times to repeat the string. [UInt or Int](../../sql-reference/data-types/int-uint.md).

**Returned value**
@ -66,6 +66,42 @@ Result:
- [Map(key, value)](../../sql-reference/data-types/map.md) data type

## mapFromArrays

Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md).

The function is a more convenient alternative to `CAST((key_array, value_array), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.

**Syntax**

```sql
mapFromArrays(keys, values)
```

Alias: `MAP_FROM_ARRAYS(keys, values)`

**Arguments**

- `keys` — Given key array to create a map from. The nested type of the array must be [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), or [Enum](../../sql-reference/data-types/enum.md).
- `values` — Given value array to create a map from.

**Returned value**

- A map whose keys and values are constructed from the key and value arrays.

**Example**

Query:

```sql
SELECT mapFromArrays(['a', 'b', 'c'], [1, 2, 3])
```

```text
┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐
│ {'a':1,'b':2,'c':3}                       │
└───────────────────────────────────────────┘
```

## mapAdd

Collect all the keys and sum corresponding values.
@ -235,7 +271,7 @@ Determines whether the `map` contains the `key` parameter.
mapContains(map, key)
```

**Arguments**

- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `key` — Key. Type matches the type of keys of `map` parameter.

@ -280,7 +316,7 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat

mapKeys(map)
```

**Arguments**

- `map` — Map. [Map](../../sql-reference/data-types/map.md).

@ -323,7 +359,7 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat

mapValues(map)
```

**Arguments**

- `map` — Map. [Map](../../sql-reference/data-types/map.md).

@ -362,7 +398,7 @@ Result:

mapContainsKeyLike(map, pattern)
```

**Arguments**

- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `pattern` — String pattern to match.

@ -400,7 +436,7 @@ Result:

mapExtractKeyLike(map, pattern)
```

**Arguments**

- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `pattern` — String pattern to match.

@ -438,7 +474,7 @@ Result:

mapApply(func, map)
```

**Arguments**

- `func` — [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).

@ -478,7 +514,7 @@ Result:

mapFilter(func, map)
```

**Arguments**

- `func` — [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).

@ -520,7 +556,7 @@ Result:

mapUpdate(map1, map2)
```

**Arguments**

- `map1` — [Map](../../sql-reference/data-types/map.md).
- `map2` — [Map](../../sql-reference/data-types/map.md).
@ -233,8 +233,9 @@ If `some_predicate` is not selective enough, it will return large amount of data
### Distributed Subqueries and max_parallel_replicas

When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed.

For example, the following:

```sql
SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
SETTINGS max_parallel_replicas=3
```

@ -247,8 +248,12 @@ SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM

```sql
SETTINGS parallel_replicas_count=3, parallel_replicas_offset=M
```

where M is between 1 and 3 depending on which replica the local query is executing on.

These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.

Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN.

One workaround if local_table_2 does not meet the requirements is to use `GLOBAL IN` or `GLOBAL JOIN`, as in the sketch below.
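For example, the query above can be rewritten as follows; the subquery then runs once on the initiator and its result is shipped to every replica:

```sql
SELECT CounterID, count() FROM distributed_table_1 WHERE UserID GLOBAL IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
SETTINGS max_parallel_replicas=3
```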
If a table doesn't have a sampling key, more flexible options based on [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) can be used, which can produce different and more optimal behaviour.
@ -110,7 +110,7 @@ If the type is not `Nullable` and if `NULL` is specified, it will be treated as
See also [data_type_default_nullable](../../../operations/settings/settings.md#data_type_default_nullable) setting.

## Default Values {#default_values}

The column description can specify an expression for a default value, in one of the following ways: `DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`. A brief sketch follows.
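A minimal sketch of the three forms (the table, columns, and expressions are made up):

```sql
CREATE TABLE events
(
    ts DateTime,
    bytes UInt64,
    day Date DEFAULT toDate(ts),                       -- stored; computed when omitted in INSERT
    bytes_kb UInt64 MATERIALIZED intDiv(bytes, 1024),  -- always computed; cannot appear in INSERT
    kb UInt64 ALIAS intDiv(bytes, 1024)                -- computed on SELECT; never stored
)
ENGINE = MergeTree ORDER BY ts;
```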
@ -576,7 +576,7 @@ SELECT * FROM base.t1;
You can add a comment to the table when creating it.

:::note
The comment clause is supported by all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md).
:::
@ -70,6 +70,12 @@ A materialized view is implemented as follows: when inserting data to the table
Materialized views in ClickHouse use **column names** instead of column order during insertion into the destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using materialized views.

Materialized views in ClickHouse are implemented more like insert triggers. If there's some aggregation in the view query, it's applied only to the batch of freshly inserted data. Any changes to the existing data of the source table (like update, delete, drop partition, etc.) do not change the materialized view.

Materialized views in ClickHouse do not have deterministic behaviour in case of errors. This means that blocks that had already been written will be preserved in the destination table, but all blocks after the error will not.

By default, if pushing to one of the views fails, the INSERT query fails too, and some blocks may not be written to the destination table. This can be changed with the `materialized_views_ignore_errors` setting (set it for the `INSERT` query): if you set `materialized_views_ignore_errors=true`, any errors while pushing to views are ignored and all blocks are written to the destination table. A usage sketch is shown after this note.

Also note that `materialized_views_ignore_errors` is set to `true` by default for `system.*_log` tables.
:::
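A minimal sketch of the setting in use (the table name is illustrative):

```sql
INSERT INTO src
SETTINGS materialized_views_ignore_errors = 1
VALUES (1), (2), (3);
```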
If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...`. Otherwise, the query contains only the data inserted into the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted into the table during the view creation will not be inserted into it.
@ -6,21 +6,22 @@ sidebar_label: file
# file

Creates a table from a file. This table function is similar to the [url](/docs/en/sql-reference/table-functions/url.md) and [hdfs](/docs/en/sql-reference/table-functions/hdfs.md) ones.

The `file` function can be used in `SELECT` and `INSERT` queries on data in [File](/docs/en/engines/table-engines/special/file.md) tables.

**Syntax**

``` sql
file(path [,format] [,structure] [,compression])
```

**Parameters**

- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). The path supports the following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}`, where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2` (see the sketch below).
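For instance, a hedged sketch (the file name and schema are made up) that reads a gzip-compressed CSV from `user_files_path`:

```sql
SELECT * FROM file('data.csv.gz', 'CSV', 'id UInt32, name String', 'gz') LIMIT 5;
```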
**Returned value**

@ -53,7 +54,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U

└─────────┴─────────┴─────────┘
```

Getting the first 10 lines of a table that contains 3 columns of [UInt32](/docs/en/sql-reference/data-types/int-uint.md) type from a CSV file:

``` sql
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
```

@ -143,4 +144,4 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3

**See Also**

- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)
@ -24,6 +24,7 @@ sidebar_label: "Клиентские библиотеки от сторонни
- [SeasClick C++ client](https://github.com/SeasX/SeasClick)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [kolya7k ClickHouse PHP extension](https://github.com/kolya7k/clickhouse-php)
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
- Go
    - [clickhouse](https://github.com/kshvakov/clickhouse/)
    - [go-clickhouse](https://github.com/roistat/go-clickhouse)
@ -301,7 +301,7 @@ ClickHouse поддерживает временные таблицы со сл
- Temporary tables disappear when the session ends, including when the connection is lost.
- A temporary table uses only the Memory engine (see the sketch below).
- A database cannot be specified for a temporary table. It is created outside of databases.
- It is impossible to create a temporary table with a distributed DDL query on all cluster servers (with the `ON CLUSTER` option): such a table exists only within the current session.
- If a temporary table has the same name as another table, and a query refers to the table name without specifying the database, the temporary table is used.
- For distributed query processing, the temporary tables used in a query are passed to the remote servers.
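A brief sketch of the properties above (the table name is made up): no engine clause is needed, and the table is visible only in the current session:

```sql
CREATE TEMPORARY TABLE tmp_ids (id UInt64);
INSERT INTO tmp_ids VALUES (1), (2);
SELECT * FROM tmp_ids;  -- works here; other sessions do not see tmp_ids
```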
@ -344,7 +344,9 @@ REPLACE TABLE myOldTable SELECT * FROM myOldTable WHERE CounterID <12345;
### Syntax

```sql
{CREATE [OR REPLACE]|REPLACE} TABLE [db.]table_name
```

Any of the `CREATE` query syntax variants can be used for this query. A `REPLACE` query for a non-existent table causes an error.
@ -108,7 +108,7 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
## WITH CUBE Modifier {#with-cube-modifier}

The `WITH CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list.

The subtotal rows are added at the end of the result table. In the columns used for grouping, they contain `0` or an empty string.
@ -16,7 +16,7 @@ FROM <left_table>
(ON <expr_list>)|(USING <column_list>) ...
```

Expressions from the `ON` clause and columns from the `USING` clause are called "join keys". Unless stated otherwise, a join produces a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) of rows with matching join key values, which may result in far more rows than in the source tables.

## Supported Types of JOIN {#select-join-types}

@ -28,7 +28,7 @@ FROM <left_table>

- `FULL OUTER JOIN`, non-matching rows from both tables are returned in addition to the matching rows.
- `CROSS JOIN`, produces the Cartesian product of the whole tables, join keys are not specified.

`JOIN` without a specified type implies `INNER`. The `OUTER` keyword can be omitted. An alternative syntax for `CROSS JOIN` is specifying multiple tables separated by commas in the [FROM clause](from.md).

Additional join types available in ClickHouse:

@ -62,7 +62,7 @@ FROM <left_table>

Rows are joined only if the whole composite condition is satisfied. If it is not satisfied, rows may still be included in the result depending on the `JOIN` type. Note that if the same condition is placed in the `WHERE` clause, rows for which it is not satisfied never get into the result.

The `OR` operator inside the `ON` clause works using the hash join algorithm: for each `OR` argument with join keys, a separate hash table is created, so memory consumption and query execution time grow linearly with the number of `OR` expressions in the `ON` clause.

:::note "Note"
If a condition uses columns from different tables, only the equality operator (`=`) is supported so far.

@ -280,7 +280,7 @@ SELECT a, b, toTypeName(a), toTypeName(b) FROM t_1 FULL JOIN t_2 USING (a, b);

Each time a query with the same `JOIN` is run, the subquery is executed again because the result is not cached. This can be avoided by using the special [Join](../../../engines/table-engines/special/join.md) table engine, which is a prepared set for joining that is always kept in RAM.

In some cases it is more efficient to use [IN](../../operators/in.md) instead of `JOIN`.

If a `JOIN` is needed for joining with dimension tables (relatively small tables that contain dimension properties, such as names of advertising campaigns), it may not be very convenient due to the bulky syntax and the fact that the right table is re-read for every query. For such cases there is the "external dictionaries" feature, which should be used instead of `JOIN`. For more information, see the section "External dictionaries".
@ -67,7 +67,7 @@ sidebar_label: ORDER BY
## Collation Examples {#collation-examples}

An example with [String](../../../sql-reference/data-types/string.md) values:

Input table:

@ -241,13 +241,13 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';

└───┴─────────┘
```

## Implementation Details {#implementation-details}

Less RAM is used if a small enough [LIMIT](limit.md) is specified in addition to `ORDER BY`. Otherwise, the amount of memory spent is proportional to the volume of data to sort. In distributed query processing, if [GROUP BY](group-by.md) is omitted, sorting is partially done on remote servers, and the results are merged on the requestor server. This means that for distributed sorting, the volume of data to sort can exceed the memory of a single server.

If there is not enough RAM, sorting can be performed in external memory (creating temporary files on disk). Use the `max_bytes_before_external_sort` setting for this. If it is set to 0 (the default), external sorting is disabled. If it is enabled, then once the volume of data to sort reaches the specified number of bytes, the collected data is sorted and dumped into a temporary file. After all the data is read, all the sorted files are merged and the result is returned. Files are written to the `/var/lib/clickhouse/tmp/` directory (by default; it can be changed with the `tmp_path` config parameter).

Running a query may use more memory than `max_bytes_before_external_sort`, so this setting must be significantly lower than `max_memory_usage`. For example, if your server has 128 GB of RAM and you need to run a single query, set `max_memory_usage` to 100 GB and `max_bytes_before_external_sort` to 80 GB, as in the sketch below.
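A hedged sketch of those example values:

```sql
SET max_memory_usage = 100000000000;               -- 100 GB
SET max_bytes_before_external_sort = 80000000000;  -- 80 GB
```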
External sorting works much less effectively than sorting in RAM.

@ -366,9 +366,9 @@ ORDER BY

└────────────┴────────────┴──────────┘
```

The `d1` field is not filled in and uses the default value, because we do not have repeated values for `d2`, so the fill sequence for `d1` cannot be calculated correctly.

The following query (with the order of fields in ORDER BY changed):

```sql
SELECT
    toDate((number * 10) * 86400) AS d1,
@ -13,7 +13,7 @@ Prewhere — это оптимизация для более эффективн
It makes sense to use `PREWHERE` when there are filtering conditions that use a minority of the columns in the query but provide strong filtering. This reduces the volume of data to read.

A query may specify both `PREWHERE` and `WHERE` at the same time. In this case, `PREWHERE` precedes `WHERE`.

If the [optimize_move_to_prewhere](../../../operations/settings/settings.md#optimize_move_to_prewhere) setting is set to 0, the heuristics for automatically moving parts of expressions from `WHERE` to `PREWHERE` are disabled.
@ -10,7 +10,7 @@ sidebar_label: SAMPLE
Sampling makes sense when:

1. Result accuracy is not important, e.g. for rough estimates.
2. Hardware capabilities do not allow meeting strict criteria. For example, the response time must be <100 ms, while calculation accuracy has lower priority.
3. Result accuracy is part of the service's business model. For example, users with a free subscription may get reports with lower accuracy than users with a premium subscription.

:::note "Warning"
@ -26,7 +26,7 @@ SELECT CounterID, 2 AS table, sum(Sign) AS c
Result columns are matched by their index (order inside `SELECT`). If column names do not match, the names for the final result are taken from the first query.

Type casting is performed for unions. For example, if two queries being combined have the same field with compatible non-`Nullable` and `Nullable` types, the resulting `UNION` has a `Nullable` type field.

Queries that are parts of a `UNION` can be enclosed in parentheses. [ORDER BY](order-by.md) and [LIMIT](limit.md) are applied to the separate queries, not to the final result. If you need to apply a transformation to the final result, you can put all the queries combined with `UNION` into a subquery in the [FROM](from.md) clause.
@ -5,7 +5,7 @@ sidebar_label: WITH
# WITH Clause {#with-clause}

ClickHouse supports [Common Table Expressions](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивные_запросы_в_SQL), that is, it allows using results of expressions from the `WITH` clause in the rest of the `SELECT` query. Named subqueries can be included in the current and child query context wherever table objects are allowed. Recursion is prevented by hiding the common table expressions of the current level from the `WITH` expression.

## Syntax
|
- `shard_name` — the shard name. Database replicas are grouped into shards by `shard_name`.
- `replica_name` — the replica name. Replica names must differ for all replicas of the same shard.

!!! note "Warning"
    For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables, if no arguments are provided, the default arguments `/clickhouse/tables/{uuid}/{shard}` and `{replica}` are used. They can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). The macro `{uuid}` is expanded to the table's uuid, while `{shard}` and `{replica}` are expanded to values from the server config, not from the database engine arguments. In the future it will be possible to use the `shard_name` and `replica_name` of the Replicated database.

## Usage {#specifics-and-recommendations}

@ -52,8 +51,8 @@ CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;

```

``` text
┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ shard1|replica1      │      0 │       │                   2 │                0 │
│ shard1|other_replica │      0 │       │                   1 │                0 │
│ other_shard|r1       │      0 │       │                   0 │                0 │
└──────────────────────┴─────────┴───────┴─────────────────────┴──────────────────┘

@ -62,13 +61,13 @@ CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;

Showing the system table:

``` sql
SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
FROM system.clusters WHERE cluster='r';
```

``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r       │         1 │           1 │ node3     │ 127.0.0.1    │ 9002 │        0 │
│ r       │         2 │           1 │ node2     │ 127.0.0.1    │ 9001 │        0 │
│ r       │         2 │           2 │ node1     │ 127.0.0.1    │ 9000 │        1 │
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘

@ -83,9 +82,9 @@ node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY

```

``` text
┌─hosts─┬─groupArray(n)─┐
│ node1 │ [1,3,5,7,9]   │
│ node2 │ [0,2,4,6,8]   │
└───────┴───────────────┘
```

@ -98,8 +97,8 @@ node4 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','r2');

The cluster configuration will look like this:

``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r       │         1 │           1 │ node3     │ 127.0.0.1    │ 9002 │        0 │
│ r       │         1 │           2 │ node4     │ 127.0.0.1    │ 9003 │        0 │
│ r       │         2 │           1 │ node2     │ 127.0.0.1    │ 9001 │        0 │
│ r       │         2 │           2 │ node1     │ 127.0.0.1    │ 9000 │        1 │

@ -113,8 +112,8 @@ node2 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY

```

```text
┌─hosts─┬─groupArray(n)─┐
│ node2 │ [1,3,5,7,9]   │
│ node4 │ [0,2,4,6,8]   │
└───────┴───────────────┘
```
@ -1,6 +1,6 @@
---
slug: /zh/engines/table-engines/mergetree-family/mergetree
---

# MergeTree {#table_engines-mergetree}

The most powerful table engine in ClickHouse is `MergeTree`, together with the other engines of the `*MergeTree` family.

@ -25,8 +25,9 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及

If needed, you can set a sampling method for the table.

:::info
The [Merge](../special/merge.md#merge) engine does not belong to the `*MergeTree` family.
:::

## Creating a Table {#table_engine-mergetree-creating-a-table}

@ -364,7 +365,7 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达

Functions with a constant argument smaller than the ngram size cannot be used by `ngrambf_v1` for query optimization.

:::note
Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1` and `bloom_filter` indexes cannot be used for optimizing queries where the result of a function is expected to be false, for example:

- Can be optimized:

@ -379,6 +380,7 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达

- `NOT s = 1`
- `s != 1`
- `NOT startsWith(s, 'test')`
:::

## Concurrent Data Access {#concurrent-data-access}
@ -45,7 +45,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) (see the examples there)

**Distributed Settings**

- `fsync_after_insert` - do `fsync` for the file data after an asynchronous insert into a Distributed table. Guarantees that the OS flushed all the inserted data to a file **on the disk of the initiator node**.

@ -66,19 +66,20 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2

- `monitor_max_sleep_time_ms` - same as [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms)

:::note
**Durability settings** (`fsync_...`):

- only affect asynchronous inserts (i.e. `insert_distributed_sync=false`), when the data is first stored on the disk of the initiator node and later asynchronously sent to the shards;
- may significantly decrease `insert` performance;
- affect writing the data stored inside the Distributed table folder to the **node that accepted your insert**. If you need guarantees for writing data into the underlying MergeTree tables, see the durability settings (`...fsync...`) in `system.merge_tree_settings`.

For the **insert limit settings** (`..._insert`) see also:

- the [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
- the [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) setting
- `bytes_to_throw_insert` is handled before `bytes_to_delay_insert`, so you should not set it to a value less than `bytes_to_delay_insert`
:::

**Example**

``` sql

@ -214,7 +215,7 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何

## Reading Data {#distributed-reading-data}

When querying a `Distributed` table, `SELECT` queries are sent to all shards and work regardless of how the data is distributed across the shards (it can be distributed completely randomly). When you add a new shard, you do not have to transfer the old data into it. Instead, you can write new data to it with a heavier weight: the data will be distributed slightly unevenly, but queries will work correctly and efficiently.

When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).

@ -225,8 +226,9 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何

- `_shard_num` — the `shard_num` value from the `system.clusters` table. Type: [UInt32](../../../sql-reference/data-types/int-uint.md).

:::note
Since the [remote](../../../sql-reference/table-functions/remote.md) and [cluster](../../../sql-reference/table-functions/cluster.mdx) table functions create a temporary Distributed table internally, `_shard_num` is available there too.
:::

**See Also**

- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) description
@ -617,8 +617,9 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
To enable native replication, [ZooKeeper](http://zookeeper.apache.org/) is required. ClickHouse takes care of data consistency on all replicas and runs a restore procedure after a failure automatically. It is recommended to deploy the ZooKeeper cluster on separate servers (where no other processes, including ClickHouse, are running).

:::note
ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended: in this case, ClickHouse cannot guarantee data consistency on all replicas, so it becomes the responsibility of your application.
:::

The ZooKeeper locations are specified in the configuration file:
@ -1,6 +1,6 @@
---
slug: /zh/guides/improving-query-performance/skipping-indexes
sidebar_label: Data Skipping Indexes
sidebar_position: 2
---
@ -1,6 +1,6 @@
---
slug: /zh/guides/improving-query-performance/sparse-primary-indexes
sidebar_label: Sparse Primary Indexes
sidebar_position: 20
---
@ -685,8 +685,9 @@ CREATE TABLE IF NOT EXISTS example_table
- If `input_format_defaults_for_omitted_fields = 0`, the default values for `x` and `a` both equal `0` (the default value of the `UInt32` data type).
- If `input_format_defaults_for_omitted_fields = 1`, the default value for `x` equals `0`, but the default value for `a` equals `x * 2`.

:::warning
When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources than with `input_format_defaults_for_omitted_fields = 0`.
:::

### Selecting Data {#selecting-data}

@ -708,8 +709,9 @@ CREATE TABLE IF NOT EXISTS example_table

Unlike the [JSON](#json) format, there is no substitution of invalid UTF-8 sequences. Values are escaped in the same way as for `JSON`.

:::info
Any set of bytes can be output in string fields. Use the `JSONEachRow` format only if you are sure that the data in the table can be formatted as JSON without losing any information.
:::

### Nested Structures {#jsoneachrow-nested}

@ -1216,9 +1218,9 @@ SET format_avro_schema_registry_url = 'http://schema-registry';

SELECT * FROM topic1_stream;
```

:::warning
The `format_avro_schema_registry_url` setting needs to be written to `users.xml` so that it keeps its value after a ClickHouse restart. You can also specify it when using the Kafka engine.
:::

## Parquet {#data-format-parquet}
@ -188,8 +188,9 @@ $ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number
$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```

:::warning
Some HTTP clients may decompress data from the server by default (with `gzip` and `deflate`), so you may get decompressed data even if you did not use the compression settings correctly.
:::

You can use the `database` URL parameter or the `X-ClickHouse-Database` header to specify the default database.

@ -447,8 +448,9 @@ $ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:

max_final_threads	2
```

:::warning
In one `predefined_query_handler`, only one `query` of insert type is supported.
:::

### Dynamic Queries {#dynamic_query_handler}
@ -24,6 +24,7 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试
- [SeasClick C++ client](https://github.com/SeasX/SeasClick)
- [one-ck](https://github.com/lizhichao/one-ck)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
- Go
    - [clickhouse](https://github.com/kshvakov/clickhouse/)
    - [go-clickhouse](https://github.com/roistat/go-clickhouse)

3
docs/zh/interfaces/third-party/index.md
vendored
@ -13,5 +13,6 @@ sidebar_position: 24
- [GUI](../../interfaces/third-party/gui.md)
- [Proxies](../../interfaces/third-party/proxy.md)

:::note
Generic tools that support a common API, such as [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md), usually work with ClickHouse as well, but they are not listed here because there are far too many of them.
:::
@ -24,9 +24,9 @@ ClickHouse权限实体包括:
We recommend using the SQL-driven workflow. Both configuration approaches work simultaneously, so if you manage accounts and access rights with server configuration files, you can smoothly switch to the SQL-driven workflow.

:::warning
You cannot manage the same access entity with both configuration approaches at the same time.
:::

## Usage {#access-control-usage}
@ -12,8 +12,9 @@ sidebar_label: "\u6570\u636E\u5907\u4EFD"
Different companies have different available resources and business requirements, so there is no universal solution for ClickHouse backup and restore that fits every situation. What works for one gigabyte of data likely will not work for tens of petabytes. There is a variety of possible approaches with their own pros and cons, which are discussed below. It is a good idea to use several approaches instead of just one to compensate for their various shortcomings.

:::note
Keep in mind that if you backed something up and never tried to restore it, chances are that the restore will not work properly when you actually need it (or at least it will take longer than the business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly.
:::

## Duplicating Source Data Somewhere Else {#duplicating-source-data-somewhere-else}
|
@ -528,8 +528,9 @@ SSL客户端/服务器配置。
|
||||
|
||||
包含数据的目录的路径。
|
||||
|
||||
!!! note "注"
|
||||
尾部斜杠是强制性的。
|
||||
:::note
|
||||
尾部斜杠是强制性的。
|
||||
:::
|
||||
|
||||
**示例**
|
||||
|
||||
@ -714,8 +715,9 @@ TCP端口,用于与客户端进行安全通信。 使用它与 [OpenSSL](#serv
|
||||
|
||||
用于处理大型查询的临时数据的路径。
|
||||
|
||||
!!! note "注"
|
||||
尾部斜杠是强制性的。
|
||||
:::note
|
||||
尾部斜杠是强制性的。
|
||||
:::
|
||||
|
||||
**示例**
|
||||
|
||||
@ -728,11 +730,12 @@ TCP端口,用于与客户端进行安全通信。 使用它与 [OpenSSL](#serv
|
||||
从政策 [`storage_configuration`](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) 存储临时文件。
|
||||
如果没有设置 [`tmp_path`](#server-settings-tmp_path) 被使用,否则被忽略。
|
||||
|
||||
!!! note "注"
|
||||
- `move_factor` 被忽略
|
||||
:::note
|
||||
- `move_factor` 被忽略
|
||||
- `keep_free_space_bytes` 被忽略
|
||||
- `max_data_part_size_bytes` 被忽略
|
||||
-您必须在该政策中只有一个卷
|
||||
:::
|
||||
|
||||
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}
|
||||
|
||||
|
@ -8,8 +8,9 @@ sidebar_label: "\u8BBE\u7F6E\u914D\u7F6E"
A settings profile is a collection of settings grouped under one name.

:::info
ClickHouse also supports the [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing settings profiles. We recommend using it.
:::

A settings profile can have any name. You can specify the same profile for different users. The most important thing you can write in a settings profile is `readonly=1`, which ensures read-only access; a minimal sketch is shown below.
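A minimal illustrative `users.xml` fragment (the profile name is made up):

```xml
<profiles>
    <readonly_profile>
        <readonly>1</readonly>
    </readonly_profile>
</profiles>
```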
@ -10,8 +10,9 @@ sidebar_label: "\u7528\u6237\u8BBE\u7F6E"
The `users` section of `user.xml` contains user settings.

:::note
ClickHouse also supports the [SQL-driven workflow](../access-rights.md#access-control) for managing users. We recommend using it.
:::

Structure of the `users` section:
@ -266,8 +266,9 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
When performing `INSERT` queries, replace omitted input column values with the default values of the respective columns. This option only applies to the [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) and [TabSeparated](../../interfaces/formats.md#tabseparated) formats.

:::note
When this option is enabled, extended table metadata is sent from the server to the client. It consumes additional computing resources on the server and can reduce performance.
:::

Possible values:
@ -99,8 +99,9 @@ slug: /zh/operations/system-tables/parts
- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — an array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).

:::warning
The `move_ttl_info.expression` array is kept mostly for backward compatibility; the easiest way to check the `TTL MOVE` rules now is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
:::

- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — an array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
@ -8,8 +8,9 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
Contains information about executed queries, for example: start time, processing duration, error messages.

:::note
This table does not contain the data ingested by `INSERT` queries.
:::

You can change the settings of query_log in the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration.
@ -12,5 +12,6 @@ sidebar_position: 107

Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error.
:::
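The same pattern of a fast function and a `*Stable` variant recurs for the covariance and variance functions below; a sketch with a hypothetical table and columns:

```sql
-- The fast variant next to the numerically stable one, for comparison.
SELECT corr(price, clicks) AS fast, corrStable(price, clicks) AS stable
FROM ads;
```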
@ -12,5 +12,6 @@ covarPop(x, y)

Calculates the value of `Σ((x - x̅)(y - y̅)) / n`.

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error.
:::
@ -14,5 +14,6 @@ covarSamp(x, y)

Returns Float64. When `n <= 1`, returns +∞.

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
:::
@ -37,8 +37,9 @@ quantileTiming(level)(expr)

Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms.

!!! note "Note"
    For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
:::note
For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
:::

**Returned value**
@ -46,8 +47,9 @@ quantileTiming(level)(expr)

Type: `Float32`.

!!! note "Note"
    If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish these cases from cases that result in a zero. See [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.
:::note
If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish these cases from cases that result in a zero. See [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.
:::

**Example**
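A minimal usage sketch; the table and column are hypothetical:

```sql
-- 95th percentile of page load times, rounded per the rules above.
SELECT quantileTiming(0.95)(load_time_ms) AS p95
FROM page_loads;
```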
@ -39,8 +39,9 @@ quantileTimingWeighted(level)(expr, weight)

Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms.

!!! note "Note"
    For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
:::note
For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
:::

**Returned value**
@ -48,8 +49,9 @@ quantileTimingWeighted(level)(expr, weight)

Type: `Float32`.

!!! note "Note"
    If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish these cases from cases that result in a zero. See [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.
:::note
If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish these cases from cases that result in a zero. See [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.
:::

**Example**
@ -7,5 +7,6 @@ sidebar_position: 30

The result is equal to the square root of [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md).

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error.
:::
@ -7,5 +7,6 @@ sidebar_position: 31

The result is equal to the square root of [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md).

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error.
:::
@ -36,8 +36,9 @@ uniqCombined(HLL_precision)(x[, ...])

- Provides the result deterministically (it does not depend on the query processing order).

!!! note "Note"
    Since it uses a 32-bit hash for non-`String` types, the result has a very high error for cardinalities significantly larger than `UINT_MAX` (the error rises quickly after a few tens of billions of distinct values), so in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64).
:::note
Since it uses a 32-bit hash for non-`String` types, the result has a very high error for cardinalities significantly larger than `UINT_MAX` (the error rises quickly after a few tens of billions of distinct values), so in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64).
:::

Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, `uniqCombined`:
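A minimal sketch of both variants side by side; the table and column are hypothetical:

```sql
-- The 32-bit hash is fine for moderate cardinalities; the 64-bit variant
-- avoids the error blow-up past UINT_MAX mentioned in the note.
SELECT uniqCombined(user_id) AS approx, uniqCombined64(user_id) AS approx64
FROM events;
```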
@ -9,5 +9,6 @@ sidebar_position: 32

In other words, it computes the dispersion of a data set. Returns `Float64`.

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
:::
@ -11,5 +11,6 @@ sidebar_position: 33

Returns `Float64`. When `n <= 1`, returns `+∞`.

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
:::
@ -6,8 +6,9 @@ sidebar_label: "ANSI\u517C\u5BB9\u6027"

# ANSI SQL Compatibility of the ClickHouse SQL Dialect {#ansi-sql-compatibility-of-clickhouse-sql-dialect}

!!! note "Note"
    This article is based on Annex G of the [ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8) standard.
:::note
This article is based on Annex G of the [ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8) standard.
:::

## Differences in Behaviour {#differences-in-behaviour}
@ -25,10 +25,10 @@ slug: /zh/sql-reference/data-types/simpleaggregatefunction

- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md)

!!! note "Note"
    Values of `SimpleAggregateFunction(func, Type)` look and are stored the same way as `Type`, so you do not need to apply functions with the `-Merge`/`-State` suffixes.

    `SimpleAggregateFunction` has better performance than `AggregateFunction` with the same aggregate function.
:::note
Values of `SimpleAggregateFunction(func, Type)` look and are stored the same way as `Type`, so you do not need to apply functions with the `-Merge`/`-State` suffixes.
`SimpleAggregateFunction` has better performance than `AggregateFunction` with the same aggregate function.
:::

**Parameters**
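A sketch of the type in a table definition; the table and column names are hypothetical:

```sql
-- max is applied incrementally on merges; the column reads like a plain UInt64.
CREATE TABLE simple_agg
(
    id UInt64,
    max_value SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
ORDER BY id;
```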
@ -42,8 +42,9 @@ slug: /zh/sql-reference/functions/string-search-functions

For a case-insensitive search and/or a search in UTF-8 format, use the functions `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`.

!!! note "Note"
    In all `multiSearch*` functions, the number of needles must be less than 2<sup>8</sup> because of implementation specifics.
:::note
In all `multiSearch*` functions, the number of needles must be less than 2<sup>8</sup> because of implementation specifics.
:::

## match(haystack, pattern) {#matchhaystack-pattern}
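A minimal sketch of the `multiSearchAny` function described above; the strings are illustrative:

```sql
-- Returns 1 because at least one needle ('Click') occurs in the haystack.
SELECT multiSearchAny('Hello, ClickHouse!', ['Click', 'World']);
```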
@ -60,8 +61,9 @@ slug: /zh/sql-reference/functions/string-search-functions

The same as `match`, but returns 0 if none of the regular expressions match and 1 if any of the patterns match. It uses the [hyperscan](https://github.com/intel/hyperscan) library. For patterns that search for substrings in a string, it is better to use `multiSearchAny`, since it works more efficiently.

!!! note "Note"
    The length of any `haystack` string must be less than 2<sup>32</sup> bytes, otherwise an exception is thrown. This restriction comes from the hyperscan API.
:::note
The length of any `haystack` string must be less than 2<sup>32</sup> bytes, otherwise an exception is thrown. This restriction comes from the hyperscan API.
:::

## multiMatchAnyIndex(haystack, \[pattern<sub>1</sub>, pattern<sub>2</sub>, …, pattern<sub>n</sub>\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn}
@ -75,11 +77,13 @@ slug: /zh/sql-reference/functions/string-search-functions

The same as `multiFuzzyMatchAny`, but returns the index of a matching pattern.

!!! note "Note"
    The `multiFuzzyMatch*` functions do not support UTF-8 regular expressions; such expressions are treated as bytes because of the hyperscan restriction.
:::note
The `multiFuzzyMatch*` functions do not support UTF-8 regular expressions; such expressions are treated as bytes because of the hyperscan restriction.
:::

!!! note "Note"
    To turn off all functions that use hyperscan, use the setting `SET allow_hyperscan = 0;`.
:::note
To turn off all functions that use hyperscan, use the setting `SET allow_hyperscan = 0;`.
:::

## extract(haystack, pattern) {#extracthaystack-pattern}
@ -119,5 +123,6 @@ slug: /zh/sql-reference/functions/string-search-functions

For a case-insensitive search and/or a search in UTF-8 format, use the functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`.

!!! note "Note"
    For UTF-8 we use the 3-gram distance. These are not perfectly fair n-gram distances: we use 2-byte hashes to hash the n-grams and then compute the (non-)symmetric difference between these hash tables, so collisions may occur. For the UTF-8 case-insensitive format we do not use a fair `tolower` function: we zero the 5th bit (counting from zero) of each Unicode character byte and the first bit of the byte, which works for Latin and mostly for all Cyrillic letters.
:::note
For UTF-8 we use the 3-gram distance. These are not perfectly fair n-gram distances: we use 2-byte hashes to hash the n-grams and then compute the (non-)symmetric difference between these hash tables, so collisions may occur. For the UTF-8 case-insensitive format we do not use a fair `tolower` function: we zero the 5th bit (counting from zero) of each Unicode character byte and the first bit of the byte, which works for Latin and mostly for all Cyrillic letters.
:::
@ -12,8 +12,9 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr

Deletes data matching the specified filter expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).

!!! note "Note"
    The `ALTER TABLE` prefix makes this syntax different from most other systems that support SQL. It is intended to signal that, unlike similar queries in OLTP databases, this is a heavy operation not designed for frequent use.
:::note
The `ALTER TABLE` prefix makes this syntax different from most other systems that support SQL. It is intended to signal that, unlike similar queries in OLTP databases, this is a heavy operation not designed for frequent use.
:::

`filter_expr` must be of type `UInt8`. The query deletes rows in the table for which this expression takes a non-zero value.
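A one-line sketch; the table and condition are hypothetical:

```sql
-- Runs asynchronously as a mutation, not as an OLTP-style delete.
ALTER TABLE visits DELETE WHERE event_date < '2020-01-01';
```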
@ -17,8 +17,9 @@ sidebar_label: ALTER

- [CONSTRAINT](../../../sql-reference/statements/alter/constraint.md)
- [TTL](../../../sql-reference/statements/alter/ttl.md)

!!! note "Note"
    Most `ALTER TABLE` queries are supported only for [\*MergeTree](../../../engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](../../../engines/table-engines/special/merge.md) and [Distributed](../../../engines/table-engines/special/distributed.md).
:::note
Most `ALTER TABLE` queries are supported only for [\*MergeTree](../../../engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](../../../engines/table-engines/special/merge.md) and [Distributed](../../../engines/table-engines/special/distributed.md).
:::

These `ALTER` statements manipulate views:
@ -14,5 +14,6 @@ ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY ORDER BY new_expression

The command is lightweight in the sense that it only changes metadata. To keep the property that the rows of a data part are ordered by the sorting key expression, you cannot add expressions containing existing columns to the sorting key (only columns added by the `ADD COLUMN` command in the same `ALTER` query, without a default column value).

!!! note "Note"
    It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables).
:::note
It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables).
:::
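A sketch of the constraint described above, where the new key column is added in the same `ALTER` query; the table and column names are hypothetical:

```sql
-- browser is added without a default in the same query, so it may
-- be appended to the sorting key.
ALTER TABLE hits
    ADD COLUMN browser String,
    MODIFY ORDER BY (counter_id, event_date, browser);
```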
@ -14,8 +14,9 @@ sidebar_label: SETTING

ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ...
```

!!! note "Note"
    These queries can only be applied to [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) tables.
:::note
These queries can only be applied to [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) tables.
:::

## MODIFY SETTING {#alter_modify_setting}
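A sketch of both forms; the table name is hypothetical, and the setting shown is a MergeTree table-level setting:

```sql
-- Change a table-level MergeTree setting, then restore its default.
ALTER TABLE hits MODIFY SETTING max_suspicious_broken_parts = 100;
ALTER TABLE hits RESET SETTING max_suspicious_broken_parts;
```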
@ -12,8 +12,9 @@ ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr

Manipulates data matching the specified filter expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).

!!! note "Note"
    The `ALTER TABLE` prefix makes this syntax different from most other systems that support SQL. It is intended to signal that, unlike similar queries in OLTP databases, this is a heavy operation not designed for frequent use.
:::note
The `ALTER TABLE` prefix makes this syntax different from most other systems that support SQL. It is intended to signal that, unlike similar queries in OLTP databases, this is a heavy operation not designed for frequent use.
:::

`filter_expr` must be of type `UInt8`. The query updates the values of the specified columns to the values of the corresponding expressions in rows for which `filter_expr` takes a non-zero value. Values are mapped to the column type using the `CAST` operator. Updating columns that are used in the calculation of the primary or partition key is not supported.
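A one-line sketch; the table and columns are hypothetical:

```sql
-- Also a mutation; the literal 1 is cast to the column type.
ALTER TABLE visits UPDATE is_deleted = 1 WHERE user_id = 42;
```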
@ -9,8 +9,9 @@ sidebar_label: EXCHANGE

Exchanges the names of two tables or dictionaries atomically.
This can also be accomplished with a [RENAME](./rename.md) query, but in that case the operation is not atomic.

!!! note "Note"
:::note
`EXCHANGE` is only supported by the [Atomic](../../engines/database-engines/atomic.md) database engine.
:::

**Syntax**
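A minimal sketch; the table names are hypothetical:

```sql
-- Atomically swap a freshly loaded table with the serving one.
EXCHANGE TABLES new_data AND old_data;
```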
@ -9,8 +9,9 @@ sidebar_label: RENAME

Renames databases, tables, or dictionaries. Several entities can be renamed in a single query.
Note that a `RENAME` query with several entities is a non-atomic operation. To exchange entity names atomically, use the [EXCHANGE](./exchange.md) statement.

!!! note "Note"
:::note
`RENAME` is only supported by the [Atomic](../../engines/database-engines/atomic.md) database engine.
:::

**Syntax**
@ -11,8 +11,9 @@ sidebar_label: GROUP BY

- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
- The result of an aggregated `SELECT` query will contain as many rows as there were unique values of the "grouping key" in the source table. Usually this significantly reduces the row count, often by orders of magnitude, but not necessarily: the row count stays the same if all "grouping key" values were distinct.

!!! note "Note"
    There is an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY` clause can be omitted, and aggregation over an empty set of keys is assumed. Such queries always return exactly one row.
:::note
There is an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY` clause can be omitted, and aggregation over an empty set of keys is assumed. Such queries always return exactly one row.
:::

## NULL Processing {#null-processing}
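A minimal sketch of the omitted-`GROUP BY` case from the note above; the table is hypothetical:

```sql
-- Columns appear only inside aggregate functions, so GROUP BY is omitted
-- and exactly one row is returned.
SELECT count(), max(event_time) FROM visits;
```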
@ -39,8 +39,9 @@ Other join types available in ClickHouse:

## Strictness {#join-settings}

!!! note "Note"
    The default strictness value can be overridden using the [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting.
:::note
The default strictness value can be overridden using the [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting.
:::

Also the behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting.
@ -91,8 +92,9 @@ USING (equi_column1, ... equi_columnN, asof_column)

`ASOF JOIN` takes, from `table_2`, the timestamp of the user event closest to the timestamp of the event in `table_1`, subject to the closest-match condition. Equal timestamp values are the closest, if available. Here the `user_id` column can be used for joining on equality, and the `ev_time` column for joining on the closest match. In this example, `event_1_1` can be joined with `event_2_1`, and `event_1_2` can be joined with `event_2_3`, but `event_2_2` cannot be joined.

!!! note "Note"
    `ASOF JOIN` is **not** supported in the [JOIN](../../../engines/table-engines/special/join.md) table engine.
:::note
`ASOF JOIN` is **not** supported in the [JOIN](../../../engines/table-engines/special/join.md) table engine.
:::

## Distributed JOIN {#global-join}
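A minimal sketch matching the `USING` form shown in the hunk header above; the tables are hypothetical:

```sql
-- The last USING column (ev_time) is the closest-match column.
SELECT *
FROM events_a
ASOF JOIN events_b
USING (user_id, ev_time);
```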
@ -14,8 +14,9 @@ ClickHouse supports the following syntax variants:

When processing a query, ClickHouse first selects data ordered by the sorting key. The sorting key is set explicitly with an [ORDER BY](order-by.md#select-order-by) clause or implicitly as a property of the table engine (row order is only guaranteed when [ORDER BY](order-by.md#select-order-by) is used; otherwise the order is randomized because of multithreading). Then ClickHouse applies `LIMIT n BY expressions`, grouping rows by the values of `expressions` and returning the first `n` rows of each group. If `OFFSET` is specified, ClickHouse skips the first `offset_value` rows of each group and then returns at most `n` rows. If `offset_value` is bigger than the number of rows in a group, ClickHouse returns zero rows for that group.

!!! note "Note"
    `LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query.
:::note
`LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query.
:::

## Examples {#examples}
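A minimal sketch; the table and columns are hypothetical:

```sql
-- At most two rows are kept for each distinct domain.
SELECT domain, hit_time
FROM hits
ORDER BY domain, hit_time
LIMIT 2 BY domain;
```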
@ -15,8 +15,9 @@ sidebar_label: SAMPLE

- When your raw data is not accurate, so approximation does not noticeably degrade the quality.
- When business requirements target approximate results (for cost-effectiveness, or to market exact results to premium users).

!!! note "Note"
    You can only use sampling with tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family, and only if the sampling expression was specified during table creation (see the [MergeTree engine](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)).
:::note
You can only use sampling with tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family, and only if the sampling expression was specified during table creation (see the [MergeTree engine](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)).
:::

The features of data sampling are listed below:
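A minimal sketch of the clause itself; the table is hypothetical and must declare a sampling key:

```sql
-- Read roughly 10% of the data and scale the count back up.
SELECT count() * 10 AS estimated_hits
FROM hits
SAMPLE 1 / 10;
```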
@ -11,9 +11,10 @@ sidebar_label: WHERE

`WHERE` expressions use indexes and partition pruning, if the underlying table engine supports that.

!!! note "Note"
    There is a filtering optimization called [prewhere](../../../sql-reference/statements/select/prewhere.md).

:::note
There is a filtering optimization called [prewhere](../../../sql-reference/statements/select/prewhere.md).
:::

If you need to test a [NULL](../../../sql-reference/syntax.md#null-literal) value, use the [IS NULL](../../operators/index.md#operator-is-null) and [IS NOT NULL](../../operators/index.md#is-not-null) operators or the [isNull](../../../sql-reference/functions/functions-for-nulls.md#isnull) and [isNotNull](../../../sql-reference/functions/functions-for-nulls.md#isnotnull) functions. Otherwise an expression with NULL never passes.

**Example**
@ -124,10 +124,9 @@ ClickHouse can manage [MergeTree](../../engines/table-engines/mergetree-family/

SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
```

!!! note "Note"
    `DETACH / ATTACH` table operations start background merges for the table even when merges have previously been stopped for all MergeTree tables.
:::note
`DETACH / ATTACH` table operations start background merges for the table even when merges have previously been stopped for all MergeTree tables.
:::

### START MERGES {#query_language-system-start-merges}
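A minimal sketch of both statements; the table name is hypothetical:

```sql
-- Pause background merges for one table, then resume them.
SYSTEM STOP MERGES visits;
SYSTEM START MERGES visits;
```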
@ -49,8 +49,9 @@ SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database',

A table object with the same columns as the original MySQL table.

!!! note "Note"
    In an `INSERT` query, to distinguish the table function `mysql(...)` from a table name with a list of column names, you must use the keywords `FUNCTION` or `TABLE FUNCTION`. See the examples below.
:::note
In an `INSERT` query, to distinguish the table function `mysql(...)` from a table name with a list of column names, you must use the keywords `FUNCTION` or `TABLE FUNCTION`. See the examples below.
:::

## Usage Example {#usage-example}
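A sketch of the `FUNCTION` keyword mentioned in the note; the connection parameters and columns are hypothetical:

```sql
-- Without FUNCTION, "mysql" would be parsed as a table named mysql.
INSERT INTO FUNCTION mysql('mysql1:3306', 'mysql_database', 'test', 'user', 'password')
    (id, name) VALUES (1, 'alice');
```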
@ -14,7 +14,8 @@ User=clickhouse

Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=%p # %p is resolved to the systemd unit name
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-keeper --config=/etc/clickhouse-keeper/keeper_config.xml --pid-file=%t/%p/%p.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/%p
@ -20,7 +20,7 @@

#include <Common/formatReadable.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/OpenSSLHelpers.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/getResource.h>
#include <base/sleep.h>
#include <IO/ReadBufferFromFileDescriptor.h>
@ -128,6 +128,7 @@ if (BUILD_STANDALONE_KEEPER)

ch_contrib::lz4
ch_contrib::zstd
ch_contrib::cityhash
ch_contrib::jemalloc
common ch_contrib::double_conversion
ch_contrib::dragonbox_to_chars
pcg_random
Some files were not shown because too many files have changed in this diff