Merge branch 'master' of github.com:ClickHouse/ClickHouse into ADQM-528-B

This commit is contained in:
Roman Vasin 2022-09-15 13:47:51 +00:00
commit 5c242de43a
965 changed files with 36946 additions and 17904 deletions

15
.git-blame-ignore-revs Normal file
View File

@ -0,0 +1,15 @@
# This is a file that can be used by git-blame to ignore some revisions.
# (git 2.23+, released in August 2019)
#
# Can be configured as follow:
#
# $ git config blame.ignoreRevsFile .git-blame-ignore-revs
#
# For more information you can look at git-blame(1) man page.
# Changed tabs to spaces in code [#CLICKHOUSE-3]
137ad95929ee016cc6d3c03bccb5586941c163ff
# dbms/ → src/
# (though it is unlikely that you will see it in blame)
06446b4f08a142d6f1bc30664c47ded88ab51782

View File

@ -349,6 +349,100 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwin:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@ -425,6 +519,46 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
BuilderSpecialReport:
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse special build check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cat > "$NEEDS_DATA_PATH" << 'EOF'
${{ toJSON(needs) }}
EOF
cd "$GITHUB_WORKSPACE/tests/ci"
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAl STATELESS TESTS #######################################
##############################################################################################
@ -592,6 +726,7 @@ jobs:
- DockerHubPush
- DockerServerImages
- BuilderReport
- BuilderSpecialReport
- FunctionalStatelessTestAsan
- FunctionalStatefulTestDebug
- StressTestTsan

View File

@ -923,6 +923,53 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinAmd64SSE2:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64sse2
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@ -1011,6 +1058,7 @@ jobs:
- BuilderBinFreeBSD
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinAmd64SSE2
- BuilderBinClangTidy
- BuilderDebShared
runs-on: [self-hosted, style-checker]

View File

@ -935,6 +935,51 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinAmd64SSE2:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64sse2
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@ -1023,6 +1068,7 @@ jobs:
- BuilderBinFreeBSD
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinAmd64SSE2
- BuilderBinClangTidy
- BuilderDebShared
runs-on: [self-hosted, style-checker]
@ -1254,6 +1300,228 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestS3Debug0:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestS3Debug1:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestS3Debug2:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestS3Tsan0:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (tsan, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestS3Tsan1:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (tsan, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestS3Tsan2:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (tsan, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, func-tester-aarch64]
@ -3388,6 +3656,12 @@ jobs:
- FunctionalStatefulTestMsan
- FunctionalStatefulTestUBsan
- FunctionalStatelessTestReleaseS3
- FunctionalStatelessTestS3Debug0
- FunctionalStatelessTestS3Debug1
- FunctionalStatelessTestS3Debug2
- FunctionalStatelessTestS3Tsan0
- FunctionalStatelessTestS3Tsan1
- FunctionalStatelessTestS3Tsan2
- StressTestDebug
- StressTestAsan
- StressTestTsan

View File

@ -29,8 +29,13 @@ jobs:
rm -rf "$TEMP_PATH" && mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY"
python3 ./tests/ci/push_to_artifactory.py --release "${{ github.ref }}" \
--commit '${{ github.sha }}' --artifactory-url "${{ secrets.JFROG_ARTIFACTORY_URL }}" --all
# Download and push packages to artifactory
python3 ./tests/ci/push_to_artifactory.py --release '${{ github.ref }}' \
--commit '${{ github.sha }}' --artifactory-url '${{ secrets.JFROG_ARTIFACTORY_URL }}' --all
# Download macos binaries to ${{runner.temp}}/download_binary
python3 ./tests/ci/download_binary.py --version '${{ github.ref }}' \
--commit '${{ github.sha }}' binary_darwin binary_darwin_aarch64
mv '${{runner.temp}}/download_binary/'clickhouse-* '${{runner.temp}}/push_to_artifactory'
- name: Upload packages to release assets
uses: svenstaro/upload-release-action@v2
with:

View File

@ -426,6 +426,100 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwin:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@ -505,6 +599,46 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
BuilderSpecialReport:
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse special build check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cat > "$NEEDS_DATA_PATH" << 'EOF'
${{ toJSON(needs) }}
EOF
cd "$GITHUB_WORKSPACE/tests/ci"
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAl STATELESS TESTS #######################################
##############################################################################################
@ -1847,6 +1981,7 @@ jobs:
- DockerHubPush
- DockerServerImages
- BuilderReport
- BuilderSpecialReport
- FunctionalStatelessTestDebug0
- FunctionalStatelessTestDebug1
- FunctionalStatelessTestDebug2

View File

@ -43,6 +43,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
run: |
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
./utils/list-versions/update-docker-version.sh
GID=$(id -g "${UID}")
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \
--volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \

View File

@ -143,6 +143,8 @@ include (cmake/add_warning.cmake)
if (COMPILER_CLANG)
# generate ranges for fast "addr2line" search
if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
# NOTE: that clang has a bug because of it does not emit .debug_aranges
# with ThinLTO, so custom ld.lld wrapper is shipped in docker images.
set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges")
endif ()

View File

@ -15,4 +15,5 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Upcoming events
* [**v22.8 Release Webinar**](https://clickhouse.com/company/events/v22-8-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
* [**v22.9 Release Webinar**](https://clickhouse.com/company/events/v22-9-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
* [**ClickHouse for Analytics @ Barracuda Networks**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/288140358/) Join us for this in person meetup hosted by our friends at Barracuda in Bay Area.

View File

@ -22,7 +22,7 @@ POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException") // NOL
#endif
/// Прочитать беззнаковое целое в простом формате из не-0-terminated строки.
/// Read unsigned integer in a simple form from a non-0-terminated string.
static UInt64 readUIntText(const char * buf, const char * end)
{
UInt64 x = 0;
@ -59,7 +59,7 @@ static UInt64 readUIntText(const char * buf, const char * end)
}
/// Прочитать знаковое целое в простом формате из не-0-terminated строки.
/// Read signed integer in a simple form from a non-0-terminated string.
static Int64 readIntText(const char * buf, const char * end)
{
bool negative = false;
@ -102,7 +102,7 @@ static Int64 readIntText(const char * buf, const char * end)
}
/// Прочитать число с плавающей запятой в простом формате, с грубым округлением, из не-0-terminated строки.
/// Read floating point number in simple format, imprecisely, from a non-0-terminated string.
static double readFloatText(const char * buf, const char * end)
{
bool negative = false;
@ -151,8 +151,8 @@ static double readFloatText(const char * buf, const char * end)
case 'E':
{
++buf;
Int32 exponent = readIntText(buf, end);
x *= preciseExp10(exponent);
auto exponent = readIntText(buf, end);
x *= preciseExp10(static_cast<double>(exponent));
run = false;
break;
@ -207,7 +207,7 @@ JSON::ElementType JSON::getType() const
return TYPE_NUMBER;
case '"':
{
/// Проверим - это просто строка или name-value pair
/// Is it a string or a name-value pair?
Pos after_string = skipString();
if (after_string < ptr_end && *after_string == ':')
return TYPE_NAME_VALUE_PAIR;
@ -229,15 +229,13 @@ void JSON::checkPos(Pos pos) const
JSON::Pos JSON::skipString() const
{
//std::cerr << "skipString()\t" << data() << std::endl;
Pos pos = ptr_begin;
checkPos(pos);
if (*pos != '"')
throw JSONException(std::string("JSON: expected \", got ") + *pos);
++pos;
/// fast path: находим следующую двойную кавычку. Если перед ней нет бэкслеша - значит это конец строки (при допущении корректности JSON).
/// fast path: find next double quote. If it is not escaped by backslash - then it's an end of string (assuming JSON is valid).
Pos closing_quote = reinterpret_cast<const char *>(memchr(reinterpret_cast<const void *>(pos), '\"', ptr_end - pos));
if (nullptr != closing_quote && closing_quote[-1] != '\\')
return closing_quote + 1;
@ -269,8 +267,6 @@ JSON::Pos JSON::skipString() const
JSON::Pos JSON::skipNumber() const
{
//std::cerr << "skipNumber()\t" << data() << std::endl;
Pos pos = ptr_begin;
checkPos(pos);
@ -296,8 +292,6 @@ JSON::Pos JSON::skipNumber() const
JSON::Pos JSON::skipBool() const
{
//std::cerr << "skipBool()\t" << data() << std::endl;
Pos pos = ptr_begin;
checkPos(pos);
@ -314,16 +308,12 @@ JSON::Pos JSON::skipBool() const
JSON::Pos JSON::skipNull() const
{
//std::cerr << "skipNull()\t" << data() << std::endl;
return ptr_begin + 4;
}
JSON::Pos JSON::skipNameValuePair() const
{
//std::cerr << "skipNameValuePair()\t" << data() << std::endl;
Pos pos = skipString();
checkPos(pos);
@ -338,8 +328,6 @@ JSON::Pos JSON::skipNameValuePair() const
JSON::Pos JSON::skipArray() const
{
//std::cerr << "skipArray()\t" << data() << std::endl;
if (!isArray())
throw JSONException("JSON: expected [");
Pos pos = ptr_begin;
@ -370,8 +358,6 @@ JSON::Pos JSON::skipArray() const
JSON::Pos JSON::skipObject() const
{
//std::cerr << "skipObject()\t" << data() << std::endl;
if (!isObject())
throw JSONException("JSON: expected {");
Pos pos = ptr_begin;
@ -402,8 +388,6 @@ JSON::Pos JSON::skipObject() const
JSON::Pos JSON::skipElement() const
{
//std::cerr << "skipElement()\t" << data() << std::endl;
ElementType type = getType();
switch (type)
@ -640,7 +624,7 @@ std::string JSON::getString() const
{
throw JSONException("JSON: incorrect syntax: incorrect HEX code.");
}
buf.resize(buf.size() + 6); /// максимальный размер UTF8 многобайтовой последовательности
buf.resize(buf.size() + 6); /// Max size of UTF-8 sequence, including pre-standard mapping of UCS-4 to UTF-8.
int res = utf8.convert(unicode,
reinterpret_cast<unsigned char *>(const_cast<char*>(buf.data())) + buf.size() - 6, 6);
if (!res)
@ -754,8 +738,6 @@ JSON::iterator JSON::iterator::begin() const
if (type != TYPE_ARRAY && type != TYPE_OBJECT)
throw JSONException("JSON: not array or object when calling begin() method.");
//std::cerr << "begin()\t" << data() << std::endl;
Pos pos = ptr_begin + 1;
checkPos(pos);
if (*pos == '}' || *pos == ']')
@ -846,4 +828,3 @@ bool JSON::isType<bool>() const
{
return isBool();
}

View File

@ -220,6 +220,35 @@ ReplxxLineReader::ReplxxLineReader(
rx.bind_key(Replxx::KEY::control('W'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::KILL_TO_WHITESPACE_ON_LEFT, code); });
rx.bind_key(Replxx::KEY::meta('E'), [this](char32_t) { openEditor(); return Replxx::ACTION_RESULT::CONTINUE; });
/// readline insert-comment
auto insert_comment_action = [this](char32_t code)
{
replxx::Replxx::State state(rx.get_state());
const char * line = state.text();
const char * line_end = line + strlen(line);
std::string commented_line;
if (std::find(line, line_end, '\n') != line_end)
{
/// If query has multiple lines, multiline comment is used over
/// commenting each line separately for easier uncomment (though
/// with invoking editor it is simpler to uncomment multiple lines)
///
/// Note, that using multiline comment is OK even with nested
/// comments, since nested comments are supported.
commented_line = fmt::format("/* {} */", state.text());
}
else
{
// In a simplest case use simple comment.
commented_line = fmt::format("-- {}", state.text());
}
rx.set_state(replxx::Replxx::State(commented_line.c_str(), commented_line.size()));
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
};
rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action);
}
ReplxxLineReader::~ReplxxLineReader()

View File

@ -61,11 +61,6 @@ struct StringRef
constexpr explicit operator std::string_view() const { return std::string_view(data, size); }
};
/// Here constexpr doesn't implicate inline, see https://www.viva64.com/en/w/v1043/
/// nullptr can't be used because the StringRef values are used in SipHash's pointer arithmetic
/// and the UBSan thinks that something like nullptr + 8 is UB.
constexpr const inline char empty_string_ref_addr{};
constexpr const inline StringRef EMPTY_STRING_REF{&empty_string_ref_addr, 0};
using StringRefs = std::vector<StringRef>;
@ -227,7 +222,7 @@ inline UInt64 shiftMix(UInt64 val)
return val ^ (val >> 47);
}
inline UInt64 rotateByAtLeast1(UInt64 val, int shift)
inline UInt64 rotateByAtLeast1(UInt64 val, UInt8 shift)
{
return (val >> shift) | (val << (64 - shift));
}
@ -249,7 +244,7 @@ inline size_t hashLessThan8(const char * data, size_t size)
uint8_t b = data[size >> 1];
uint8_t c = data[size - 1];
uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
uint32_t z = size + (static_cast<uint32_t>(c) << 2);
uint32_t z = static_cast<uint32_t>(size) + (static_cast<uint32_t>(c) << 2);
return shiftMix(y * k2 ^ z * k3) * k2;
}
@ -262,7 +257,7 @@ inline size_t hashLessThan16(const char * data, size_t size)
{
UInt64 a = unalignedLoad<UInt64>(data);
UInt64 b = unalignedLoad<UInt64>(data + size - 8);
return hashLen16(a, rotateByAtLeast1(b + size, size)) ^ b;
return hashLen16(a, rotateByAtLeast1(b + size, static_cast<UInt8>(size))) ^ b;
}
return hashLessThan8(data, size);

View File

@ -15,7 +15,7 @@
*
* Allow to search for next character from the set of 'symbols...' in a string.
* It is similar to 'strpbrk', 'strcspn' (and 'strchr', 'memchr' in the case of one symbol and '\0'),
* but with the following differencies:
* but with the following differences:
* - works with any memory ranges, including containing zero bytes;
* - doesn't require terminating zero byte: end of memory range is passed explicitly;
* - if not found, returns pointer to end instead of nullptr;

View File

@ -22,7 +22,7 @@ uint64_t getThreadId()
#if defined(OS_ANDROID)
current_tid = gettid();
#elif defined(OS_LINUX)
current_tid = syscall(SYS_gettid); /// This call is always successful. - man gettid
current_tid = static_cast<uint64_t>(syscall(SYS_gettid)); /// This call is always successful. - man gettid
#elif defined(OS_FREEBSD)
current_tid = pthread_getthreadid_np();
#elif defined(OS_SUNOS)

View File

@ -14,37 +14,37 @@ static T shift10Impl(T x, int exponent)
static const long double powers10[] =
{
1e-323L, 1e-322L, 1e-321L, 1e-320L, 1e-319L, 1e-318L, 1e-317L, 1e-316L, 1e-315L, 1e-314L, 1e-313L, 1e-312L, 1e-311L,
1e-310L,1e-309L,1e-308L,1e-307L,1e-306L,1e-305L,1e-304L,1e-303L,1e-302L,1e-301L,1e-300L,1e-299L,1e-298L,1e-297L,1e-296L,1e-295L,1e-294L,1e-293L,1e-292L,1e-291L,
1e-290L,1e-289L,1e-288L,1e-287L,1e-286L,1e-285L,1e-284L,1e-283L,1e-282L,1e-281L,1e-280L,1e-279L,1e-278L,1e-277L,1e-276L,1e-275L,1e-274L,1e-273L,1e-272L,1e-271L,
1e-270L,1e-269L,1e-268L,1e-267L,1e-266L,1e-265L,1e-264L,1e-263L,1e-262L,1e-261L,1e-260L,1e-259L,1e-258L,1e-257L,1e-256L,1e-255L,1e-254L,1e-253L,1e-252L,1e-251L,
1e-250L,1e-249L,1e-248L,1e-247L,1e-246L,1e-245L,1e-244L,1e-243L,1e-242L,1e-241L,1e-240L,1e-239L,1e-238L,1e-237L,1e-236L,1e-235L,1e-234L,1e-233L,1e-232L,1e-231L,
1e-230L,1e-229L,1e-228L,1e-227L,1e-226L,1e-225L,1e-224L,1e-223L,1e-222L,1e-221L,1e-220L,1e-219L,1e-218L,1e-217L,1e-216L,1e-215L,1e-214L,1e-213L,1e-212L,1e-211L,
1e-210L,1e-209L,1e-208L,1e-207L,1e-206L,1e-205L,1e-204L,1e-203L,1e-202L,1e-201L,1e-200L,1e-199L,1e-198L,1e-197L,1e-196L,1e-195L,1e-194L,1e-193L,1e-192L,1e-191L,
1e-190L,1e-189L,1e-188L,1e-187L,1e-186L,1e-185L,1e-184L,1e-183L,1e-182L,1e-181L,1e-180L,1e-179L,1e-178L,1e-177L,1e-176L,1e-175L,1e-174L,1e-173L,1e-172L,1e-171L,
1e-170L,1e-169L,1e-168L,1e-167L,1e-166L,1e-165L,1e-164L,1e-163L,1e-162L,1e-161L,1e-160L,1e-159L,1e-158L,1e-157L,1e-156L,1e-155L,1e-154L,1e-153L,1e-152L,1e-151L,
1e-150L,1e-149L,1e-148L,1e-147L,1e-146L,1e-145L,1e-144L,1e-143L,1e-142L,1e-141L,1e-140L,1e-139L,1e-138L,1e-137L,1e-136L,1e-135L,1e-134L,1e-133L,1e-132L,1e-131L,
1e-130L,1e-129L,1e-128L,1e-127L,1e-126L,1e-125L,1e-124L,1e-123L,1e-122L,1e-121L,1e-120L,1e-119L,1e-118L,1e-117L,1e-116L,1e-115L,1e-114L,1e-113L,1e-112L,1e-111L,
1e-110L,1e-109L,1e-108L,1e-107L,1e-106L,1e-105L,1e-104L,1e-103L,1e-102L,1e-101L,1e-100L,1e-99L,1e-98L,1e-97L,1e-96L,1e-95L,1e-94L,1e-93L,1e-92L,1e-91L,1e-90L,
1e-89L,1e-88L,1e-87L,1e-86L,1e-85L,1e-84L,1e-83L,1e-82L,1e-81L,1e-80L,1e-79L,1e-78L,1e-77L,1e-76L,1e-75L,1e-74L,1e-73L,1e-72L,1e-71L,1e-70,
1e-69L,1e-68L,1e-67L,1e-66L,1e-65L,1e-64L,1e-63L,1e-62L,1e-61L,1e-60L,1e-59L,1e-58L,1e-57L,1e-56L,1e-55L,1e-54L,1e-53L,1e-52L,1e-51L,1e-50,
1e-49L,1e-48L,1e-47L,1e-46L,1e-45L,1e-44L,1e-43L,1e-42L,1e-41L,1e-40L,1e-39L,1e-38L,1e-37L,1e-36L,1e-35L,1e-34L,1e-33L,1e-32L,1e-31L,1e-30,
1e-29L,1e-28L,1e-27L,1e-26L,1e-25L,1e-24L,1e-23L,1e-22L,1e-21L,1e-20L,1e-19L,1e-18L,1e-17L,1e-16L,1e-15L,1e-14L,1e-13L,1e-12L,1e-11L,1e-10,
1e-9L,1e-8L,1e-7L,1e-6L,1e-5L,1e-4L,1e-3L,1e-2L,1e-1L,1e0L,1e1L,1e2L,1e3L,1e4L,1e5L,1e6L,1e7L,1e8L,1e9L,1e10,
1e11L,1e12L,1e13L,1e14L,1e15L,1e16L,1e17L,1e18L,1e19L,1e20L,1e21L,1e22L,1e23L,1e24L,1e25L,1e26L,1e27L,1e28L,1e29L,1e30,
1e31L,1e32L,1e33L,1e34L,1e35L,1e36L,1e37L,1e38L,1e39L,1e40L,1e41L,1e42L,1e43L,1e44L,1e45L,1e46L,1e47L,1e48L,1e49L,1e50,
1e51L,1e52L,1e53L,1e54L,1e55L,1e56L,1e57L,1e58L,1e59L,1e60L,1e61L,1e62L,1e63L,1e64L,1e65L,1e66L,1e67L,1e68L,1e69L,1e70,
1e71L,1e72L,1e73L,1e74L,1e75L,1e76L,1e77L,1e78L,1e79L,1e80L,1e81L,1e82L,1e83L,1e84L,1e85L,1e86L,1e87L,1e88L,1e89L,1e90,
1e91L,1e92L,1e93L,1e94L,1e95L,1e96L,1e97L,1e98L,1e99L,1e100L,1e101L,1e102L,1e103L,1e104L,1e105L,1e106L,1e107L,1e108L,1e109L,1e110,
1e111L,1e112L,1e113L,1e114L,1e115L,1e116L,1e117L,1e118L,1e119L,1e120L,1e121L,1e122L,1e123L,1e124L,1e125L,1e126L,1e127L,1e128L,1e129L,1e130,
1e131L,1e132L,1e133L,1e134L,1e135L,1e136L,1e137L,1e138L,1e139L,1e140L,1e141L,1e142L,1e143L,1e144L,1e145L,1e146L,1e147L,1e148L,1e149L,1e150,
1e151L,1e152L,1e153L,1e154L,1e155L,1e156L,1e157L,1e158L,1e159L,1e160L,1e161L,1e162L,1e163L,1e164L,1e165L,1e166L,1e167L,1e168L,1e169L,1e170,
1e171L,1e172L,1e173L,1e174L,1e175L,1e176L,1e177L,1e178L,1e179L,1e180L,1e181L,1e182L,1e183L,1e184L,1e185L,1e186L,1e187L,1e188L,1e189L,1e190,
1e191L,1e192L,1e193L,1e194L,1e195L,1e196L,1e197L,1e198L,1e199L,1e200L,1e201L,1e202L,1e203L,1e204L,1e205L,1e206L,1e207L,1e208L,1e209L,1e210,
1e211L,1e212L,1e213L,1e214L,1e215L,1e216L,1e217L,1e218L,1e219L,1e220L,1e221L,1e222L,1e223L,1e224L,1e225L,1e226L,1e227L,1e228L,1e229L,1e230,
1e231L,1e232L,1e233L,1e234L,1e235L,1e236L,1e237L,1e238L,1e239L,1e240L,1e241L,1e242L,1e243L,1e244L,1e245L,1e246L,1e247L,1e248L,1e249L,1e250,
1e251L,1e252L,1e253L,1e254L,1e255L,1e256L,1e257L,1e258L,1e259L,1e260L,1e261L,1e262L,1e263L,1e264L,1e265L,1e266L,1e267L,1e268L,1e269L,1e270,
1e271L,1e272L,1e273L,1e274L,1e275L,1e276L,1e277L,1e278L,1e279L,1e280L,1e281L,1e282L,1e283L,1e284L,1e285L,1e286L,1e287L,1e288L,1e289L,1e290,
1e291L,1e292L,1e293L,1e294L,1e295L,1e296L,1e297L,1e298L,1e299L,1e300L,1e301L,1e302L,1e303L,1e304L,1e305L,1e306L,1e307L,1e308L
1e-310L, 1e-309L, 1e-308L, 1e-307L, 1e-306L, 1e-305L, 1e-304L, 1e-303L, 1e-302L, 1e-301L, 1e-300L, 1e-299L, 1e-298L, 1e-297L, 1e-296L, 1e-295L, 1e-294L, 1e-293L, 1e-292L, 1e-291L,
1e-290L, 1e-289L, 1e-288L, 1e-287L, 1e-286L, 1e-285L, 1e-284L, 1e-283L, 1e-282L, 1e-281L, 1e-280L, 1e-279L, 1e-278L, 1e-277L, 1e-276L, 1e-275L, 1e-274L, 1e-273L, 1e-272L, 1e-271L,
1e-270L, 1e-269L, 1e-268L, 1e-267L, 1e-266L, 1e-265L, 1e-264L, 1e-263L, 1e-262L, 1e-261L, 1e-260L, 1e-259L, 1e-258L, 1e-257L, 1e-256L, 1e-255L, 1e-254L, 1e-253L, 1e-252L, 1e-251L,
1e-250L, 1e-249L, 1e-248L, 1e-247L, 1e-246L, 1e-245L, 1e-244L, 1e-243L, 1e-242L, 1e-241L, 1e-240L, 1e-239L, 1e-238L, 1e-237L, 1e-236L, 1e-235L, 1e-234L, 1e-233L, 1e-232L, 1e-231L,
1e-230L, 1e-229L, 1e-228L, 1e-227L, 1e-226L, 1e-225L, 1e-224L, 1e-223L, 1e-222L, 1e-221L, 1e-220L, 1e-219L, 1e-218L, 1e-217L, 1e-216L, 1e-215L, 1e-214L, 1e-213L, 1e-212L, 1e-211L,
1e-210L, 1e-209L, 1e-208L, 1e-207L, 1e-206L, 1e-205L, 1e-204L, 1e-203L, 1e-202L, 1e-201L, 1e-200L, 1e-199L, 1e-198L, 1e-197L, 1e-196L, 1e-195L, 1e-194L, 1e-193L, 1e-192L, 1e-191L,
1e-190L, 1e-189L, 1e-188L, 1e-187L, 1e-186L, 1e-185L, 1e-184L, 1e-183L, 1e-182L, 1e-181L, 1e-180L, 1e-179L, 1e-178L, 1e-177L, 1e-176L, 1e-175L, 1e-174L, 1e-173L, 1e-172L, 1e-171L,
1e-170L, 1e-169L, 1e-168L, 1e-167L, 1e-166L, 1e-165L, 1e-164L, 1e-163L, 1e-162L, 1e-161L, 1e-160L, 1e-159L, 1e-158L, 1e-157L, 1e-156L, 1e-155L, 1e-154L, 1e-153L, 1e-152L, 1e-151L,
1e-150L, 1e-149L, 1e-148L, 1e-147L, 1e-146L, 1e-145L, 1e-144L, 1e-143L, 1e-142L, 1e-141L, 1e-140L, 1e-139L, 1e-138L, 1e-137L, 1e-136L, 1e-135L, 1e-134L, 1e-133L, 1e-132L, 1e-131L,
1e-130L, 1e-129L, 1e-128L, 1e-127L, 1e-126L, 1e-125L, 1e-124L, 1e-123L, 1e-122L, 1e-121L, 1e-120L, 1e-119L, 1e-118L, 1e-117L, 1e-116L, 1e-115L, 1e-114L, 1e-113L, 1e-112L, 1e-111L,
1e-110L, 1e-109L, 1e-108L, 1e-107L, 1e-106L, 1e-105L, 1e-104L, 1e-103L, 1e-102L, 1e-101L, 1e-100L, 1e-99L, 1e-98L, 1e-97L, 1e-96L, 1e-95L, 1e-94L, 1e-93L, 1e-92L, 1e-91L, 1e-90L,
1e-89L, 1e-88L, 1e-87L, 1e-86L, 1e-85L, 1e-84L, 1e-83L, 1e-82L, 1e-81L, 1e-80L, 1e-79L, 1e-78L, 1e-77L, 1e-76L, 1e-75L, 1e-74L, 1e-73L, 1e-72L, 1e-71L, 1e-70,
1e-69L, 1e-68L, 1e-67L, 1e-66L, 1e-65L, 1e-64L, 1e-63L, 1e-62L, 1e-61L, 1e-60L, 1e-59L, 1e-58L, 1e-57L, 1e-56L, 1e-55L, 1e-54L, 1e-53L, 1e-52L, 1e-51L, 1e-50,
1e-49L, 1e-48L, 1e-47L, 1e-46L, 1e-45L, 1e-44L, 1e-43L, 1e-42L, 1e-41L, 1e-40L, 1e-39L, 1e-38L, 1e-37L, 1e-36L, 1e-35L, 1e-34L, 1e-33L, 1e-32L, 1e-31L, 1e-30,
1e-29L, 1e-28L, 1e-27L, 1e-26L, 1e-25L, 1e-24L, 1e-23L, 1e-22L, 1e-21L, 1e-20L, 1e-19L, 1e-18L, 1e-17L, 1e-16L, 1e-15L, 1e-14L, 1e-13L, 1e-12L, 1e-11L, 1e-10,
1e-9L, 1e-8L, 1e-7L, 1e-6L, 1e-5L, 1e-4L, 1e-3L, 1e-2L, 1e-1L, 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 1e10,
1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L, 1e28L, 1e29L, 1e30,
1e31L, 1e32L, 1e33L, 1e34L, 1e35L, 1e36L, 1e37L, 1e38L, 1e39L, 1e40L, 1e41L, 1e42L, 1e43L, 1e44L, 1e45L, 1e46L, 1e47L, 1e48L, 1e49L, 1e50,
1e51L, 1e52L, 1e53L, 1e54L, 1e55L, 1e56L, 1e57L, 1e58L, 1e59L, 1e60L, 1e61L, 1e62L, 1e63L, 1e64L, 1e65L, 1e66L, 1e67L, 1e68L, 1e69L, 1e70,
1e71L, 1e72L, 1e73L, 1e74L, 1e75L, 1e76L, 1e77L, 1e78L, 1e79L, 1e80L, 1e81L, 1e82L, 1e83L, 1e84L, 1e85L, 1e86L, 1e87L, 1e88L, 1e89L, 1e90,
1e91L, 1e92L, 1e93L, 1e94L, 1e95L, 1e96L, 1e97L, 1e98L, 1e99L, 1e100L, 1e101L, 1e102L, 1e103L, 1e104L, 1e105L, 1e106L, 1e107L, 1e108L, 1e109L, 1e110,
1e111L, 1e112L, 1e113L, 1e114L, 1e115L, 1e116L, 1e117L, 1e118L, 1e119L, 1e120L, 1e121L, 1e122L, 1e123L, 1e124L, 1e125L, 1e126L, 1e127L, 1e128L, 1e129L, 1e130,
1e131L, 1e132L, 1e133L, 1e134L, 1e135L, 1e136L, 1e137L, 1e138L, 1e139L, 1e140L, 1e141L, 1e142L, 1e143L, 1e144L, 1e145L, 1e146L, 1e147L, 1e148L, 1e149L, 1e150,
1e151L, 1e152L, 1e153L, 1e154L, 1e155L, 1e156L, 1e157L, 1e158L, 1e159L, 1e160L, 1e161L, 1e162L, 1e163L, 1e164L, 1e165L, 1e166L, 1e167L, 1e168L, 1e169L, 1e170,
1e171L, 1e172L, 1e173L, 1e174L, 1e175L, 1e176L, 1e177L, 1e178L, 1e179L, 1e180L, 1e181L, 1e182L, 1e183L, 1e184L, 1e185L, 1e186L, 1e187L, 1e188L, 1e189L, 1e190,
1e191L, 1e192L, 1e193L, 1e194L, 1e195L, 1e196L, 1e197L, 1e198L, 1e199L, 1e200L, 1e201L, 1e202L, 1e203L, 1e204L, 1e205L, 1e206L, 1e207L, 1e208L, 1e209L, 1e210,
1e211L, 1e212L, 1e213L, 1e214L, 1e215L, 1e216L, 1e217L, 1e218L, 1e219L, 1e220L, 1e221L, 1e222L, 1e223L, 1e224L, 1e225L, 1e226L, 1e227L, 1e228L, 1e229L, 1e230,
1e231L, 1e232L, 1e233L, 1e234L, 1e235L, 1e236L, 1e237L, 1e238L, 1e239L, 1e240L, 1e241L, 1e242L, 1e243L, 1e244L, 1e245L, 1e246L, 1e247L, 1e248L, 1e249L, 1e250,
1e251L, 1e252L, 1e253L, 1e254L, 1e255L, 1e256L, 1e257L, 1e258L, 1e259L, 1e260L, 1e261L, 1e262L, 1e263L, 1e264L, 1e265L, 1e266L, 1e267L, 1e268L, 1e269L, 1e270,
1e271L, 1e272L, 1e273L, 1e274L, 1e275L, 1e276L, 1e277L, 1e278L, 1e279L, 1e280L, 1e281L, 1e282L, 1e283L, 1e284L, 1e285L, 1e286L, 1e287L, 1e288L, 1e289L, 1e290,
1e291L, 1e292L, 1e293L, 1e294L, 1e295L, 1e296L, 1e297L, 1e298L, 1e299L, 1e300L, 1e301L, 1e302L, 1e303L, 1e304L, 1e305L, 1e306L, 1e307L, 1e308L
};
if (unlikely(exponent < min_exponent)) /// Note: there are some values below MIN_EXPONENT that is greater than zero.
@ -52,7 +52,7 @@ static T shift10Impl(T x, int exponent)
else if (unlikely(exponent > max_exponent))
x *= std::numeric_limits<T>::infinity(); /// Multiplying to keep the sign of infinity.
else
x *= powers10[exponent - min_exponent];
x *= static_cast<T>(powers10[exponent - min_exponent]);
return x;
}
@ -68,12 +68,12 @@ float shift10(float x, int exponent)
return shift10Impl(x, exponent);
}
double shift10(UInt64 x, int exponent)
long double shift10(UInt64 x, int exponent)
{
return shift10Impl(static_cast<long double>(x), exponent);
}
double shift10(Int64 x, int exponent)
long double shift10(Int64 x, int exponent)
{
return shift10Impl(static_cast<long double>(x), exponent);
}

View File

@ -12,5 +12,5 @@
double shift10(double x, int exponent);
float shift10(float x, int exponent);
double shift10(UInt64 x, int exponent);
double shift10(Int64 x, int exponent);
long double shift10(UInt64 x, int exponent);
long double shift10(Int64 x, int exponent);

View File

@ -11,12 +11,12 @@ std::string setColor(UInt64 hash)
/// It still looks awesome.
UInt8 y = 128;
UInt8 cb = hash % 256;
UInt8 cr = hash / 256 % 256;
UInt8 cb = static_cast<UInt8>(hash % 256);
UInt8 cr = static_cast<UInt8>(hash / 256 % 256);
UInt8 r = std::max(0.0, std::min(255.0, y + 1.402 * (cr - 128)));
UInt8 g = std::max(0.0, std::min(255.0, y - 0.344136 * (cb - 128) - 0.714136 * (cr - 128)));
UInt8 b = std::max(0.0, std::min(255.0, y + 1.772 * (cb - 128)));
UInt8 r = static_cast<UInt8>(std::max(0.0, std::min(255.0, y + 1.402 * (cr - 128))));
UInt8 g = static_cast<UInt8>(std::max(0.0, std::min(255.0, y - 0.344136 * (cb - 128) - 0.714136 * (cr - 128))));
UInt8 b = static_cast<UInt8>(std::max(0.0, std::min(255.0, y + 1.772 * (cb - 128))));
/// ANSI escape sequence to set 24-bit foreground font color in terminal.
return "\033[38;2;" + std::to_string(r) + ";" + std::to_string(g) + ";" + std::to_string(b) + "m";

View File

@ -453,7 +453,7 @@ private:
if constexpr (sizeof(T) <= sizeof(base_type))
{
if (0 == idx)
return x;
return static_cast<base_type>(x);
}
else if (idx * sizeof(base_type) < sizeof(T))
return x >> (idx * base_bits); // & std::numeric_limits<base_type>::max()
@ -1239,13 +1239,13 @@ constexpr integer<Bits, Signed>::operator long double() const noexcept
template <size_t Bits, typename Signed>
constexpr integer<Bits, Signed>::operator double() const noexcept
{
return static_cast<long double>(*this);
return static_cast<double>(static_cast<long double>(*this));
}
template <size_t Bits, typename Signed>
constexpr integer<Bits, Signed>::operator float() const noexcept
{
return static_cast<long double>(*this);
return static_cast<float>(static_cast<long double>(*this));
}
// Unary operators

View File

@ -63,7 +63,7 @@
* Very large size of memcpy typically indicates suboptimal (not cache friendly) algorithms in code or unrealistic scenarios,
* so we don't pay attention to using non-temporary stores.
*
* On recent Intel CPUs, the presence of "erms" makes "rep movsb" the most benefitial,
* On recent Intel CPUs, the presence of "erms" makes "rep movsb" the most beneficial,
* even comparing to non-temporary aligned unrolled stores even with the most wide registers.
*
* memcpy can be written in asm, C or C++. The latter can also use inline asm.
@ -214,4 +214,3 @@ tail:
return ret;
}

View File

@ -101,7 +101,7 @@
#endif
/*
* The pcg_extras namespace contains some support code that is likley to
* The pcg_extras namespace contains some support code that is likely to
* be useful for a variety of RNGs, including:
* - 128-bit int support for platforms where it isn't available natively
* - bit twiddling operations

View File

@ -22,7 +22,7 @@
/*
* This code provides a a C++ class that can provide 128-bit (or higher)
* integers. To produce 2K-bit integers, it uses two K-bit integers,
* placed in a union that allowes the code to also see them as four K/2 bit
* placed in a union that allows the code to also see them as four K/2 bit
* integers (and access them either directly name, or by index).
*
* It may seem like we're reinventing the wheel here, because several

View File

@ -24,6 +24,23 @@ option (ENABLE_BMI "Use BMI instructions on x86_64" 0)
option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0)
option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0)
# X86: Allow compilation for a SSE2-only target machine. Done by a special build in CI for embedded or very old hardware.
option (NO_SSE3_OR_HIGHER "Disable SSE3 or higher on x86_64" 0)
if (NO_SSE3_OR_HIGHER)
SET(ENABLE_SSSE3 0)
SET(ENABLE_SSE41 0)
SET(ENABLE_SSE42 0)
SET(ENABLE_PCLMULQDQ 0)
SET(ENABLE_POPCNT 0)
SET(ENABLE_AVX 0)
SET(ENABLE_AVX2 0)
SET(ENABLE_AVX512 0)
SET(ENABLE_AVX512_VBMI 0)
SET(ENABLE_BMI 0)
SET(ENABLE_AVX2_FOR_SPEC_OP 0)
SET(ENABLE_AVX512_FOR_SPEC_OP 0)
endif()
option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." 0)
if (ARCH_NATIVE)

17
cmake/ld.lld.in Executable file
View File

@ -0,0 +1,17 @@
#!/usr/bin/env bash
# This is a workaround for bug in llvm/clang,
# that does not produce .debug_aranges with LTO
#
# NOTE: this is a temporary solution, that should be removed once [1] will be
# resolved.
#
# [1]: https://discourse.llvm.org/t/clang-does-not-produce-full-debug-aranges-section-with-thinlto/64898/8
# NOTE: only -flto=thin is supported.
# NOTE: it is not possible to check was there -gdwarf-aranges initially or not.
if [[ "$*" =~ -plugin-opt=thinlto ]]; then
exec "@LLD_PATH@" -mllvm -generate-arange-section "$@"
else
exec "@LLD_PATH@" "$@"
fi

View File

@ -20,7 +20,7 @@ macro(clickhouse_split_debug_symbols)
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Splits debug symbols into separate file, leaves the binary untouched:
COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"

View File

@ -94,8 +94,13 @@ if (LINKER_NAME)
if (NOT LLD_PATH)
message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
endif ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}")
# This a temporary quirk to emit .debug_aranges with ThinLTO
set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld")
configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
else ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit bdba298189e29995892de78dcecf64d127444e81
Subproject commit 1be805e7cb2494aa8170015493474379b0362dfc

2
contrib/capnproto vendored

@ -1 +1 @@
Subproject commit c8189ec3c27dacbd4a3288e682473010e377f593
Subproject commit 2e88221d3dde22266bfccf40eaee6ff9b40d113d

@ -1 +1 @@
Subproject commit 7d73d7610db31d4e1ecde0fb3a7ee90ef371207f
Subproject commit 7abd49bb2e72bf9a5029993d31dcb1872da88292

View File

@ -54,9 +54,8 @@ set(SRCS
add_library(cxx ${SRCS})
set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake")
target_include_directories(cxx SYSTEM BEFORE PUBLIC
$<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}>/src)
target_include_directories(cxx SYSTEM BEFORE PRIVATE $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/src>)
target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)
# Enable capturing stack traces for all exceptions.

View File

@ -1,11 +1,9 @@
# We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan.
if (ARCH_AMD64)
if ((ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) OR ARCH_AARCH64)
option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES})
endif()
# TODO: vectorscan supports ARM yet some tests involving cyrillic letters fail (PR #38171) ... needs further investigation
# TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine
if (NOT ENABLE_VECTORSCAN)
@ -236,11 +234,27 @@ set (SRCS
# The original build system invokes ragel on src/parser/{Parser|control_verbs}.rl (+ a few more .rl files which are unneeded). To avoid a
# build-time dependency on ragel (via contrib/ or find_program()), add the manually generated output of ragel to the sources.
# Please regenerate these files if you update vectorscan.
list (APPEND SRCS
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/Parser.cpp"
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/control_verbs.cpp"
)
#
# Please regenerate these files if you update vectorscan. They must be regenerated for each platform separately because ragel produces for
# weird reasons different constants in the output.
#
# Also, please use upstream versions of colm and ragel, the packages in Ubuntu 22.04 seem to produce wrong output on ARM.
if (ARCH_AMD64)
list (APPEND SRCS
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/amd64/Parser.cpp"
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/amd64/control_verbs.cpp"
)
elseif (ARCH_AARCH64)
list (APPEND SRCS
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/aarch64/Parser.cpp"
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/aarch64/control_verbs.cpp"
)
set_source_files_properties(
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/aarch64/Parser.cpp"
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/aarch64/control_verbs.cpp"
COMPILE_FLAGS -Wno-c++11-narrowing
)
endif()
# Platform-dependent files
if (ARCH_AMD64)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,547 @@
#line 1 "control_verbs.rl"
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* \brief Parser for control verbs that can occur at the beginning of a pattern.
*/
#include "parser/control_verbs.h"
#include "parser/Parser.h"
#include "parser/parse_error.h"
#include <cstring>
#include <sstream>
using namespace std;
namespace ue2 {
const char *read_control_verbs(const char *ptr, const char *end, size_t start,
ParseMode &mode) {
const char *p = ptr;
const char *pe = end;
const char *eof = pe;
const char *ts, *te;
int cs;
UNUSED int act;
#line 56 "control_verbs.cpp"
static const signed char _ControlVerbs_actions[] = {
0, 1, 0, 1, 1, 1, 2, 1,
3, 1, 4, 1, 5, 1, 6, 1,
7, 1, 8, 1, 9, 0
};
static const short _ControlVerbs_key_offsets[] = {
0, 7, 8, 10, 12, 14, 16, 18,
20, 21, 23, 25, 27, 30, 32, 34,
36, 38, 40, 42, 44, 46, 48, 50,
52, 55, 57, 59, 61, 63, 66, 68,
70, 72, 74, 76, 79, 82, 84, 86,
88, 90, 92, 94, 96, 98, 100, 102,
105, 107, 109, 111, 113, 115, 117, 119,
121, 123, 125, 127, 129, 131, 133, 135,
137, 139, 141, 143, 146, 148, 149, 151,
155, 157, 159, 160, 161, 0
};
static const char _ControlVerbs_trans_keys[] = {
41u, 65u, 66u, 67u, 76u, 78u, 85u, 41u,
41u, 78u, 41u, 89u, 41u, 67u, 41u, 82u,
41u, 76u, 41u, 70u, 41u, 41u, 83u, 41u,
82u, 41u, 95u, 41u, 65u, 85u, 41u, 78u,
41u, 89u, 41u, 67u, 41u, 78u, 41u, 73u,
41u, 67u, 41u, 79u, 41u, 68u, 41u, 69u,
41u, 82u, 41u, 76u, 41u, 70u, 73u, 41u,
77u, 41u, 73u, 41u, 84u, 41u, 95u, 41u,
77u, 82u, 41u, 65u, 41u, 84u, 41u, 67u,
41u, 72u, 41u, 61u, 41u, 48u, 57u, 41u,
48u, 57u, 41u, 69u, 41u, 67u, 41u, 85u,
41u, 82u, 41u, 83u, 41u, 73u, 41u, 79u,
41u, 78u, 41u, 79u, 41u, 95u, 41u, 65u,
83u, 41u, 85u, 41u, 84u, 41u, 79u, 41u,
95u, 41u, 80u, 41u, 79u, 41u, 83u, 41u,
83u, 41u, 69u, 41u, 83u, 41u, 83u, 41u,
84u, 41u, 65u, 41u, 82u, 41u, 84u, 41u,
95u, 41u, 79u, 41u, 80u, 41u, 84u, 41u,
67u, 84u, 41u, 80u, 41u, 41u, 70u, 41u,
49u, 51u, 56u, 41u, 54u, 41u, 50u, 41u,
40u, 42u, 0u
};
static const signed char _ControlVerbs_single_lengths[] = {
7, 1, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 3, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
3, 2, 2, 2, 2, 3, 2, 2,
2, 2, 2, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 3,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 3, 2, 1, 2, 4,
2, 2, 1, 1, 1, 0
};
static const signed char _ControlVerbs_range_lengths[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0
};
static const short _ControlVerbs_index_offsets[] = {
0, 8, 10, 13, 16, 19, 22, 25,
28, 30, 33, 36, 39, 43, 46, 49,
52, 55, 58, 61, 64, 67, 70, 73,
76, 80, 83, 86, 89, 92, 96, 99,
102, 105, 108, 111, 114, 117, 120, 123,
126, 129, 132, 135, 138, 141, 144, 147,
151, 154, 157, 160, 163, 166, 169, 172,
175, 178, 181, 184, 187, 190, 193, 196,
199, 202, 205, 208, 212, 215, 217, 220,
225, 228, 231, 233, 235, 0
};
static const signed char _ControlVerbs_cond_targs[] = {
75, 2, 9, 22, 24, 45, 67, 1,
75, 1, 75, 3, 1, 75, 4, 1,
75, 5, 1, 75, 6, 1, 75, 7,
1, 75, 8, 1, 75, 1, 75, 10,
1, 75, 11, 1, 75, 12, 1, 75,
13, 16, 1, 75, 14, 1, 75, 15,
1, 75, 5, 1, 75, 17, 1, 75,
18, 1, 75, 19, 1, 75, 20, 1,
75, 21, 1, 75, 8, 1, 75, 23,
1, 75, 7, 1, 75, 8, 25, 1,
75, 26, 1, 75, 27, 1, 75, 28,
1, 75, 29, 1, 75, 30, 37, 1,
75, 31, 1, 75, 32, 1, 75, 33,
1, 75, 34, 1, 75, 35, 1, 75,
36, 1, 75, 36, 1, 75, 38, 1,
75, 39, 1, 75, 40, 1, 75, 41,
1, 75, 42, 1, 75, 43, 1, 75,
44, 1, 75, 34, 1, 75, 46, 1,
75, 47, 1, 75, 48, 59, 1, 75,
49, 1, 75, 50, 1, 75, 51, 1,
75, 52, 1, 75, 53, 1, 75, 54,
1, 75, 55, 1, 75, 56, 1, 75,
57, 1, 75, 58, 1, 75, 8, 1,
75, 60, 1, 75, 61, 1, 75, 62,
1, 75, 63, 1, 75, 64, 1, 75,
65, 1, 75, 66, 1, 75, 8, 1,
75, 68, 70, 1, 75, 69, 1, 75,
1, 75, 71, 1, 75, 72, 73, 74,
1, 75, 8, 1, 75, 8, 1, 75,
1, 76, 75, 0, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 0
};
static const signed char _ControlVerbs_cond_actions[] = {
19, 0, 0, 0, 0, 0, 0, 0,
13, 0, 13, 0, 0, 13, 0, 0,
11, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 11, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 13, 0, 0, 13, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 11, 0, 0, 13, 0, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 11, 0, 0, 13, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 13, 0, 0, 13, 0, 0,
13, 0, 0, 13, 0, 0, 0, 13,
0, 0, 13, 0, 0, 13, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 13, 0, 0, 13, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 13, 0, 0, 13, 0, 0,
13, 0, 0, 0, 13, 0, 0, 9,
0, 13, 0, 0, 7, 0, 0, 0,
0, 13, 0, 0, 13, 0, 0, 7,
0, 5, 15, 0, 17, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
0, 17, 0
};
static const signed char _ControlVerbs_to_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0
};
static const signed char _ControlVerbs_from_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 3, 0, 0
};
static const short _ControlVerbs_eof_trans[] = {
238, 239, 240, 241, 242, 243, 244, 245,
246, 247, 248, 249, 250, 251, 252, 253,
254, 255, 256, 257, 258, 259, 260, 261,
262, 263, 264, 265, 266, 267, 268, 269,
270, 271, 272, 273, 274, 275, 276, 277,
278, 279, 280, 281, 282, 283, 284, 285,
286, 287, 288, 289, 290, 291, 292, 293,
294, 295, 296, 297, 298, 299, 300, 301,
302, 303, 304, 305, 306, 307, 308, 309,
310, 311, 312, 313, 314, 0
};
static const int ControlVerbs_start = 75;
static const int ControlVerbs_first_final = 75;
static const int ControlVerbs_error = -1;
static const int ControlVerbs_en_main = 75;
#line 269 "control_verbs.cpp"
{
cs = (int)ControlVerbs_start;
ts = 0;
te = 0;
}
#line 105 "control_verbs.rl"
try {
#line 278 "control_verbs.cpp"
{
int _klen;
unsigned int _trans = 0;
const char * _keys;
const signed char * _acts;
unsigned int _nacts;
_resume: {}
if ( p == pe && p != eof )
goto _out;
_acts = ( _ControlVerbs_actions + (_ControlVerbs_from_state_actions[cs]));
_nacts = (unsigned int)(*( _acts));
_acts += 1;
while ( _nacts > 0 ) {
switch ( (*( _acts)) ) {
case 1: {
{
#line 1 "NONE"
{ts = p;}}
#line 297 "control_verbs.cpp"
break;
}
}
_nacts -= 1;
_acts += 1;
}
if ( p == eof ) {
if ( _ControlVerbs_eof_trans[cs] > 0 ) {
_trans = (unsigned int)_ControlVerbs_eof_trans[cs] - 1;
}
}
else {
_keys = ( _ControlVerbs_trans_keys + (_ControlVerbs_key_offsets[cs]));
_trans = (unsigned int)_ControlVerbs_index_offsets[cs];
_klen = (int)_ControlVerbs_single_lengths[cs];
if ( _klen > 0 ) {
const char *_lower = _keys;
const char *_upper = _keys + _klen - 1;
const char *_mid;
while ( 1 ) {
if ( _upper < _lower ) {
_keys += _klen;
_trans += (unsigned int)_klen;
break;
}
_mid = _lower + ((_upper-_lower) >> 1);
if ( ( (*( p))) < (*( _mid)) )
_upper = _mid - 1;
else if ( ( (*( p))) > (*( _mid)) )
_lower = _mid + 1;
else {
_trans += (unsigned int)(_mid - _keys);
goto _match;
}
}
}
_klen = (int)_ControlVerbs_range_lengths[cs];
if ( _klen > 0 ) {
const char *_lower = _keys;
const char *_upper = _keys + (_klen<<1) - 2;
const char *_mid;
while ( 1 ) {
if ( _upper < _lower ) {
_trans += (unsigned int)_klen;
break;
}
_mid = _lower + (((_upper-_lower) >> 1) & ~1);
if ( ( (*( p))) < (*( _mid)) )
_upper = _mid - 2;
else if ( ( (*( p))) > (*( _mid + 1)) )
_lower = _mid + 2;
else {
_trans += (unsigned int)((_mid - _keys)>>1);
break;
}
}
}
_match: {}
}
cs = (int)_ControlVerbs_cond_targs[_trans];
if ( _ControlVerbs_cond_actions[_trans] != 0 ) {
_acts = ( _ControlVerbs_actions + (_ControlVerbs_cond_actions[_trans]));
_nacts = (unsigned int)(*( _acts));
_acts += 1;
while ( _nacts > 0 ) {
switch ( (*( _acts)) )
{
case 2: {
{
#line 1 "NONE"
{te = p+1;}}
#line 378 "control_verbs.cpp"
break;
}
case 3: {
{
#line 76 "control_verbs.rl"
{te = p+1;{
#line 76 "control_verbs.rl"
mode.utf8 = true;
}
}}
#line 391 "control_verbs.cpp"
break;
}
case 4: {
{
#line 80 "control_verbs.rl"
{te = p+1;{
#line 80 "control_verbs.rl"
mode.ucp = true;
}
}}
#line 404 "control_verbs.cpp"
break;
}
case 5: {
{
#line 84 "control_verbs.rl"
{te = p+1;{
#line 84 "control_verbs.rl"
ostringstream str;
str << "Unsupported control verb " << string(ts, te - ts);
throw LocatedParseError(str.str());
}
}}
#line 419 "control_verbs.cpp"
break;
}
case 6: {
{
#line 90 "control_verbs.rl"
{te = p+1;{
#line 90 "control_verbs.rl"
ostringstream str;
str << "Unknown control verb " << string(ts, te - ts);
throw LocatedParseError(str.str());
}
}}
#line 434 "control_verbs.cpp"
break;
}
case 7: {
{
#line 97 "control_verbs.rl"
{te = p+1;{
#line 97 "control_verbs.rl"
{p = p - 1; }
{p += 1; goto _out; }
}
}}
#line 448 "control_verbs.cpp"
break;
}
case 8: {
{
#line 97 "control_verbs.rl"
{te = p;p = p - 1;{
#line 97 "control_verbs.rl"
{p = p - 1; }
{p += 1; goto _out; }
}
}}
#line 462 "control_verbs.cpp"
break;
}
case 9: {
{
#line 97 "control_verbs.rl"
{p = ((te))-1;
{
#line 97 "control_verbs.rl"
{p = p - 1; }
{p += 1; goto _out; }
}
}}
#line 477 "control_verbs.cpp"
break;
}
}
_nacts -= 1;
_acts += 1;
}
}
if ( p == eof ) {
if ( cs >= 75 )
goto _out;
}
else {
_acts = ( _ControlVerbs_actions + (_ControlVerbs_to_state_actions[cs]));
_nacts = (unsigned int)(*( _acts));
_acts += 1;
while ( _nacts > 0 ) {
switch ( (*( _acts)) ) {
case 0: {
{
#line 1 "NONE"
{ts = 0;}}
#line 502 "control_verbs.cpp"
break;
}
}
_nacts -= 1;
_acts += 1;
}
p += 1;
goto _resume;
}
_out: {}
}
#line 108 "control_verbs.rl"
} catch (LocatedParseError &error) {
if (ts >= ptr && ts <= pe) {
error.locate(ts - ptr + start);
} else {
error.locate(0);
}
throw;
}
return p;
}
} // namespace ue2

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,547 @@
#line 1 "control_verbs.rl"
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* \brief Parser for control verbs that can occur at the beginning of a pattern.
*/
#include "parser/control_verbs.h"
#include "parser/Parser.h"
#include "parser/parse_error.h"
#include <cstring>
#include <sstream>
using namespace std;
namespace ue2 {
const char *read_control_verbs(const char *ptr, const char *end, size_t start,
ParseMode &mode) {
const char *p = ptr;
const char *pe = end;
const char *eof = pe;
const char *ts, *te;
int cs;
UNUSED int act;
#line 56 "control_verbs.cpp"
static const signed char _ControlVerbs_actions[] = {
0, 1, 0, 1, 1, 1, 2, 1,
3, 1, 4, 1, 5, 1, 6, 1,
7, 1, 8, 1, 9, 0
};
static const short _ControlVerbs_key_offsets[] = {
0, 7, 8, 10, 12, 14, 16, 18,
20, 21, 23, 25, 27, 30, 32, 34,
36, 38, 40, 42, 44, 46, 48, 50,
52, 55, 57, 59, 61, 63, 66, 68,
70, 72, 74, 76, 79, 82, 84, 86,
88, 90, 92, 94, 96, 98, 100, 102,
105, 107, 109, 111, 113, 115, 117, 119,
121, 123, 125, 127, 129, 131, 133, 135,
137, 139, 141, 143, 146, 148, 149, 151,
155, 157, 159, 160, 161, 0
};
static const char _ControlVerbs_trans_keys[] = {
41, 65, 66, 67, 76, 78, 85, 41,
41, 78, 41, 89, 41, 67, 41, 82,
41, 76, 41, 70, 41, 41, 83, 41,
82, 41, 95, 41, 65, 85, 41, 78,
41, 89, 41, 67, 41, 78, 41, 73,
41, 67, 41, 79, 41, 68, 41, 69,
41, 82, 41, 76, 41, 70, 73, 41,
77, 41, 73, 41, 84, 41, 95, 41,
77, 82, 41, 65, 41, 84, 41, 67,
41, 72, 41, 61, 41, 48, 57, 41,
48, 57, 41, 69, 41, 67, 41, 85,
41, 82, 41, 83, 41, 73, 41, 79,
41, 78, 41, 79, 41, 95, 41, 65,
83, 41, 85, 41, 84, 41, 79, 41,
95, 41, 80, 41, 79, 41, 83, 41,
83, 41, 69, 41, 83, 41, 83, 41,
84, 41, 65, 41, 82, 41, 84, 41,
95, 41, 79, 41, 80, 41, 84, 41,
67, 84, 41, 80, 41, 41, 70, 41,
49, 51, 56, 41, 54, 41, 50, 41,
40, 42, 0
};
static const signed char _ControlVerbs_single_lengths[] = {
7, 1, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 3, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
3, 2, 2, 2, 2, 3, 2, 2,
2, 2, 2, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 3,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 3, 2, 1, 2, 4,
2, 2, 1, 1, 1, 0
};
static const signed char _ControlVerbs_range_lengths[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0
};
static const short _ControlVerbs_index_offsets[] = {
0, 8, 10, 13, 16, 19, 22, 25,
28, 30, 33, 36, 39, 43, 46, 49,
52, 55, 58, 61, 64, 67, 70, 73,
76, 80, 83, 86, 89, 92, 96, 99,
102, 105, 108, 111, 114, 117, 120, 123,
126, 129, 132, 135, 138, 141, 144, 147,
151, 154, 157, 160, 163, 166, 169, 172,
175, 178, 181, 184, 187, 190, 193, 196,
199, 202, 205, 208, 212, 215, 217, 220,
225, 228, 231, 233, 235, 0
};
static const signed char _ControlVerbs_cond_targs[] = {
75, 2, 9, 22, 24, 45, 67, 1,
75, 1, 75, 3, 1, 75, 4, 1,
75, 5, 1, 75, 6, 1, 75, 7,
1, 75, 8, 1, 75, 1, 75, 10,
1, 75, 11, 1, 75, 12, 1, 75,
13, 16, 1, 75, 14, 1, 75, 15,
1, 75, 5, 1, 75, 17, 1, 75,
18, 1, 75, 19, 1, 75, 20, 1,
75, 21, 1, 75, 8, 1, 75, 23,
1, 75, 7, 1, 75, 8, 25, 1,
75, 26, 1, 75, 27, 1, 75, 28,
1, 75, 29, 1, 75, 30, 37, 1,
75, 31, 1, 75, 32, 1, 75, 33,
1, 75, 34, 1, 75, 35, 1, 75,
36, 1, 75, 36, 1, 75, 38, 1,
75, 39, 1, 75, 40, 1, 75, 41,
1, 75, 42, 1, 75, 43, 1, 75,
44, 1, 75, 34, 1, 75, 46, 1,
75, 47, 1, 75, 48, 59, 1, 75,
49, 1, 75, 50, 1, 75, 51, 1,
75, 52, 1, 75, 53, 1, 75, 54,
1, 75, 55, 1, 75, 56, 1, 75,
57, 1, 75, 58, 1, 75, 8, 1,
75, 60, 1, 75, 61, 1, 75, 62,
1, 75, 63, 1, 75, 64, 1, 75,
65, 1, 75, 66, 1, 75, 8, 1,
75, 68, 70, 1, 75, 69, 1, 75,
1, 75, 71, 1, 75, 72, 73, 74,
1, 75, 8, 1, 75, 8, 1, 75,
1, 76, 75, 0, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 0
};
static const signed char _ControlVerbs_cond_actions[] = {
19, 0, 0, 0, 0, 0, 0, 0,
13, 0, 13, 0, 0, 13, 0, 0,
11, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 11, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 13, 0, 0, 13, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 11, 0, 0, 13, 0, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 11, 0, 0, 13, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 13, 0, 0, 13, 0, 0,
13, 0, 0, 13, 0, 0, 0, 13,
0, 0, 13, 0, 0, 13, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 13, 0, 0, 13, 0, 0,
13, 0, 0, 13, 0, 0, 13, 0,
0, 13, 0, 0, 13, 0, 0, 13,
0, 0, 13, 0, 0, 13, 0, 0,
13, 0, 0, 0, 13, 0, 0, 9,
0, 13, 0, 0, 7, 0, 0, 0,
0, 13, 0, 0, 13, 0, 0, 7,
0, 5, 15, 0, 17, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
0, 17, 0
};
static const signed char _ControlVerbs_to_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0
};
static const signed char _ControlVerbs_from_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 3, 0, 0
};
static const short _ControlVerbs_eof_trans[] = {
238, 239, 240, 241, 242, 243, 244, 245,
246, 247, 248, 249, 250, 251, 252, 253,
254, 255, 256, 257, 258, 259, 260, 261,
262, 263, 264, 265, 266, 267, 268, 269,
270, 271, 272, 273, 274, 275, 276, 277,
278, 279, 280, 281, 282, 283, 284, 285,
286, 287, 288, 289, 290, 291, 292, 293,
294, 295, 296, 297, 298, 299, 300, 301,
302, 303, 304, 305, 306, 307, 308, 309,
310, 311, 312, 313, 314, 0
};
static const int ControlVerbs_start = 75;
static const int ControlVerbs_first_final = 75;
static const int ControlVerbs_error = -1;
static const int ControlVerbs_en_main = 75;
#line 269 "control_verbs.cpp"
{
cs = (int)ControlVerbs_start;
ts = 0;
te = 0;
}
#line 105 "control_verbs.rl"
try {
#line 278 "control_verbs.cpp"
{
int _klen;
unsigned int _trans = 0;
const char * _keys;
const signed char * _acts;
unsigned int _nacts;
_resume: {}
if ( p == pe && p != eof )
goto _out;
_acts = ( _ControlVerbs_actions + (_ControlVerbs_from_state_actions[cs]));
_nacts = (unsigned int)(*( _acts));
_acts += 1;
while ( _nacts > 0 ) {
switch ( (*( _acts)) ) {
case 1: {
{
#line 1 "NONE"
{ts = p;}}
#line 297 "control_verbs.cpp"
break;
}
}
_nacts -= 1;
_acts += 1;
}
if ( p == eof ) {
if ( _ControlVerbs_eof_trans[cs] > 0 ) {
_trans = (unsigned int)_ControlVerbs_eof_trans[cs] - 1;
}
}
else {
_keys = ( _ControlVerbs_trans_keys + (_ControlVerbs_key_offsets[cs]));
_trans = (unsigned int)_ControlVerbs_index_offsets[cs];
_klen = (int)_ControlVerbs_single_lengths[cs];
if ( _klen > 0 ) {
const char *_lower = _keys;
const char *_upper = _keys + _klen - 1;
const char *_mid;
while ( 1 ) {
if ( _upper < _lower ) {
_keys += _klen;
_trans += (unsigned int)_klen;
break;
}
_mid = _lower + ((_upper-_lower) >> 1);
if ( ( (*( p))) < (*( _mid)) )
_upper = _mid - 1;
else if ( ( (*( p))) > (*( _mid)) )
_lower = _mid + 1;
else {
_trans += (unsigned int)(_mid - _keys);
goto _match;
}
}
}
_klen = (int)_ControlVerbs_range_lengths[cs];
if ( _klen > 0 ) {
const char *_lower = _keys;
const char *_upper = _keys + (_klen<<1) - 2;
const char *_mid;
while ( 1 ) {
if ( _upper < _lower ) {
_trans += (unsigned int)_klen;
break;
}
_mid = _lower + (((_upper-_lower) >> 1) & ~1);
if ( ( (*( p))) < (*( _mid)) )
_upper = _mid - 2;
else if ( ( (*( p))) > (*( _mid + 1)) )
_lower = _mid + 2;
else {
_trans += (unsigned int)((_mid - _keys)>>1);
break;
}
}
}
_match: {}
}
cs = (int)_ControlVerbs_cond_targs[_trans];
if ( _ControlVerbs_cond_actions[_trans] != 0 ) {
_acts = ( _ControlVerbs_actions + (_ControlVerbs_cond_actions[_trans]));
_nacts = (unsigned int)(*( _acts));
_acts += 1;
while ( _nacts > 0 ) {
switch ( (*( _acts)) )
{
case 2: {
{
#line 1 "NONE"
{te = p+1;}}
#line 378 "control_verbs.cpp"
break;
}
case 3: {
{
#line 76 "control_verbs.rl"
{te = p+1;{
#line 76 "control_verbs.rl"
mode.utf8 = true;
}
}}
#line 391 "control_verbs.cpp"
break;
}
case 4: {
{
#line 80 "control_verbs.rl"
{te = p+1;{
#line 80 "control_verbs.rl"
mode.ucp = true;
}
}}
#line 404 "control_verbs.cpp"
break;
}
case 5: {
{
#line 84 "control_verbs.rl"
{te = p+1;{
#line 84 "control_verbs.rl"
ostringstream str;
str << "Unsupported control verb " << string(ts, te - ts);
throw LocatedParseError(str.str());
}
}}
#line 419 "control_verbs.cpp"
break;
}
case 6: {
{
#line 90 "control_verbs.rl"
{te = p+1;{
#line 90 "control_verbs.rl"
ostringstream str;
str << "Unknown control verb " << string(ts, te - ts);
throw LocatedParseError(str.str());
}
}}
#line 434 "control_verbs.cpp"
break;
}
case 7: {
{
#line 97 "control_verbs.rl"
{te = p+1;{
#line 97 "control_verbs.rl"
{p = p - 1; }
{p += 1; goto _out; }
}
}}
#line 448 "control_verbs.cpp"
break;
}
case 8: {
{
#line 97 "control_verbs.rl"
{te = p;p = p - 1;{
#line 97 "control_verbs.rl"
{p = p - 1; }
{p += 1; goto _out; }
}
}}
#line 462 "control_verbs.cpp"
break;
}
case 9: {
{
#line 97 "control_verbs.rl"
{p = ((te))-1;
{
#line 97 "control_verbs.rl"
{p = p - 1; }
{p += 1; goto _out; }
}
}}
#line 477 "control_verbs.cpp"
break;
}
}
_nacts -= 1;
_acts += 1;
}
}
if ( p == eof ) {
if ( cs >= 75 )
goto _out;
}
else {
_acts = ( _ControlVerbs_actions + (_ControlVerbs_to_state_actions[cs]));
_nacts = (unsigned int)(*( _acts));
_acts += 1;
while ( _nacts > 0 ) {
switch ( (*( _acts)) ) {
case 0: {
{
#line 1 "NONE"
{ts = 0;}}
#line 502 "control_verbs.cpp"
break;
}
}
_nacts -= 1;
_acts += 1;
}
p += 1;
goto _resume;
}
_out: {}
}
#line 108 "control_verbs.rl"
} catch (LocatedParseError &error) {
if (ts >= ptr && ts <= pe) {
error.locate(ts - ptr + start);
} else {
error.locate(0);
}
throw;
}
return p;
}
} // namespace ue2

View File

@ -1,443 +0,0 @@
#line 1 "control_verbs.rl"
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* \brief Parser for control verbs that can occur at the beginning of a pattern.
*/
#include "parser/control_verbs.h"
#include "parser/Parser.h"
#include "parser/parse_error.h"
#include <cstring>
#include <sstream>
using namespace std;
namespace ue2 {
const char *read_control_verbs(const char *ptr, const char *end, size_t start,
ParseMode &mode) {
const char *p = ptr;
const char *pe = end;
const char *eof = pe;
const char *ts, *te;
int cs;
UNUSED int act;
#line 59 "control_verbs.cpp"
static const char _ControlVerbs_actions[] = {
0, 1, 0, 1, 1, 1, 2, 1,
3, 1, 4, 1, 5, 1, 6, 1,
7, 1, 8, 1, 9
};
static const unsigned char _ControlVerbs_key_offsets[] = {
0, 7, 8, 10, 12, 14, 16, 18,
20, 21, 23, 25, 27, 30, 32, 34,
36, 38, 40, 42, 44, 46, 48, 50,
52, 55, 57, 59, 61, 63, 66, 68,
70, 72, 74, 76, 79, 82, 84, 86,
88, 90, 92, 94, 96, 98, 100, 102,
105, 107, 109, 111, 113, 115, 117, 119,
121, 123, 125, 127, 129, 131, 133, 135,
137, 139, 141, 143, 146, 148, 149, 151,
155, 157, 159, 160, 161
};
static const char _ControlVerbs_trans_keys[] = {
41, 65, 66, 67, 76, 78, 85, 41,
41, 78, 41, 89, 41, 67, 41, 82,
41, 76, 41, 70, 41, 41, 83, 41,
82, 41, 95, 41, 65, 85, 41, 78,
41, 89, 41, 67, 41, 78, 41, 73,
41, 67, 41, 79, 41, 68, 41, 69,
41, 82, 41, 76, 41, 70, 73, 41,
77, 41, 73, 41, 84, 41, 95, 41,
77, 82, 41, 65, 41, 84, 41, 67,
41, 72, 41, 61, 41, 48, 57, 41,
48, 57, 41, 69, 41, 67, 41, 85,
41, 82, 41, 83, 41, 73, 41, 79,
41, 78, 41, 79, 41, 95, 41, 65,
83, 41, 85, 41, 84, 41, 79, 41,
95, 41, 80, 41, 79, 41, 83, 41,
83, 41, 69, 41, 83, 41, 83, 41,
84, 41, 65, 41, 82, 41, 84, 41,
95, 41, 79, 41, 80, 41, 84, 41,
67, 84, 41, 80, 41, 41, 70, 41,
49, 51, 56, 41, 54, 41, 50, 41,
40, 42, 0
};
static const char _ControlVerbs_single_lengths[] = {
7, 1, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 3, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
3, 2, 2, 2, 2, 3, 2, 2,
2, 2, 2, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 3,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 3, 2, 1, 2, 4,
2, 2, 1, 1, 1
};
static const char _ControlVerbs_range_lengths[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0
};
static const short _ControlVerbs_index_offsets[] = {
0, 8, 10, 13, 16, 19, 22, 25,
28, 30, 33, 36, 39, 43, 46, 49,
52, 55, 58, 61, 64, 67, 70, 73,
76, 80, 83, 86, 89, 92, 96, 99,
102, 105, 108, 111, 114, 117, 120, 123,
126, 129, 132, 135, 138, 141, 144, 147,
151, 154, 157, 160, 163, 166, 169, 172,
175, 178, 181, 184, 187, 190, 193, 196,
199, 202, 205, 208, 212, 215, 217, 220,
225, 228, 231, 233, 235
};
static const char _ControlVerbs_indicies[] = {
0, 2, 3, 4, 5, 6, 7, 1,
8, 1, 8, 9, 1, 8, 10, 1,
11, 12, 1, 8, 13, 1, 8, 14,
1, 8, 15, 1, 11, 1, 8, 16,
1, 8, 17, 1, 8, 18, 1, 8,
19, 20, 1, 8, 21, 1, 8, 22,
1, 8, 12, 1, 8, 23, 1, 8,
24, 1, 8, 25, 1, 8, 26, 1,
8, 27, 1, 8, 15, 1, 8, 28,
1, 11, 14, 1, 8, 15, 29, 1,
8, 30, 1, 8, 31, 1, 8, 32,
1, 8, 33, 1, 8, 34, 35, 1,
8, 36, 1, 8, 37, 1, 8, 38,
1, 8, 39, 1, 8, 40, 1, 8,
41, 1, 11, 41, 1, 8, 42, 1,
8, 43, 1, 8, 44, 1, 8, 45,
1, 8, 46, 1, 8, 47, 1, 8,
48, 1, 8, 39, 1, 8, 49, 1,
8, 50, 1, 8, 51, 52, 1, 8,
53, 1, 8, 54, 1, 8, 55, 1,
8, 56, 1, 8, 57, 1, 8, 58,
1, 8, 59, 1, 8, 60, 1, 8,
61, 1, 8, 62, 1, 8, 15, 1,
8, 63, 1, 8, 64, 1, 8, 65,
1, 8, 66, 1, 8, 67, 1, 8,
68, 1, 8, 69, 1, 8, 15, 1,
8, 70, 71, 1, 8, 72, 1, 73,
1, 8, 74, 1, 75, 76, 77, 78,
1, 8, 15, 1, 8, 15, 1, 75,
1, 80, 79, 82, 81, 0
};
static const char _ControlVerbs_trans_targs[] = {
75, 1, 2, 9, 22, 24, 45, 67,
75, 3, 4, 75, 5, 6, 7, 8,
10, 11, 12, 13, 16, 14, 15, 17,
18, 19, 20, 21, 23, 25, 26, 27,
28, 29, 30, 37, 31, 32, 33, 34,
35, 36, 38, 39, 40, 41, 42, 43,
44, 46, 47, 48, 59, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 60,
61, 62, 63, 64, 65, 66, 68, 70,
69, 75, 71, 75, 72, 73, 74, 75,
76, 75, 0
};
static const char _ControlVerbs_trans_actions[] = {
19, 0, 0, 0, 0, 0, 0, 0,
13, 0, 0, 11, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 9, 0, 7, 0, 0, 0, 15,
5, 17, 0
};
static const char _ControlVerbs_to_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0
};
static const char _ControlVerbs_from_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 3, 0
};
static const short _ControlVerbs_eof_trans[] = {
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 82
};
static const int ControlVerbs_start = 75;
static const int ControlVerbs_first_final = 75;
static const int ControlVerbs_error = -1;
static const int ControlVerbs_en_main = 75;
#line 249 "control_verbs.cpp"
{
cs = ControlVerbs_start;
ts = 0;
te = 0;
act = 0;
}
#line 105 "control_verbs.rl"
try {
#line 262 "control_verbs.cpp"
{
int _klen;
unsigned int _trans;
const char *_acts;
unsigned int _nacts;
const char *_keys;
if ( p == pe )
goto _test_eof;
_resume:
_acts = _ControlVerbs_actions + _ControlVerbs_from_state_actions[cs];
_nacts = (unsigned int) *_acts++;
while ( _nacts-- > 0 ) {
switch ( *_acts++ ) {
case 1:
#line 1 "NONE"
{ts = p;}
break;
#line 281 "control_verbs.cpp"
}
}
_keys = _ControlVerbs_trans_keys + _ControlVerbs_key_offsets[cs];
_trans = _ControlVerbs_index_offsets[cs];
_klen = _ControlVerbs_single_lengths[cs];
if ( _klen > 0 ) {
const char *_lower = _keys;
const char *_mid;
const char *_upper = _keys + _klen - 1;
while (1) {
if ( _upper < _lower )
break;
_mid = _lower + ((_upper-_lower) >> 1);
if ( (*p) < *_mid )
_upper = _mid - 1;
else if ( (*p) > *_mid )
_lower = _mid + 1;
else {
_trans += (unsigned int)(_mid - _keys);
goto _match;
}
}
_keys += _klen;
_trans += _klen;
}
_klen = _ControlVerbs_range_lengths[cs];
if ( _klen > 0 ) {
const char *_lower = _keys;
const char *_mid;
const char *_upper = _keys + (_klen<<1) - 2;
while (1) {
if ( _upper < _lower )
break;
_mid = _lower + (((_upper-_lower) >> 1) & ~1);
if ( (*p) < _mid[0] )
_upper = _mid - 2;
else if ( (*p) > _mid[1] )
_lower = _mid + 2;
else {
_trans += (unsigned int)((_mid - _keys)>>1);
goto _match;
}
}
_trans += _klen;
}
_match:
_trans = _ControlVerbs_indicies[_trans];
_eof_trans:
cs = _ControlVerbs_trans_targs[_trans];
if ( _ControlVerbs_trans_actions[_trans] == 0 )
goto _again;
_acts = _ControlVerbs_actions + _ControlVerbs_trans_actions[_trans];
_nacts = (unsigned int) *_acts++;
while ( _nacts-- > 0 )
{
switch ( *_acts++ )
{
case 2:
#line 1 "NONE"
{te = p+1;}
break;
case 3:
#line 76 "control_verbs.rl"
{te = p+1;{
mode.utf8 = true;
}}
break;
case 4:
#line 80 "control_verbs.rl"
{te = p+1;{
mode.ucp = true;
}}
break;
case 5:
#line 84 "control_verbs.rl"
{te = p+1;{
ostringstream str;
str << "Unsupported control verb " << string(ts, te - ts);
throw LocatedParseError(str.str());
}}
break;
case 6:
#line 90 "control_verbs.rl"
{te = p+1;{
ostringstream str;
str << "Unknown control verb " << string(ts, te - ts);
throw LocatedParseError(str.str());
}}
break;
case 7:
#line 97 "control_verbs.rl"
{te = p+1;{
p--;
{p++; goto _out; }
}}
break;
case 8:
#line 97 "control_verbs.rl"
{te = p;p--;{
p--;
{p++; goto _out; }
}}
break;
case 9:
#line 97 "control_verbs.rl"
{{p = ((te))-1;}{
p--;
{p++; goto _out; }
}}
break;
#line 400 "control_verbs.cpp"
}
}
_again:
_acts = _ControlVerbs_actions + _ControlVerbs_to_state_actions[cs];
_nacts = (unsigned int) *_acts++;
while ( _nacts-- > 0 ) {
switch ( *_acts++ ) {
case 0:
#line 1 "NONE"
{ts = 0;}
break;
#line 413 "control_verbs.cpp"
}
}
if ( ++p != pe )
goto _resume;
_test_eof: {}
if ( p == eof )
{
if ( _ControlVerbs_eof_trans[cs] > 0 ) {
_trans = _ControlVerbs_eof_trans[cs] - 1;
goto _eof_trans;
}
}
_out: {}
}
#line 109 "control_verbs.rl"
} catch (LocatedParseError &error) {
if (ts >= ptr && ts <= pe) {
error.locate(ts - ptr + start);
} else {
error.locate(0);
}
throw;
}
return p;
}
} // namespace ue2

View File

@ -83,5 +83,8 @@ RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
--yes --no-install-recommends \
&& apt-get clean
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-15 /usr/bin/llvm-symbolizer
COPY build.sh /
CMD ["bash", "-c", "/build.sh 2>&1"]

View File

@ -130,6 +130,7 @@ def parse_env_variables(
ARM_SUFFIX = "-aarch64"
FREEBSD_SUFFIX = "-freebsd"
PPC_SUFFIX = "-ppc64le"
AMD64_SSE2_SUFFIX = "-amd64sse2"
result = []
result.append("OUTPUT_DIR=/output")
@ -141,6 +142,7 @@ def parse_env_variables(
is_cross_arm = compiler.endswith(ARM_SUFFIX)
is_cross_ppc = compiler.endswith(PPC_SUFFIX)
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
is_amd64_sse2 = compiler.endswith(AMD64_SSE2_SUFFIX)
if is_cross_darwin:
cc = compiler[: -len(DARWIN_SUFFIX)]
@ -186,6 +188,10 @@ def parse_env_variables(
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
)
elif is_amd64_sse2:
cc = compiler[: -len(AMD64_SSE2_SUFFIX)]
result.append("DEB_ARCH=amd64")
cmake_flags.append("-DNO_SSE3_OR_HIGHER=1")
else:
cc = compiler
result.append("DEB_ARCH=amd64")
@ -339,6 +345,7 @@ if __name__ == "__main__":
"clang-14-darwin-aarch64",
"clang-14-aarch64",
"clang-14-ppc64le",
"clang-14-amd64sse2",
"clang-14-freebsd",
"gcc-11",
),

View File

@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="20.9.3.45"
ARG VERSION="22.8.5.29"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION=22.6.1.*
ARG VERSION="22.8.5.29"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image
@ -31,9 +31,6 @@ ARG deb_location_url=""
# set non-empty single_binary_location_url to create docker image
# from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
# for example (run on aarch64 server):
# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm
# note: clickhouse-odbc-bridge is not supported there.
ARG single_binary_location_url=""
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -37,8 +37,8 @@ if [ -n "$ERROR_LOG_PATH" ]; then ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH")";
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"
# There could be many disks declared in config
readarray -t FILESYSTEM_CACHE_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.data_cache_path' || true)
readarray -t DISKS_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.path' || true)
readarray -t DISKS_METADATA_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.metadata_path' || true)
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
@ -51,8 +51,8 @@ for dir in "$DATA_DIR" \
"$TMP_DIR" \
"$USER_PATH" \
"$FORMAT_SCHEMA_PATH" \
"${FILESYSTEM_CACHE_PATHS[@]}" \
"${DISKS_PATHS[@]}"
"${DISKS_PATHS[@]}" \
"${DISKS_METADATA_PATHS[@]}"
do
# check if variable not empty
[ -z "$dir" ] && continue
@ -108,7 +108,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"
HTTPS_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=https_port)"
if [ -n "$HTTP_PORT" ]; then
URL="http://127.0.0.1:$HTTP_PORT/ping"
else

View File

@ -1,8 +1,15 @@
#!/bin/bash
# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031
set -eux
set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
set -e
set -u
set -o pipefail
trap "exit" INT TERM
# The watchdog is in the separate process group, so we have to kill it separately
# if the script terminates earlier.
@ -87,6 +94,19 @@ function configure
# TODO figure out which ones are needed
cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d
cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
cat > db/config.d/core.xml <<EOL
<clickhouse>
<core_dump>
<!-- 100GiB -->
<size_limit>107374182400</size_limit>
</core_dump>
<!-- NOTE: no need to configure core_path,
since clickhouse is not started as daemon (via clickhouse start)
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL
}
function watchdog
@ -180,7 +200,6 @@ handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
gcore
backtrace full
thread apply all backtrace full
info registers

View File

@ -8,6 +8,9 @@ services:
- type: bind
source: ${keeper_binary:-}
target: /usr/bin/clickhouse
- type: bind
source: ${keeper_binary:-}
target: /usr/bin/clickhouse-keeper
- type: bind
source: ${keeper_config_dir1:-}
target: /etc/clickhouse-keeper
@ -38,6 +41,9 @@ services:
- type: bind
source: ${keeper_binary:-}
target: /usr/bin/clickhouse
- type: bind
source: ${keeper_binary:-}
target: /usr/bin/clickhouse-keeper
- type: bind
source: ${keeper_config_dir2:-}
target: /etc/clickhouse-keeper
@ -68,6 +74,9 @@ services:
- type: bind
source: ${keeper_binary:-}
target: /usr/bin/clickhouse
- type: bind
source: ${keeper_binary:-}
target: /usr/bin/clickhouse-keeper
- type: bind
source: ${keeper_config_dir3:-}
target: /etc/clickhouse-keeper

View File

@ -61,7 +61,7 @@ function configure
cp -rv right/config left ||:
# Start a temporary server to rename the tables
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
set -m # Spawn temporary in its own process groups
@ -88,7 +88,7 @@ function configure
clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||:
clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||:
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
# Make copies of the original db for both servers. Use hardlinks instead
@ -106,7 +106,7 @@ function configure
function restart
{
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
# Change the jemalloc settings here.
@ -1400,7 +1400,7 @@ case "$stage" in
while env kill -- -$watchdog_pid ; do sleep 1; done
# Stop the servers to free memory for the subsequent query analysis.
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo Servers stopped.
;&
"analyze_queries")

View File

@ -22,8 +22,8 @@ ls -lha
mkdir -p ./minio_data
if [ ! -f ./minio ]; then
MINIO_SERVER_VERSION=${MINIO_SERVER_VERSION:-2022-01-03T18-22-58Z}
MINIO_CLIENT_VERSION=${MINIO_CLIENT_VERSION:-2022-01-05T23-52-51Z}
MINIO_SERVER_VERSION=${MINIO_SERVER_VERSION:-2022-09-07T22-25-02Z}
MINIO_CLIENT_VERSION=${MINIO_CLIENT_VERSION:-2022-08-28T20-08-11Z}
case $(uname -m) in
x86_64) BIN_ARCH=amd64 ;;
aarch64) BIN_ARCH=arm64 ;;

View File

@ -3,8 +3,14 @@
# shellcheck disable=SC2086
# shellcheck disable=SC2024
# Avoid overlaps with previous runs
dmesg --clear
set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
# Thread Fuzzer allows to check more permutations of possible thread scheduling
# and find more potential issues.
@ -38,8 +44,10 @@ function install_packages()
function configure()
{
export ZOOKEEPER_FAULT_INJECTION=1
# install test configs
export USE_DATABASE_ORDINARY=1
export EXPORT_S3_STORAGE_POLICIES=1
/usr/share/clickhouse-test/config/install.sh
# we mount tests folder from repo to /usr/share
@ -99,6 +107,19 @@ EOL
</default>
</profiles>
</clickhouse>
EOL
cat > /etc/clickhouse-server/config.d/core.xml <<EOL
<clickhouse>
<core_dump>
<!-- 100GiB -->
<size_limit>107374182400</size_limit>
</core_dump>
<!-- NOTE: no need to configure core_path,
since clickhouse is not started as daemon (via clickhouse start)
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL
}
@ -155,7 +176,6 @@ handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
gcore
backtrace full
thread apply all backtrace full
info registers
@ -183,11 +203,11 @@ install_packages package_folder
configure
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateful # to have a proper environment
./setup_minio.sh stateless # to have a proper environment
start
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
@ -200,12 +220,36 @@ start
clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "SHOW TABLES FROM test"
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits"
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"
clickhouse-client --query "SHOW TABLES FROM test"
clickhouse-client --query "SYSTEM STOP THREAD FUZZER"
stop
# Let's enable S3 storage by default
export USE_S3_STORAGE_FOR_MERGE_TREE=1
configure
# But we still need default disk because some tables loaded only into it
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml | sed "s|<disk>s3</disk>|<disk>s3</disk><disk>default</disk>|" > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
start
./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" \
&& echo -e 'Test script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv
@ -255,6 +299,14 @@ zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-serve
# Remove file logical_errors.txt if it's empty
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
# No such key errors
zgrep -Ea "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \
&& echo -e 'S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No lost s3 keys\tOK' >> /test_output/test_results.tsv
# Remove file no_such_key_errors.txt if it's empty
[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
# Crash
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
@ -318,6 +370,7 @@ else
# Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..."
rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||:
rm -f /etc/clickhouse-server/users.d/marks.xml ||:
# Remove s3 related configs to avoid "there is no disk type `cache`"
rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||:
@ -467,8 +520,7 @@ done
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# Core dumps (see gcore)
# Default filename is 'core.PROCESS_ID'
# Core dumps
for core in core.*; do
pigz $core
mv $core.gz /test_output/

View File

@ -168,7 +168,7 @@ def prepare_for_hung_check(drop_databases):
for db in databases:
if db == "system":
continue
command = make_query_command(f"DROP DATABASE {db}")
command = make_query_command(f'DETACH DATABASE {db}')
# we don't wait for drop
Popen(command, shell=True)
break

View File

@ -17,7 +17,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
shellcheck \
yamllint \
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff pylint==2.6.2 \
&& pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \
&& rm -rf /root/.cache/pip

View File

@ -0,0 +1,34 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.8.5.29-lts (74ffb843807) FIXME as compared to v22.8.4.7-lts (baad27bcd2f)
#### New Feature
* Backported in [#40870](https://github.com/ClickHouse/ClickHouse/issues/40870): Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). [#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)).
#### Improvement
* Backported in [#40817](https://github.com/ClickHouse/ClickHouse/issues/40817): The setting `show_addresses_in_stack_traces` was accidentally disabled in default `config.xml`. It's removed from the config now, so the setting is enabled by default. [#40749](https://github.com/ClickHouse/ClickHouse/pull/40749) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#40944](https://github.com/ClickHouse/ClickHouse/issues/40944): Fix issue with passing MySQL timeouts for MySQL database engine and MySQL table function. Closes [#34168](https://github.com/ClickHouse/ClickHouse/issues/34168)?notification_referrer_id=NT_kwDOAzsV57MzMDMxNjAzNTY5OjU0MjAzODc5. [#40751](https://github.com/ClickHouse/ClickHouse/pull/40751) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### Build/Testing/Packaging Improvement
* Backported in [#41157](https://github.com/ClickHouse/ClickHouse/issues/41157): Add macOS binaries to GH release assets, it fixes [#37718](https://github.com/ClickHouse/ClickHouse/issues/37718). [#41088](https://github.com/ClickHouse/ClickHouse/pull/41088) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#40866](https://github.com/ClickHouse/ClickHouse/issues/40866): - Fix crash while parsing values of type `Object` that contains arrays of variadic dimension. [#40483](https://github.com/ClickHouse/ClickHouse/pull/40483) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#40805](https://github.com/ClickHouse/ClickHouse/issues/40805): During insertion of a new query to the `ProcessList` allocations happen. If we reach the memory limit during these allocations we can not use `OvercommitTracker`, because `ProcessList::mutex` is already acquired. Fixes [#40611](https://github.com/ClickHouse/ClickHouse/issues/40611). [#40677](https://github.com/ClickHouse/ClickHouse/pull/40677) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#40777](https://github.com/ClickHouse/ClickHouse/issues/40777): Fix memory leak while pushing to MVs w/o query context (from Kafka/...). [#40732](https://github.com/ClickHouse/ClickHouse/pull/40732) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#41135](https://github.com/ClickHouse/ClickHouse/issues/41135): Fix access rights for `DESCRIBE TABLE url()` and some other `DESCRIBE TABLE <table_function>()`. [#40975](https://github.com/ClickHouse/ClickHouse/pull/40975) ([Vitaly Baranov](https://github.com/vitlibar)).
* Backported in [#41242](https://github.com/ClickHouse/ClickHouse/issues/41242): Fixed "possible deadlock avoided" error on automatic conversion of database engine from Ordinary to Atomic. [#41146](https://github.com/ClickHouse/ClickHouse/pull/41146) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41234](https://github.com/ClickHouse/ClickHouse/issues/41234): Fix background clean up of broken detached parts. [#41190](https://github.com/ClickHouse/ClickHouse/pull/41190) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* use ROBOT_CLICKHOUSE_COMMIT_TOKEN for create-pull-request [#40067](https://github.com/ClickHouse/ClickHouse/pull/40067) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* use input token instead of env var [#40421](https://github.com/ClickHouse/ClickHouse/pull/40421) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* CaresPTRResolver small safety improvement [#40890](https://github.com/ClickHouse/ClickHouse/pull/40890) ([Arthur Passos](https://github.com/arthurpassos)).

View File

@ -37,7 +37,7 @@ sudo xcode-select --install
``` bash
brew update
brew install cmake ninja libtool gettext llvm gcc binutils grep findutils
brew install ccache cmake ninja libtool gettext llvm gcc binutils grep findutils
```
## Checkout ClickHouse Sources {#checkout-clickhouse-sources}

View File

@ -140,6 +140,6 @@ hash cmake
ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour.
They are built for stable, prestable and testing releases as long as for every commit to master and for every pull request.
Binaries are built for stable and LTS releases and also every commit to `master` for each pull request.
To find the freshest build from `master`, go to [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green check mark or red cross near commit, and click to the “Details” link right after “ClickHouse Build Check”.

View File

@ -12,7 +12,7 @@ One ClickHouse server can have multiple replicated databases running and updatin
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...]
CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...]
```
**Engine Parameters**
@ -21,9 +21,7 @@ One ClickHouse server can have multiple replicated databases running and updatin
- `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`.
- `replica_name` — Replica name. Replica names must be different for all replicas of the same shard.
:::warning
For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables if no arguments provided, then default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Macro `{uuid}` is unfolded to table's uuid, `{shard}` and `{replica}` are unfolded to values from server config, not from database engine arguments. But in the future, it will be possible to use `shard_name` and `replica_name` of Replicated database.
:::
## Specifics and Recommendations {#specifics-and-recommendations}

View File

@ -16,12 +16,14 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = EmbeddedRocksDB([ttl]) PRIMARY KEY(primary_key_name)
) ENGINE = EmbeddedRocksDB([ttl, rocksdb_dir, read_only]) PRIMARY KEY(primary_key_name)
```
Engine parameters:
- `ttl` - time to live for values. TTL is accepted in seconds. If TTL is 0, regular RocksDB instance is used (without TTL).
- `rocksdb_dir` - path to the directory of an existed RocksDB or the destination path of the created RocksDB. Open the table with the specified `rocksdb_dir`.
- `read_only` - when `read_only` is set to true, read-only mode is used. For storage with TTL, compaction will not be triggered (neither manual nor automatic), so no expired entries are removed.
- `primary_key_name` any column name in the column list.
- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`.
- columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order.

View File

@ -15,7 +15,7 @@ Usage examples:
## Usage in ClickHouse Server {#usage-in-clickhouse-server}
``` sql
ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length)
ENGINE = GenerateRandom([random_seed] [,max_string_length] [,max_array_length])
```
The `max_array_length` and `max_string_length` parameters specify maximum length of all

View File

@ -0,0 +1,654 @@
---
slug: /en/getting-started/example-datasets/nypd_complaint_data
sidebar_label: NYPD Complaint Data
description: "Ingest and query Tab Separated Value data in 5 steps"
title: NYPD Complaint Data
---
Tab separated value, or TSV, files are common and may include field headings as the first line of the file. ClickHouse can ingest TSVs, and also can query TSVs without ingesting the files. This guide covers both of these cases. If you need to query or ingest CSV files, the same techniques work, simply substitute `TSV` with `CSV` in your format arguments.
While working through this guide you will:
- **Investigate**: Query the structure and content of the TSV file.
- **Determine the target ClickHouse schema**: Choose proper data types and map the existing data to those types.
- **Create a ClickHouse table**.
- **Preprocess and stream** the data to ClickHouse.
- **Run some queries** against ClickHouse.
The dataset used in this guide comes from the NYC Open Data team, and contains data about "all valid felony, misdemeanor, and violation crimes reported to the New York City Police Department (NYPD)". At the time of writing, the data file is 166MB, but it is updated regularly.
**Source**: [data.cityofnewyork.us](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243)
**Terms of use**: https://www1.nyc.gov/home/terms-of-use.page
## Prerequisites
- Download the dataset by visiting the [NYPD Complaint Data Current (Year To Date)](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243) page, clicking the Export button, and choosing **TSV for Excel**.
- Install [ClickHouse server and client](../../getting-started/install.md).
- [Launch](../../getting-started/install.md#launch) ClickHouse server, and connect with `clickhouse-client`
### A note about the commands described in this guide
There are two types of commands in this guide:
- Some of the commands are querying the TSV files, these are run at the command prompt.
- The rest of the commands are querying ClickHouse, and these are run in the `clickhouse-client` or Play UI.
:::note
The examples in this guide assume that you have saved the TSV file to `${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv`, please adjust the commands if needed.
:::
## Familiarize yourself with the TSV file
Before starting to work with the ClickHouse database familiarize yourself with the data.
### Look at the fields in the source TSV file
This is an example of a command to query a TSV file, but don't run it yet.
```sh
clickhouse-local --query \
"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')"
```
Sample response
```response
CMPLNT_NUM Nullable(Float64)
ADDR_PCT_CD Nullable(Float64)
BORO_NM Nullable(String)
CMPLNT_FR_DT Nullable(String)
CMPLNT_FR_TM Nullable(String)
```
:::tip
Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routineley used with datasets containing billions of records there is a default number (100) of rows examined to [infer the schema](../../guides/developer/working-with-json/json-semi-structured.md/#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
you can get a better idea of the content.
Note: as of version 22.5 the default is now 25,000 rows for inferring the schema, so only change the setting if you are on an older version or if you need more than 25,000 rows to be sampled.
:::
Run this command at your command prompt. You will be using `clickhouse-local` to query the data in the TSV file you downloaded.
```sh
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')"
```
Result:
```response
CMPLNT_NUM Nullable(String)
ADDR_PCT_CD Nullable(Float64)
BORO_NM Nullable(String)
CMPLNT_FR_DT Nullable(String)
CMPLNT_FR_TM Nullable(String)
CMPLNT_TO_DT Nullable(String)
CMPLNT_TO_TM Nullable(String)
CRM_ATPT_CPTD_CD Nullable(String)
HADEVELOPT Nullable(String)
HOUSING_PSA Nullable(Float64)
JURISDICTION_CODE Nullable(Float64)
JURIS_DESC Nullable(String)
KY_CD Nullable(Float64)
LAW_CAT_CD Nullable(String)
LOC_OF_OCCUR_DESC Nullable(String)
OFNS_DESC Nullable(String)
PARKS_NM Nullable(String)
PATROL_BORO Nullable(String)
PD_CD Nullable(Float64)
PD_DESC Nullable(String)
PREM_TYP_DESC Nullable(String)
RPT_DT Nullable(String)
STATION_NAME Nullable(String)
SUSP_AGE_GROUP Nullable(String)
SUSP_RACE Nullable(String)
SUSP_SEX Nullable(String)
TRANSIT_DISTRICT Nullable(Float64)
VIC_AGE_GROUP Nullable(String)
VIC_RACE Nullable(String)
VIC_SEX Nullable(String)
X_COORD_CD Nullable(Float64)
Y_COORD_CD Nullable(Float64)
Latitude Nullable(Float64)
Longitude Nullable(Float64)
Lat_Lon Tuple(Nullable(Float64), Nullable(Float64))
New Georeferenced Column Nullable(String)
```
At this point you should check that the columns in the TSV file match the names and types specified in the **Columns in this Dataset** section of the [dataset web page](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243). The data types are not very specific, all numeric fields are set to `Nullable(Float64)`, and all other fields are `Nullable(String)`. When you create a ClickHouse table to store the data you can specify more appropriate and performant types.
### Determine the proper schema
In order to figure out what types should be used for the fields it is necessary to know what the data looks like. For example, the field `JURISDICTION_CODE` is a numeric: should it be a `UInt8`, or an `Enum`, or is `Float64` appropriate?
```sql
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select JURISDICTION_CODE, count() FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
GROUP BY JURISDICTION_CODE
ORDER BY JURISDICTION_CODE
FORMAT PrettyCompact"
```
Result:
```response
┌─JURISDICTION_CODE─┬─count()─┐
│ 0 │ 188875 │
│ 1 │ 4799 │
│ 2 │ 13833 │
│ 3 │ 656 │
│ 4 │ 51 │
│ 6 │ 5 │
│ 7 │ 2 │
│ 9 │ 13 │
│ 11 │ 14 │
│ 12 │ 5 │
│ 13 │ 2 │
│ 14 │ 70 │
│ 15 │ 20 │
│ 72 │ 159 │
│ 87 │ 9 │
│ 88 │ 75 │
│ 97 │ 405 │
└───────────────────┴─────────┘
```
The query response shows that the `JURISDICTION_CODE` fits well in a `UInt8`.
Similarly, look at some of the `String` fields and see if they are well suited to being `DateTime` or [`LowCardinality(String)`](../../sql-reference/data-types/lowcardinality.md) fields.
For example, the field `PARKS_NM` is described as "Name of NYC park, playground or greenspace of occurrence, if applicable (state parks are not included)". The names of parks in New York City may be a good candidate for a `LowCardinality(String)`:
```sh
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select count(distinct PARKS_NM) FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─uniqExact(PARKS_NM)─┐
│ 319 │
└─────────────────────┘
```
Have a look at some of the park names:
```sql
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select distinct PARKS_NM FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
LIMIT 10
FORMAT PrettyCompact"
```
Result:
```response
┌─PARKS_NM───────────────────┐
│ (null) │
│ ASSER LEVY PARK │
│ JAMES J WALKER PARK │
│ BELT PARKWAY/SHORE PARKWAY │
│ PROSPECT PARK │
│ MONTEFIORE SQUARE │
│ SUTTON PLACE PARK │
│ JOYCE KILMER PARK │
│ ALLEY ATHLETIC PLAYGROUND │
│ ASTORIA PARK │
└────────────────────────────┘
```
The dataset in use at the time of writing has only a few hundred distinct parks and playgrounds in the `PARK_NM` column. This is a small number based on the [LowCardinality](../../sql-reference/data-types/lowcardinality.md#lowcardinality-dscr) recommendation to stay below 10,000 distinct strings in a `LowCardinality(String)` field.
### DateTime fields
Based on the **Columns in this Dataset** section of the [dataset web page](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243) there are date and time fields for the start and end of the reported event. Looking at the min and max of the `CMPLNT_FR_DT` and `CMPLT_TO_DT` gives an idea of whether or not the fields are always populated:
```sh title="CMPLNT_FR_DT"
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select min(CMPLNT_FR_DT), max(CMPLNT_FR_DT) FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─min(CMPLNT_FR_DT)─┬─max(CMPLNT_FR_DT)─┐
│ 01/01/1973 │ 12/31/2021 │
└───────────────────┴───────────────────┘
```
```sh title="CMPLNT_TO_DT"
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select min(CMPLNT_TO_DT), max(CMPLNT_TO_DT) FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─min(CMPLNT_TO_DT)─┬─max(CMPLNT_TO_DT)─┐
│ │ 12/31/2021 │
└───────────────────┴───────────────────┘
```
```sh title="CMPLNT_FR_TM"
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select min(CMPLNT_FR_TM), max(CMPLNT_FR_TM) FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─min(CMPLNT_FR_TM)─┬─max(CMPLNT_FR_TM)─┐
│ 00:00:00 │ 23:59:00 │
└───────────────────┴───────────────────┘
```
```sh title="CMPLNT_TO_TM"
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select min(CMPLNT_TO_TM), max(CMPLNT_TO_TM) FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─min(CMPLNT_TO_TM)─┬─max(CMPLNT_TO_TM)─┐
│ (null) │ 23:59:00 │
└───────────────────┴───────────────────┘
```
## Make a plan
Based on the above investigation:
- `JURISDICTION_CODE` should be cast as `UInt8`.
- `PARKS_NM` should be cast to `LowCardinality(String)`
- `CMPLNT_FR_DT` and `CMPLNT_FR_TM` are always populated (possibly with a default time of `00:00:00`)
- `CMPLNT_TO_DT` and `CMPLNT_TO_TM` may be empty
- Dates and times are stored in separate fields in the source
- Dates are `mm/dd/yyyy` format
- Times are `hh:mm:ss` format
- Dates and times can be concatenated into DateTime types
- There are some dates before January 1st 1970, which means we need a 64 bit DateTime
:::note
There are many more changes to be made to the types, they all can be determined by following the same investigation steps. Look at the number of distinct strings in a field, the min and max of the numerics, and make your decisions. The table schema that is given later in the guide has many low cardinality strings and unsigned integer fields and very few floating point numerics.
:::
## Concatenate the date and time fields
To concatenate the date and time fields `CMPLNT_FR_DT` and `CMPLNT_FR_TM` into a single `String` that can be cast to a `DateTime`, select the two fields joined by the concatenation operator: `CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM`. The `CMPLNT_TO_DT` and `CMPLNT_TO_TM` fields are handled similarly.
```sh
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM AS complaint_begin FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
LIMIT 10
FORMAT PrettyCompact"
```
Result:
```response
┌─complaint_begin─────┐
│ 07/29/2010 00:01:00 │
│ 12/01/2011 12:00:00 │
│ 04/01/2017 15:00:00 │
│ 03/26/2018 17:20:00 │
│ 01/01/2019 00:00:00 │
│ 06/14/2019 00:00:00 │
│ 11/29/2021 20:00:00 │
│ 12/04/2021 00:35:00 │
│ 12/05/2021 12:50:00 │
│ 12/07/2021 20:30:00 │
└─────────────────────┘
```
## Convert the date and time String to a DateTime64 type
Earlier in the guide we discovered that there are dates in the TSV file before January 1st 1970, which means that we need a 64 bit DateTime type for the dates. The dates also need to be converted from `MM/DD/YYYY` to `YYYY/MM/DD` format. Both of these can be done with [`parseDateTime64BestEffort()`](../../sql-reference/functions/type-conversion-functions.md#parsedatetime64besteffort).
```sh
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"WITH (CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM) AS CMPLNT_START,
(CMPLNT_TO_DT || ' ' || CMPLNT_TO_TM) AS CMPLNT_END
select parseDateTime64BestEffort(CMPLNT_START) AS complaint_begin,
parseDateTime64BestEffortOrNull(CMPLNT_END) AS complaint_end
FROM file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
ORDER BY complaint_begin ASC
LIMIT 25
FORMAT PrettyCompact"
```
Lines 2 and 3 above contain the concatenation from the previous step, and lines 4 and 5 above parse the strings into `DateTime64`. As the complaint end time is not guaranteed to exist `parseDateTime64BestEffortOrNull` is used.
Result:
```response
┌─────────complaint_begin─┬───────────complaint_end─┐
│ 1925-01-01 10:00:00.000 │ 2021-02-12 09:30:00.000 │
│ 1925-01-01 11:37:00.000 │ 2022-01-16 11:49:00.000 │
│ 1925-01-01 15:00:00.000 │ 2021-12-31 00:00:00.000 │
│ 1925-01-01 15:00:00.000 │ 2022-02-02 22:00:00.000 │
│ 1925-01-01 19:00:00.000 │ 2022-04-14 05:00:00.000 │
│ 1955-09-01 19:55:00.000 │ 2022-08-01 00:45:00.000 │
│ 1972-03-17 11:40:00.000 │ 2022-03-17 11:43:00.000 │
│ 1972-05-23 22:00:00.000 │ 2022-05-24 09:00:00.000 │
│ 1972-05-30 23:37:00.000 │ 2022-05-30 23:50:00.000 │
│ 1972-07-04 02:17:00.000 │ ᴺᵁᴸᴸ │
│ 1973-01-01 00:00:00.000 │ ᴺᵁᴸᴸ │
│ 1975-01-01 00:00:00.000 │ ᴺᵁᴸᴸ │
│ 1976-11-05 00:01:00.000 │ 1988-10-05 23:59:00.000 │
│ 1977-01-01 00:00:00.000 │ 1977-01-01 23:59:00.000 │
│ 1977-12-20 00:01:00.000 │ ᴺᵁᴸᴸ │
│ 1981-01-01 00:01:00.000 │ ᴺᵁᴸᴸ │
│ 1981-08-14 00:00:00.000 │ 1987-08-13 23:59:00.000 │
│ 1983-01-07 00:00:00.000 │ 1990-01-06 00:00:00.000 │
│ 1984-01-01 00:01:00.000 │ 1984-12-31 23:59:00.000 │
│ 1985-01-01 12:00:00.000 │ 1987-12-31 15:00:00.000 │
│ 1985-01-11 09:00:00.000 │ 1985-12-31 12:00:00.000 │
│ 1986-03-16 00:05:00.000 │ 2022-03-16 00:45:00.000 │
│ 1987-01-07 00:00:00.000 │ 1987-01-09 00:00:00.000 │
│ 1988-04-03 18:30:00.000 │ 2022-08-03 09:45:00.000 │
│ 1988-07-29 12:00:00.000 │ 1990-07-27 22:00:00.000 │
└─────────────────────────┴─────────────────────────┘
```
:::note
The dates shown as `1925` above are from errors in the data. There are several records in the original data with dates in the years `1019` - `1022` that should be `2019` - `2022`. They are being stored as Jan 1st 1925 as that is the earliest date with a 64 bit DateTime.
:::
## Create a table
The decisions made above on the data types used for the columns are reflected in the table schema
below. We also need to decide on the `ORDER BY` and `PRIMARY KEY` used for the table. At least one
of `ORDER BY` or `PRIMARY KEY` must be specified. Here are some guidelines on deciding on the
columns to includes in `ORDER BY`, and more information is in the *Next Steps* section at the end
of this document.
### Order By and Primary Key clauses
- The `ORDER BY` tuple should include fields that are used in query filters
- To maximize compression on disk the `ORDER BY` tuple should be ordered by ascending cardinality
- If it exists, the `PRIMARY KEY` tuple must be a subset of the `ORDER BY` tuple
- If only `ORDER BY` is specified, then the same tuple will be used as `PRIMARY KEY`
- The primary key index is created using the `PRIMARY KEY` tuple if specified, otherwise the `ORDER BY` tuple
- The `PRIMARY KEY` index is kept in main memory
Looking at the dataset and the questions that might be answered by querying it we might
decide that we would look at the types of crimes reported over time in the five boroughs of
New York City. These fields might be then included in the `ORDER BY`:
| Column | Description (from the data dictionary) |
| ----------- | --------------------------------------------------- |
| OFNS_DESC | Description of offense corresponding with key code |
| RPT_DT | Date event was reported to police |
| BORO_NM | The name of the borough in which the incident occurred |
Querying the TSV file for the cardinality of the three candidate columns:
```bash
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select formatReadableQuantity(uniq(OFNS_DESC)) as cardinality_OFNS_DESC,
formatReadableQuantity(uniq(RPT_DT)) as cardinality_RPT_DT,
formatReadableQuantity(uniq(BORO_NM)) as cardinality_BORO_NM
FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─cardinality_OFNS_DESC─┬─cardinality_RPT_DT─┬─cardinality_BORO_NM─┐
│ 60.00 │ 306.00 │ 6.00 │
└───────────────────────┴────────────────────┴─────────────────────┘
```
Ordering by cardinality, the `ORDER BY` becomes:
```
ORDER BY ( BORO_NM, OFNS_DESC, RPT_DT )
```
:::note
The table below will use more easily read column names, the above names will be mapped to
```
ORDER BY ( borough, offense_description, date_reported )
```
:::
Putting together the changes to data types and the `ORDER BY` tuple gives this table structure:
```sql
CREATE TABLE NYPD_Complaint (
complaint_number String,
precinct UInt8,
borough LowCardinality(String),
complaint_begin DateTime64(0,'America/New_York'),
complaint_end DateTime64(0,'America/New_York'),
was_crime_completed String,
housing_authority String,
housing_level_code UInt32,
jurisdiction_code UInt8,
jurisdiction LowCardinality(String),
offense_code UInt8,
offense_level LowCardinality(String),
location_descriptor LowCardinality(String),
offense_description LowCardinality(String),
park_name LowCardinality(String),
patrol_borough LowCardinality(String),
PD_CD UInt16,
PD_DESC String,
location_type LowCardinality(String),
date_reported Date,
transit_station LowCardinality(String),
suspect_age_group LowCardinality(String),
suspect_race LowCardinality(String),
suspect_sex LowCardinality(String),
transit_district UInt8,
victim_age_group LowCardinality(String),
victim_race LowCardinality(String),
victim_sex LowCardinality(String),
NY_x_coordinate UInt32,
NY_y_coordinate UInt32,
Latitude Float64,
Longitude Float64
) ENGINE = MergeTree
ORDER BY ( borough, offense_description, date_reported )
```
### Finding the primary key of a table
The ClickHouse `system` database, specifically `system.table` has all of the information about the table you
just created. This query shows the `ORDER BY` (sorting key), and the `PRIMARY KEY`:
```sql
SELECT
partition_key,
sorting_key,
primary_key,
table
FROM system.tables
WHERE table = 'NYPD_Complaint'
FORMAT Vertical
```
Response
```response
Query id: 6a5b10bf-9333-4090-b36e-c7f08b1d9e01
Row 1:
──────
partition_key:
sorting_key: borough, offense_description, date_reported
primary_key: borough, offense_description, date_reported
table: NYPD_Complaint
1 row in set. Elapsed: 0.001 sec.
```
## Preprocess and Import Data {#preprocess-import-data}
We will use `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
### `clickhouse-local` arguments used
:::tip
`table='input'` appears in the arguments to clickhouse-local below. clickhouse-local takes the provided input (`cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv`) and inserts the input into a table. By default the table is named `table`. In this guide the name of the table is set to `input` to make the data flow clearer. The final argument to clickhouse-local is a query that selects from the table (`FROM input`) which is then piped to `clickhouse-client` to populate the table `NYPD_Complaint`.
:::
```sql
cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \
| clickhouse-local --table='input' --input-format='TSVWithNames' \
--input_format_max_rows_to_read_for_schema_inference=2000 \
--query "
WITH (CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM) AS CMPLNT_START,
(CMPLNT_TO_DT || ' ' || CMPLNT_TO_TM) AS CMPLNT_END
SELECT
CMPLNT_NUM AS complaint_number,
ADDR_PCT_CD AS precinct,
BORO_NM AS borough,
parseDateTime64BestEffort(CMPLNT_START) AS complaint_begin,
parseDateTime64BestEffortOrNull(CMPLNT_END) AS complaint_end,
CRM_ATPT_CPTD_CD AS was_crime_completed,
HADEVELOPT AS housing_authority_development,
HOUSING_PSA AS housing_level_code,
JURISDICTION_CODE AS jurisdiction_code,
JURIS_DESC AS jurisdiction,
KY_CD AS offense_code,
LAW_CAT_CD AS offense_level,
LOC_OF_OCCUR_DESC AS location_descriptor,
OFNS_DESC AS offense_description,
PARKS_NM AS park_name,
PATROL_BORO AS patrol_borough,
PD_CD,
PD_DESC,
PREM_TYP_DESC AS location_type,
toDate(parseDateTimeBestEffort(RPT_DT)) AS date_reported,
STATION_NAME AS transit_station,
SUSP_AGE_GROUP AS suspect_age_group,
SUSP_RACE AS suspect_race,
SUSP_SEX AS suspect_sex,
TRANSIT_DISTRICT AS transit_district,
VIC_AGE_GROUP AS victim_age_group,
VIC_RACE AS victim_race,
VIC_SEX AS victim_sex,
X_COORD_CD AS NY_x_coordinate,
Y_COORD_CD AS NY_y_coordinate,
Latitude,
Longitude
FROM input" \
| clickhouse-client --query='INSERT INTO NYPD_Complaint FORMAT TSV'
```
## Validate the Data {#validate-data}
:::note
The dataset changes once or more per year, your counts may not match what is in this document.
:::
Query:
```sql
SELECT count()
FROM NYPD_Complaint
```
Result:
```text
┌─count()─┐
│ 208993 │
└─────────┘
1 row in set. Elapsed: 0.001 sec.
```
The size of the dataset in ClickHouse is just 12% of the original TSV file, compare the size of the original TSV file with the size of the table:
Query:
```sql
SELECT formatReadableSize(total_bytes)
FROM system.tables
WHERE name = 'NYPD_Complaint'
```
Result:
```text
┌─formatReadableSize(total_bytes)─┐
│ 8.63 MiB │
└─────────────────────────────────┘
```
## Run Some Queries {#run-queries}
### Query 1. Compare the number of complaints by month
Query:
```sql
SELECT
dateName('month', date_reported) AS month,
count() AS complaints,
bar(complaints, 0, 50000, 80)
FROM NYPD_Complaint
GROUP BY month
ORDER BY complaints DESC
```
Result:
```response
Query id: 7fbd4244-b32a-4acf-b1f3-c3aa198e74d9
┌─month─────┬─complaints─┬─bar(count(), 0, 50000, 80)───────────────────────────────┐
│ March │ 34536 │ ███████████████████████████████████████████████████████▎ │
│ May │ 34250 │ ██████████████████████████████████████████████████████▋ │
│ April │ 32541 │ ████████████████████████████████████████████████████ │
│ January │ 30806 │ █████████████████████████████████████████████████▎ │
│ February │ 28118 │ ████████████████████████████████████████████▊ │
│ November │ 7474 │ ███████████▊ │
│ December │ 7223 │ ███████████▌ │
│ October │ 7070 │ ███████████▎ │
│ September │ 6910 │ ███████████ │
│ August │ 6801 │ ██████████▊ │
│ June │ 6779 │ ██████████▋ │
│ July │ 6485 │ ██████████▍ │
└───────────┴────────────┴──────────────────────────────────────────────────────────┘
12 rows in set. Elapsed: 0.006 sec. Processed 208.99 thousand rows, 417.99 KB (37.48 million rows/s., 74.96 MB/s.)
```
### Query 2. Compare total number of complaints by Borough
Query:
```sql
SELECT
borough,
count() AS complaints,
bar(complaints, 0, 125000, 60)
FROM NYPD_Complaint
GROUP BY borough
ORDER BY complaints DESC
```
Result:
```response
Query id: 8cdcdfd4-908f-4be0-99e3-265722a2ab8d
┌─borough───────┬─complaints─┬─bar(count(), 0, 125000, 60)──┐
│ BROOKLYN │ 57947 │ ███████████████████████████▋ │
│ MANHATTAN │ 53025 │ █████████████████████████▍ │
│ QUEENS │ 44875 │ █████████████████████▌ │
│ BRONX │ 44260 │ █████████████████████▏ │
│ STATEN ISLAND │ 8503 │ ████ │
│ (null) │ 383 │ ▏ │
└───────────────┴────────────┴──────────────────────────────┘
6 rows in set. Elapsed: 0.008 sec. Processed 208.99 thousand rows, 209.43 KB (27.14 million rows/s., 27.20 MB/s.)
```
## Next Steps
[A Practical Introduction to Sparse Primary Indexes in ClickHouse](../../guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-intro.md) discusses the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.

View File

@ -6,19 +6,28 @@ title: "UK Property Price Paid"
---
The dataset contains data about prices paid for real-estate property in England and Wales. The data is available since year 1995.
The size of the dataset in uncompressed form is about 4 GiB and it will take about 270 MiB in ClickHouse.
The size of the dataset in uncompressed form is about 4 GiB and it will take about 278 MiB in ClickHouse.
Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads <br/>
Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
## Download the Dataset {#download-dataset}
Run the command:
```bash
wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv
```
Download will take about 2 minutes with good internet connection.
## Create the Table {#create-table}
```sql
CREATE TABLE uk_price_paid
(
uuid UUID,
price UInt32,
date Date,
postcode1 LowCardinality(String),
@ -33,68 +42,65 @@ CREATE TABLE uk_price_paid
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String),
category UInt8,
category2 UInt8
) ORDER BY (postcode1, postcode2, addr1, addr2);
category UInt8
) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
```
## Preprocess and Import Data {#preprocess-import-data}
In this example, we define the structure of source data from the CSV file and specify a query to preprocess the data with either `clickhouse-client` or the web based Play UI.
We will use `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
In this example, we define the structure of source data from the CSV file and specify a query to preprocess the data with `clickhouse-local`.
The preprocessing is:
- splitting the postcode to two different columns `postcode1` and `postcode2` that are better for storage and queries;
- splitting the postcode to two different columns `postcode1` and `postcode2` that is better for storage and queries;
- coverting the `time` field to date as it only contains 00:00 time;
- ignoring the [UUid](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis;
- transforming `type` and `duration` to more readable Enum fields with function [transform](../../sql-reference/functions/other-functions.md#transform);
- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 and 1.
Preprocessed data is piped directly to `clickhouse-client` to be inserted into ClickHouse table in streaming fashion.
```bash
INSERT INTO uk_price_paid
WITH
splitByChar(' ', postcode) AS p
SELECT
replaceRegexpAll(uuid_string, '{|}','') AS uuid,
toUInt32(price_string) AS price,
parseDateTimeBestEffortUS(time) AS date,
p[1] AS postcode1,
p[2] AS postcode2,
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
b = 'Y' AS is_new,
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
addr1,
addr2,
street,
locality,
town,
district,
county,
d = 'B' AS category,
e = 'B' AS category2
FROM url(
'http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv',
'CSV',
'uuid_string String,
price_string String,
time String,
postcode String,
a String,
b String,
c String,
addr1 String,
addr2 String,
street String,
locality String,
town String,
district String,
county String,
d String,
e String'
)
SETTINGS max_http_get_redirects=1;
clickhouse-local --input-format CSV --structure '
uuid String,
price UInt32,
time DateTime,
postcode String,
a String,
b String,
c String,
addr1 String,
addr2 String,
street String,
locality String,
town String,
district String,
county String,
d String,
e String
' --query "
WITH splitByChar(' ', postcode) AS p
SELECT
price,
toDate(time) AS date,
p[1] AS postcode1,
p[2] AS postcode2,
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
b = 'Y' AS is_new,
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
addr1,
addr2,
street,
locality,
town,
district,
county,
d = 'B' AS category
FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV"
```
It will take about 2 minutes depending on where you are in the world, and where your ClickHouse servers are. Almost all of the time is the download time of the CSV file from the UK government server.
It will take about 40 seconds.
## Validate the Data {#validate-data}
@ -106,13 +112,13 @@ SELECT count() FROM uk_price_paid;
Result:
```response
```text
┌──count()─┐
│ 27450499
│ 26321785
└──────────┘
```
The size of dataset in ClickHouse is just 540 MiB, check it.
The size of dataset in ClickHouse is just 278 MiB, check it.
Query:
@ -124,14 +130,10 @@ Result:
```text
┌─formatReadableSize(total_bytes)─┐
545.04 MiB │
278.80 MiB │
└─────────────────────────────────┘
```
:::note
The above size is for a replicated table, if you are using this dataset with a single instance the size will be half.
:::
## Run Some Queries {#run-queries}
### Query 1. Average Price Per Year {#average-price}
@ -144,7 +146,7 @@ SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000,
Result:
```response
```text
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
│ 1995 │ 67932 │ █████▍ │
│ 1996 │ 71505 │ █████▋ │

View File

@ -4,10 +4,9 @@ sidebar_position: 1
keywords: [clickhouse, install, installation, docs]
description: ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
slug: /en/getting-started/install
title: Installation
---
# Installation
## System Requirements {#system-requirements}
ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
@ -59,7 +58,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
</details>
You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs.
You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs.
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable).
@ -106,7 +105,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
</details>
If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available.
You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs.
Then run these commands to install packages:
@ -221,7 +220,7 @@ For non-Linux operating systems and for AArch64 CPU architecture, ClickHouse bui
curl -O 'https://builds.clickhouse.com/master/aarch64/clickhouse' && chmod a+x ./clickhouse
```
Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run `clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it.
Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run `sudo clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it.
Use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data.

View File

@ -175,6 +175,10 @@ You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP
- `br`
- `deflate`
- `xz`
- `zstd`
- `lz4`
- `bz2`
- `snappy`
To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`.
In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods.

View File

@ -103,6 +103,7 @@ ClickHouse, Inc. does **not** maintain the tools and libraries listed below and
- [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)
- [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net)
- [ClickHouse.Net.Migrations](https://github.com/ilyabreev/ClickHouse.Net.Migrations)
- [Linq To DB](https://github.com/linq2db/linq2db)
- Elixir
- [Ecto](https://github.com/elixir-ecto/ecto)
- [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto)

View File

@ -2,10 +2,9 @@
slug: /en/operations/backup
sidebar_position: 49
sidebar_label: Data backup and restore
title: Data backup and restore
---
# Data backup and restore
While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you cant just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards do not cover all possible cases and can be circumvented.
In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data **in advance**.

View File

@ -20,6 +20,7 @@ Additional cache types:
- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache.
- [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
- Schema inference cache.
- [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks.
Indirectly used:

View File

@ -1452,7 +1452,7 @@ Port for communicating with clients over MySQL protocol.
**Possible values**
Positive integer.
Positive integer to specify the port number to listen to or empty value to disable.
Example
@ -1466,7 +1466,7 @@ Port for communicating with clients over PostgreSQL protocol.
**Possible values**
Positive integer.
Positive integer to specify the port number to listen to or empty value to disable.
Example

View File

@ -1176,8 +1176,9 @@ Enables the quorum writes.
- If `insert_quorum < 2`, the quorum writes are disabled.
- If `insert_quorum >= 2`, the quorum writes are enabled.
- If `insert_quorum = 'auto'`, use majority number (`number_of_replicas / 2 + 1`) as quorum number.
Default value: 0.
Default value: 0 - disabled.
Quorum writes

View File

@ -112,6 +112,133 @@ Example of disk configuration:
</clickhouse>
```
## Using local cache {#using-local-cache}
It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. Cache uses `LRU` cache policy.
Example of configuration for versions later or equal to 22.8:
``` xml
<clickhouse>
<storage_configuration>
<disks>
<s3>
<type>s3</type>
<endpoint>...</endpoint>
... s3 configuration ...
</s3>
<cache>
<type>cache</type>
<disk>s3</disk>
<path>/s3_cache/</path>
<max_size>10000000</max_size>
</cache>
</disks>
<policies>
<volumes>
<main>
<disk>cache</disk>
</main>
</volumes>
<policies>
</storage_configuration>
```
Example of configuration for versions earlier than 22.8:
``` xml
<clickhouse>
<storage_configuration>
<disks>
<s3>
<type>s3</type>
<endpoint>...</endpoint>
... s3 configuration ...
<data_cache_enabled>1</data_cache_enabled>
<data_cache_size>10000000</data_cache_size>
</s3>
</disks>
<policies>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
<policies>
</storage_configuration>
```
Cache **configuration settings**:
- `path` - path to the directory with cache. Default: None, this setting is obligatory.
- `max_size` - maximum size of the cache in bytes. When the limit is reached, cache files are evicted according to the cache eviction policy. Default: None, this setting is obligatory.
- `cache_on_write_operations` - allow to turn on `write-through` cache (caching data on any write operations: `INSERT` queries, background merges). Default: `false`. The `write-through` cache can be disabled per query using setting `enable_filesystem_cache_on_write_operations` (data is cached only if both cache config settings and corresponding query setting are enabled).
- `enable_filesystem_query_cache_limit` - allow to limit the size of cache which is downloaded within each query (depends on user setting `max_query_cache_size`). Default: `false`.
- `enable_cache_hits_threshold` - a number, which defines how many times some data needs to be read before it will be cached. Default: `0`, e.g. the data is cached at the first attempt to read it.
- `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading.
- `max_file_segment_size` - a maximum size of a single cache file. Default: `104857600` (100 Mb).
- `max_elements` - a limit for a number of cache files. Default: `1048576`.
Cache **query settings**:
- `enable_filesystem_cache` - allows to disable cache per query even if storage policy was configured with `cache` disk type. Default: `true`.
- `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` - allows to use cache in query only if it already exists, otherwise query data will not be written to local cache storage. Default: `false`.
- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on.
- `enable_filesystem_cache_log` - turn on logging to `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. Default: `false`.
- `max_query_cache_size` - a limit for the cache size, which can be written to local cache storage. Requires enabled `enable_filesystem_query_cache_limit` in cache configuration. Default: `false`.
- `skip_download_if_exceeds_query_cache` - allows to change the behaviour of setting `max_query_cache_size`. Default: `true`. If this setting is turned on and cache download limit during query was reached, no more cache will be downloaded to cache storage. If this setting is turned off and cache download limit during query was reached, cache will still be written by cost of evicting previously downloaded (within current query) data, e.g. second behaviour allows to preserve `last recentltly used` behaviour while keeping query cache limit.
** Warning **
Cache configuration settings and cache query settings correspond to the latest ClickHouse version, for earlier versions something might not be supported.
Cache **system tables**:
- `system.filesystem_cache` - system tables which shows current state of cache.
- `system.filesystem_cache_log` - system table which shows detailed cache usage per query. Requires `enable_filesystem_cache_log` setting to be `true`.
Cache **commands**:
- `SYSTEM DROP FILESYSTEM CACHE (<path>) (ON CLUSTER)`
- `SHOW CACHES` -- show list of caches which were configured on the server.
- `DESCRIBE CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW CACHES` command.
Cache current metrics:
- `FilesystemCacheSize`
- `FilesystemCacheElements`
Cache asynchronous metrics:
- `FilesystemCacheBytes`
- `FilesystemCacheFiles`
Cache profile events:
- `CachedReadBufferReadFromSourceBytes`, `CachedReadBufferReadFromCacheBytes,`
- `CachedReadBufferReadFromSourceMicroseconds`, `CachedReadBufferReadFromCacheMicroseconds`
- `CachedReadBufferCacheWriteBytes`, `CachedReadBufferCacheWriteMicroseconds`
- `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds`
## Storing Data on Web Server {#storing-data-on-webserver}
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.

View File

@ -74,13 +74,16 @@ Make sure that [`fstrim`](https://en.wikipedia.org/wiki/Trim_(computing)) is ena
## File System {#file-system}
Ext4 is the most reliable option. Set the mount options `noatime`.
XFS should be avoided. It works mostly fine but there are some reports about lower performance.
Ext4 is the most reliable option. Set the mount options `noatime`. XFS works well too.
Most other file systems should also work fine.
FAT-32 and exFAT are not supported due to lack of hard links.
Do not use compressed filesystems, because ClickHouse does compression on its own and better.
It's not recommended to use encrypted filesystems, because you can use builtin encryption in ClickHouse, which is better.
While ClickHouse can work over NFS, it is not the best idea.
## Linux Kernel {#linux-kernel}
Dont use an outdated Linux kernel.

View File

@ -2,10 +2,9 @@
slug: /en/operations/troubleshooting
sidebar_position: 46
sidebar_label: Troubleshooting
title: Troubleshooting
---
# Troubleshooting
- [Installation](#troubleshooting-installation-errors)
- [Connecting to the server](#troubleshooting-accepts-no-connections)
- [Query processing](#troubleshooting-does-not-process-queries)

View File

@ -94,6 +94,21 @@ It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to onl
- If the source is HTTP then `update_field` will be added as a query parameter with the last update time as the parameter value.
- If the source is Executable then `update_field` will be added as an executable script argument with the last update time as the argument value.
- If the source is ClickHouse, MySQL, PostgreSQL, ODBC there will be an additional part of `WHERE`, where `update_field` is compared as greater or equal with the last update time.
- Per default, this `WHERE`-condition is checked at the highest level of the SQL-Query. Alternatively, the condition can be checked in any other `WHERE`-clause within the query using the `{condition}`-keyword. Example:
```sql
...
SOURCE(CLICKHOUSE(...
update_field 'added_time'
QUERY '
SELECT my_arr.1 AS x, my_arr.2 AS y, creation_time
FROM (
SELECT arrayZip(x_arr, y_arr) AS my_arr, creation_time
FROM dictionary_source
WHERE {condition}
)'
))
...
```
If `update_field` option is set, additional option `update_lag` can be set. Value of `update_lag` option is subtracted from previous update time before request updated data.

View File

@ -267,8 +267,8 @@ Result:
└────────────────┘
```
:::Attention
The return type of the `toStartOf*`, `toLastDayOfMonth`, `toMonday` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) which is `false` by default.
:::note
The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) which is `false` by default.
Behavior for
* `enable_extended_results_for_datetime_functions = false`: Functions `toStartOf*`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results. In case argument is out of normal range:
@ -643,6 +643,7 @@ Result:
## date\_diff
Returns the difference between two dates or dates with time values.
The difference is calculated using relative units, e.g. the difference between `2022-01-01` and `2021-12-29` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)).
**Syntax**
@ -694,6 +695,25 @@ Result:
└────────────────────────────────────────────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT
toDate('2022-01-01') AS e,
toDate('2021-12-29') AS s,
dateDiff('day', s, e) AS day_diff,
dateDiff('month', s, e) AS month__diff,
dateDiff('year', s, e) AS year_diff;
```
Result:
``` text
┌──────────e─┬──────────s─┬─day_diff─┬─month__diff─┬─year_diff─┐
│ 2022-01-01 │ 2021-12-29 │ 3 │ 1 │ 1 │
└────────────┴────────────┴──────────┴─────────────┴───────────┘
```
## date\_sub
Subtracts the time interval or date interval from the provided date or date with time.
@ -1209,6 +1229,8 @@ Result:
Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type.
Alias: `fromUnixTimestamp`.
**Example:**
Query:

View File

@ -430,5 +430,119 @@ Result:
└────────────────────────────┘
```
## mapApply
**Syntax**
```sql
mapApply(func, map)
```
**Parameters**
- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
**Returned value**
- Returns a map obtained from the original map by application of `func(map1[i], …, mapN[i])` for each element.
**Example**
Query:
```sql
SELECT mapApply((k, v) -> (k, v * 10), _map) AS r
FROM
(
SELECT map('key1', number, 'key2', number * 2) AS _map
FROM numbers(3)
)
```
Result:
```text
┌─r─────────────────────┐
│ {'key1':0,'key2':0} │
│ {'key1':10,'key2':20} │
│ {'key1':20,'key2':40} │
└───────────────────────┘
```
## mapFilter
**Syntax**
```sql
mapFilter(func, map)
```
**Parameters**
- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
**Returned value**
- Returns a map containing only the elements in `map` for which `func(map1[i], …, mapN[i])` returns something other than 0.
**Example**
Query:
```sql
SELECT mapFilter((k, v) -> ((v % 2) = 0), _map) AS r
FROM
(
SELECT map('key1', number, 'key2', number * 2) AS _map
FROM numbers(3)
)
```
Result:
```text
┌─r───────────────────┐
│ {'key1':0,'key2':0} │
│ {'key2':2} │
│ {'key1':2,'key2':4} │
└─────────────────────┘
```
## mapUpdate
**Syntax**
```sql
mapUpdate(map1, map2)
```
**Parameters**
- `map1` [Map](../../sql-reference/data-types/map.md).
- `map2` [Map](../../sql-reference/data-types/map.md).
**Returned value**
- Returns a map1 with values updated of values for the corresponding keys in map2.
**Example**
Query:
```sql
SELECT mapUpdate(map('key1', 0, 'key3', 0), map('key1', 10, 'key2', 10)) AS map;
```
Result:
```text
┌─map────────────────────────────┐
│ {'key3':0,'key1':10,'key2':10} │
└────────────────────────────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide-->

View File

@ -0,0 +1,94 @@
---
slug: /en/sql-reference/functions/uniqtheta-functions
---
# uniqTheta Functions
uniqTheta functions work for two uniqThetaSketch objects to do set operation calculations such as / ∩ / × (union/intersect/not), it is to return a new uniqThetaSketch object contain the result.
A uniqThetaSketch object is to be constructed by aggregation function uniqTheta with -State.
UniqThetaSketch is a data structure storage of approximate values set.
For more information on RoaringBitmap, see: [Theta Sketch Framework](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html).
## uniqThetaUnion
Two uniqThetaSketch objects to do union calculation(set operation ), the result is a new uniqThetaSketch.
``` sql
uniqThetaUnion(uniqThetaSketch,uniqThetaSketch)
```
**Arguments**
- `uniqThetaSketch` uniqThetaSketch object.
**Example**
``` sql
select finalizeAggregation(uniqThetaUnion(a, b)) as a_union_b, finalizeAggregation(a) as a_cardinality, finalizeAggregation(b) as b_cardinality
from
(select arrayReduce('uniqThetaState',[1,2]) as a, arrayReduce('uniqThetaState',[2,3,4]) as b );
```
``` text
┌─a_union_b─┬─a_cardinality─┬─b_cardinality─┐
│ 4 │ 2 │ 3 │
└───────────┴───────────────┴───────────────┘
```
## uniqThetaIntersect
Two uniqThetaSketch objects to do intersect calculation(set operation ∩), the result is a new uniqThetaSketch.
``` sql
uniqThetaIntersect(uniqThetaSketch,uniqThetaSketch)
```
**Arguments**
- `uniqThetaSketch` uniqThetaSketch object.
**Example**
``` sql
select finalizeAggregation(uniqThetaIntersect(a, b)) as a_intersect_b, finalizeAggregation(a) as a_cardinality, finalizeAggregation(b) as b_cardinality
from
(select arrayReduce('uniqThetaState',[1,2]) as a, arrayReduce('uniqThetaState',[2,3,4]) as b );
```
``` text
┌─a_intersect_b─┬─a_cardinality─┬─b_cardinality─┐
│ 1 │ 2 │ 3 │
└───────────────┴───────────────┴───────────────┘
```
## uniqThetaNot
Two uniqThetaSketch objects to do a_not_b calculation(set operation ×), the result is a new uniqThetaSketch.
``` sql
uniqThetaNot(uniqThetaSketch,uniqThetaSketch)
```
**Arguments**
- `uniqThetaSketch` uniqThetaSketch object.
**Example**
``` sql
select finalizeAggregation(uniqThetaNot(a, b)) as a_not_b, finalizeAggregation(a) as a_cardinality, finalizeAggregation(b) as b_cardinality
from
(select arrayReduce('uniqThetaState',[2,3,4]) as a, arrayReduce('uniqThetaState',[1,2]) as b );
```
``` text
┌─a_not_b─┬─a_cardinality─┬─b_cardinality─┐
│ 2 │ 3 │ 2 │
└─────────┴───────────────┴───────────────┘
```
**See Also**
- [uniqThetaSketch](../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)

View File

@ -12,8 +12,9 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr
Deletes data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
:::note
The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use.
:::note
The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. `ALTER TABLE` is considered a heavyweight operation that requires the underlying data to be merged before it is deleted. For MergeTree tables, consider using the [`DELETE FROM` query](../delete.md), which performs a lightweight delete and can be considerably faster.
:::
The `filter_expr` must be of type `UInt8`. The query deletes rows in the table for which this expression takes a non-zero value.

View File

@ -0,0 +1,43 @@
---
slug: /en/sql-reference/statements/delete
sidebar_position: 36
sidebar_label: DELETE
---
# DELETE Statement
``` sql
DELETE FROM [db.]table [WHERE expr]
```
`DELETE FROM` removes rows from table `[db.]table` that match expression `expr`. The deleted rows are marked as deleted immediately and will be automatically filtered out of all subsequent queries. Cleanup of data happens asynchronously in background. This feature is only available for MergeTree table engine family.
For example, the following query deletes all rows from the `hits` table where the `Title` column contains the text `hello`:
```sql
DELETE FROM hits WHERE Title LIKE '%hello%';
```
Lightweight deletes are asynchronous by default. Set `mutations_sync` equal to 1 to wait for one replica to process the statement, and set `mutations_sync` to 2 to wait for all replicas.
:::note
This feature is experimental and requires you to set `allow_experimental_lightweight_delete` to true:
```sql
SET allow_experimental_lightweight_delete = true;
```
:::
An [alternative way to delete rows](./alter/delete.md) in ClickHouse is `ALTER TABLE ... DELETE`, which might be more efficient if you do bulk deletes only occasionally and don't need the operation to be applied instantly. In most use cases the new lightweight `DELETE FROM` behavior will be considerably faster.
:::warning
Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Ligthweight deletes are currently efficient for wide parts, but for compact parts they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios.
:::
:::note
`DELETE FROM` requires the `ALTER DELETE` privilege:
```sql
grant ALTER DELETE ON db.table to username;
```
:::

View File

@ -10,7 +10,7 @@ Makes the server "forget" about the existence of a table, a materialized view, o
**Syntax**
``` sql
DETACH TABLE|VIEW|DICTIONARY [IF EXISTS] [db.]name [ON CLUSTER cluster] [PERMANENTLY]
DETACH TABLE|VIEW|DICTIONARY [IF EXISTS] [db.]name [ON CLUSTER cluster] [PERMANENTLY] [SYNC]
```
Detaching does not delete the data or metadata of a table, a materialized view or a dictionary. If an entity was not detached `PERMANENTLY`, on the next server launch the server will read the metadata and recall the table/view/dictionary again. If an entity was detached `PERMANENTLY`, there will be no automatic recall.
@ -24,6 +24,8 @@ Note that you can not detach permanently the table which is already detached (te
Also you can not [DROP](../../sql-reference/statements/drop#drop-table) the detached table, or [CREATE TABLE](../../sql-reference/statements/create/table.md) with the same name as detached permanently, or replace it with the other table with [RENAME TABLE](../../sql-reference/statements/rename.md) query.
The `SYNC` modifier executes the action without delay.
**Example**
Creating a table:

View File

@ -6,7 +6,7 @@ sidebar_label: DROP
# DROP Statements
Deletes existing entity. If the `IF EXISTS` clause is specified, these queries do not return an error if the entity does not exist.
Deletes existing entity. If the `IF EXISTS` clause is specified, these queries do not return an error if the entity does not exist. If the `SYNC` modifier is specified, the entity is dropped without delay.
## DROP DATABASE
@ -15,7 +15,7 @@ Deletes all tables inside the `db` database, then deletes the `db` database itse
Syntax:
``` sql
DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster]
DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC]
```
## DROP TABLE
@ -25,7 +25,7 @@ Deletes the table.
Syntax:
``` sql
DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] [SYNC]
```
## DROP DICTIONARY
@ -35,7 +35,7 @@ Deletes the dictionary.
Syntax:
``` sql
DROP DICTIONARY [IF EXISTS] [db.]name
DROP DICTIONARY [IF EXISTS] [db.]name [SYNC]
```
## DROP USER
@ -95,7 +95,7 @@ Deletes a view. Views can be deleted by a `DROP TABLE` command as well but `DROP
Syntax:
``` sql
DROP VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster]
DROP VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster] [SYNC]
```
## DROP FUNCTION

View File

@ -303,7 +303,7 @@ SHOW USERS
## SHOW ROLES
Returns a list of [roles](../../operations/access-rights.md#role-management). To view another parameters, see system tables [system.roles](../../operations/system-tables/roles.md#system_tables-roles) and [system.role-grants](../../operations/system-tables/role-grants.md#system_tables-role_grants).
Returns a list of [roles](../../operations/access-rights.md#role-management). To view another parameters, see system tables [system.roles](../../operations/system-tables/roles.md#system_tables-roles) and [system.role_grants](../../operations/system-tables/role-grants.md#system_tables-role_grants).
### Syntax

View File

@ -6,45 +6,6 @@ sidebar_label: SYSTEM
# SYSTEM Statements
The list of available `SYSTEM` statements:
- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [RELOAD MODELS](#query_language-system-reload-models)
- [RELOAD MODEL](#query_language-system-reload-model)
- [RELOAD FUNCTIONS](#query_language-system-reload-functions)
- [RELOAD FUNCTION](#query_language-system-reload-functions)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [DROP REPLICA](#query_language-system-drop-replica)
- [FLUSH LOGS](#query_language-system-flush_logs)
- [RELOAD CONFIG](#query_language-system-reload-config)
- [SHUTDOWN](#query_language-system-shutdown)
- [KILL](#query_language-system-kill)
- [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends)
- [FLUSH DISTRIBUTED](#query_language-system-flush-distributed)
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [SYSTEM UNFREEZE](#query_language-system-unfreeze)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [START REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [STOP REPLICATION QUEUES](#query_language-system-stop-replication-queues)
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTORE REPLICA](#query_language-system-restore-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES
Reload all [Internal dictionaries](../../sql-reference/dictionaries/internal-dicts.md).

View File

@ -13,7 +13,7 @@ Creates a table from a file. This table function is similar to [url](../../sql-r
**Syntax**
``` sql
file(path, format, structure)
file(path [,format] [,structure])
```
**Parameters**

View File

@ -11,7 +11,7 @@ Provides table-like interface to select/insert files in [Amazon S3](https://aws.
**Syntax**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```
**Arguments**

View File

@ -10,7 +10,7 @@ Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel
**Syntax**
``` sql
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure])
```
**Arguments**

View File

@ -13,7 +13,7 @@ sidebar_label: url
**Syntax**
``` sql
url(URL, format, structure)
url(URL [,format] [,structure])
```
**Parameters**

View File

@ -267,7 +267,7 @@ SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp;
└────────────────┘
```
:::Attention
:::note
Тип возвращаемого значения описанными далее функциями `toStartOf*`, `toLastDayOfMonth`, `toMonday` определяется конфигурационным параметром [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) имеющим по умолчанию значение `false`.
Поведение для
@ -280,7 +280,7 @@ SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp;
* `enable_extended_results_for_datetime_functions = true`: Функции `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` возвращают `Date` или `DateTime` если их аргумент `Date` или `DateTime` и они возвращают `Date32` или `DateTime64` если их аргумент `Date32` или `DateTime64`.
:::
:::Attention
:::note
Тип возвращаемого описанными далее функциями `toStartOf*`, `toLastDayOfMonth`, `toMonday` значения - `Date` или `DateTime`.
Хотя эти функции могут принимать значения типа `Date32` или `DateTime64` в качестве аргумента, при обработке аргумента вне нормального диапазона значений (`1970` - `2148` для `Date` и `1970-01-01 00:00:00`-`2106-02-07 08:28:15` для `DateTime`) будет получен некорректный результат.
Возвращаемые значения для значений вне нормального диапазона:

View File

@ -305,7 +305,7 @@ SHOW USERS
## SHOW ROLES {#show-roles-statement}
Выводит список [ролей](../../operations/access-rights.md#role-management). Для просмотра параметров ролей, см. системные таблицы [system.roles](../../operations/system-tables/roles.md#system_tables-roles) и [system.role-grants](../../operations/system-tables/role-grants.md#system_tables-role_grants).
Выводит список [ролей](../../operations/access-rights.md#role-management). Для просмотра параметров ролей, см. системные таблицы [system.roles](../../operations/system-tables/roles.md#system_tables-roles) и [system.role_grants](../../operations/system-tables/role-grants.md#system_tables-role_grants).
### Синтаксис {#show-roles-syntax}

View File

@ -6,43 +6,6 @@ sidebar_label: SYSTEM
# Запросы SYSTEM {#query-language-system}
- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [RELOAD MODELS](#query_language-system-reload-models)
- [RELOAD MODEL](#query_language-system-reload-model)
- [RELOAD FUNCTIONS](#query_language-system-reload-functions)
- [RELOAD FUNCTION](#query_language-system-reload-functions)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [DROP REPLICA](#query_language-system-drop-replica)
- [FLUSH LOGS](#query_language-system-flush_logs)
- [RELOAD CONFIG](#query_language-system-reload-config)
- [SHUTDOWN](#query_language-system-shutdown)
- [KILL](#query_language-system-kill)
- [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends)
- [FLUSH DISTRIBUTED](#query_language-system-flush-distributed)
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [SYSTEM UNFREEZE](#query_language-system-unfreeze)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [START REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [STOP REPLICATION QUEUES](#query_language-system-stop-replication-queues)
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTORE REPLICA](#query_language-system-restore-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES] {#query_language-system-reload-emdedded-dictionaries}
Перегружает все [Встроенные словари](../dictionaries/internal-dicts.md).
По умолчанию встроенные словари выключены.

View File

@ -13,7 +13,7 @@ sidebar_label: file
**Синтаксис**
``` sql
file(path, format, structure)
file(path [,format] [,structure])
```
**Параметры**

View File

@ -11,7 +11,7 @@ sidebar_label: s3
**Синтаксис**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```
**Aргументы**

View File

@ -11,7 +11,7 @@ sidebar_label: s3Cluster
**Синтаксис**
``` sql
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure])
```
**Аргументы**

View File

@ -13,7 +13,7 @@ sidebar_label: url
**Синтаксис**
``` sql
url(URL, format, structure)
url(URL [,format] [,structure])
```
**Параметры**

View File

@ -1,338 +1,297 @@
---
slug: /zh/development/tests
sidebar_position: 70
sidebar_label: Testing
title: ClickHouse Testing
description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way.
---
# ClickHouse 测试 {#clickhouse-testing}
## 功能测试 {#functional-tests}
## Functional Tests
功能测试使用起来最简单方便. 大多数 ClickHouse 特性都可以通过功能测试进行测试, 并且对于可以通过功能测试进行测试的 ClickHouse 代码的每一个更改, 都必须使用这些特性
Functional tests are the most simple and convenient to use. Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way.
每个功能测试都会向正在运行的 ClickHouse 服务器发送一个或多个查询, 并将结果与参考进行比较.
Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference.
测试位于 `查询` 目录中. 有两个子目录: `无状态``有状态`. 无状态测试在没有任何预加载测试数据的情况下运行查询 - 它们通常在测试本身内即时创建小型合成数据集. 状态测试需要来自 Yandex.Metrica 的预加载测试数据, 它对公众开放.
Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public.
每个测试可以是两种类型之一: `.sql``.sh`. `.sql` 测试是简单的 SQL 脚本, 它通过管道传输到 `clickhouse-client --multiquery --testmode`. `.sh` 测试是一个自己运行的脚本. SQL 测试通常比 `.sh` 测试更可取. 仅当您必须测试某些无法从纯 SQL 中执行的功能时才应使用 `.sh` 测试, 例如将一些输入数据传送到 `clickhouse-client` 或测试 `clickhouse-local`.
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
### 在本地运行测试 {#functional-test-locally}
### Running a Test Locally {#functional-test-locally}
在本地启动ClickHouse服务器, 监听默认端口(9000). 例如, 要运行测试 `01428_hash_set_nan_key`, 请切换到存储库文件夹并运行以下命令:
Start the ClickHouse server locally, listening on the default port (9000). To
run, for example, the test `01428_hash_set_nan_key`, change to the repository
folder and run the following command:
```
PATH=$PATH:<path to clickhouse-client> tests/clickhouse-test 01428_hash_set_nan_key
```
有关更多选项, 请参阅`tests/clickhouse-test --help`. 您可以简单地运行所有测试或运行由测试名称中的子字符串过滤的测试子集:`./clickhouse-test substring`. 还有并行或随机顺序运行测试的选项.
For more options, see `tests/clickhouse-test --help`. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`. There are also options to run tests in parallel or in randomized order.
### 添加新测试 {#adding-new-test}
### Adding a New Test
添加新的测试, 在 `queries/0_stateless` 目录下创建 `.sql``.sh` 文件, 手动检查, 然后通过以下方式生成`.reference`文件:`clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` 或 `./00000_test.sh > ./00000_test.reference`.
To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
测试应仅使用(创建、删除等)`test` 数据库中假定已预先创建的表; 测试也可以使用临时表.
Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.
### 选择测试名称 {#choosing-test-name}
### Choosing the Test Name
测试名称以五位数前缀开头, 后跟描述性名称, 例如 `00422_hash_function_constexpr.sql`. 要选择前缀, 请找到目录中已存在的最大前缀, 并将其加一. 在此期间, 可能会添加一些具有相同数字前缀的其他测试, 但这没关系并且不会导致任何问题, 您以后不必更改它.
The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later.
一些测试的名称中标有 `zookeeper`、`shard` 或 `long` . `zookeeper` 用于使用 ZooKeeper 的测试. `shard` 用于需要服务器监听 `127.0.0.*` 的测试; `distributed``global` 具有相同的含义. `long` 用于运行时间稍长于一秒的测试. Yo你可以分别使用 `--no-zookeeper`、`--no-shard` 和 `--no-long` 选项禁用这些测试组. 如果需要 ZooKeeper 或分布式查询,请确保为您的测试名称添加适当的前缀.
Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that requires server to listen `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer that one second. You can disable these groups of tests using `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively. Make sure to add a proper prefix to your test name if it needs ZooKeeper or distributed queries.
### 检查必须发生的错误 {#checking-error-must-occur}
### Checking for an Error that Must Occur
有时您想测试是否因不正确的查询而发生服务器错误. 我们支持在 SQL 测试中对此进行特殊注释, 形式如下:
Sometimes you want to test that a server error occurs for an incorrect query. We support special annotations for this in SQL tests, in the following form:
```
select x; -- { serverError 49 }
```
此测试确保服务器返回关于未知列“x”的错误代码为 49. 如果没有错误, 或者错误不同, 则测试失败. 如果您想确保错误发生在客户端, 请改用 `clientError` 注释.
This test ensures that the server returns an error with code 49 about unknown column `x`. If there is no error, or the error is different, the test will fail. If you want to ensure that an error occurs on the client side, use `clientError` annotation instead.
不要检查错误消息的特定措辞, 它将来可能会发生变化, 并且测试将不必要地中断. 只检查错误代码. 如果现有的错误代码不足以满足您的需求, 请考虑添加一个新的.
Do not check for a particular wording of error message, it may change in the future, and the test will needlessly break. Check only the error code. If the existing error code is not precise enough for your needs, consider adding a new one.
### 测试分布式查询 {#testing-distributed-query}
### Testing a Distributed Query
如果你想在功能测试中使用分布式查询, 你可以使用 `127.0.0.{1..2}` 的地址, 以便服务器查询自己; 或者您可以在服务器配置文件中使用预定义的测试集群, 例如`test_shard_localhost`. 请记住在测试名称中添加 `shard``distributed` 字样, 以便它以正确的配置在 CI 中运行, 其中服务器配置为支持分布式查询.
If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is run in CI in correct configurations, where the server is configured to support distributed queries.
## 已知错误 {#known-bugs}
## Known Bugs {#known-bugs}
如果我们知道一些可以通过功能测试轻松重现的错误, 我们将准备好的功能测试放在 `tests/queries/bugs` 目录中. 修复错误后, 这些测试将移至 `tests/queries/0_stateless` .
If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `tests/queries/bugs` directory. These tests will be moved to `tests/queries/0_stateless` when bugs are fixed.
## 集成测试 {#integration-tests}
## Integration Tests {#integration-tests}
集成测试允许在集群配置中测试 ClickHouse 以及 ClickHouse 与其他服务器(如 MySQL、Postgres、MongoDB)的交互. 它们可以用来模拟网络分裂、丢包等情况. 这些测试在Docker下运行, 并使用各种软件创建多个容器.
Integration tests allow testing ClickHouse in clustered configuration and ClickHouse interaction with other servers like MySQL, Postgres, MongoDB. They are useful to emulate network splits, packet drops, etc. These tests are run under Docker and create multiple containers with various software.
有关如何运行这些测试, 请参阅 `tests/integration/README.md` .
See `tests/integration/README.md` on how to run these tests.
注意, ClickHouse与第三方驱动程序的集成没有经过测试. 另外, 我们目前还没有JDBC和ODBC驱动程序的集成测试.
Note that integration of ClickHouse with third-party drivers is not tested. Also, we currently do not have integration tests with our JDBC and ODBC drivers.
## 单元测试 {#unit-tests}
## Unit Tests {#unit-tests}
当您想测试的不是 ClickHouse 整体, 而是单个独立库或类时,单元测试很有用. 您可以使用 `ENABLE_TESTS` CMake 选项启用或禁用测试构建. 单元测试(和其他测试程序)位于代码中的 `tests` 子目录中. 要运行单元测试, 请键入 `ninja test` 。有些测试使用 `gtest` , 但有些程序在测试失败时会返回非零退出码.
Unit tests are useful when you want to test not the ClickHouse as a whole, but a single isolated library or class. You can enable or disable build of tests with `ENABLE_TESTS` CMake option. Unit tests (and other test programs) are located in `tests` subdirectories across the code. To run unit tests, type `ninja test`. Some tests use `gtest`, but some are just programs that return non-zero exit code on test failure.
如果代码已经被功能测试覆盖了, 就没有必要进行单元测试(而且功能测试通常更易于使用).
Its not necessary to have unit tests if the code is already covered by functional tests (and functional tests are usually much more simple to use).
例如, 您可以通过直接调用可执行文件来运行单独的 gtest 检查:
You can run individual gtest checks by calling the executable directly, for example:
```bash
$ ./src/unit_tests_dbms --gtest_filter=LocalAddress*
```
## 性能测试 {#performance-tests}
## Performance Tests {#performance-tests}
性能测试允许测量和比较 ClickHouse 的某些孤立部分在合成查询上的性能. 测试位于 `tests/performance`. 每个测试都由带有测试用例描述的 `.xml` 文件表示. 测试使用 `docker/tests/performance-comparison` 工具运行. 请参阅自述文件以进行调用.
Performance tests allow to measure and compare performance of some isolated part of ClickHouse on synthetic queries. Performance tests are located at `tests/performance/`. Each test is represented by an `.xml` file with a description of the test case. Tests are run with `docker/test/performance-comparison` tool . See the readme file for invocation.
每个测试在循环中运行一个或多个查询(可能带有参数组合). 一些测试可以包含预加载测试数据集的先决条件.
Each test run one or multiple queries (possibly with combinations of parameters) in a loop.
如果您希望在某些场景中提高ClickHouse的性能并且如果可以在简单的查询中观察到改进那么强烈建议编写性能测试。在测试期间使用 `perf top` 或其他perf工具总是有意义的.
If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. Also, it is recommended to write performance tests when you add or modify SQL functions which are relatively isolated and not too obscure. It always makes sense to use `perf top` or other `perf` tools during your tests.
## 测试工具和脚本 {#test-tools-and-scripts}
## Test Tools and Scripts {#test-tools-and-scripts}
`tests` 目录中的一些程序不是准备好的测试,而是测试工具. 例如, 对于 `Lexer`, 有一个工具 `src/Parsers/tests/lexer` , 它只是对标准输入进行标记化并将着色结果写入标准输出. 您可以将这些类型的工具用作代码示例以及用于探索和手动测试.
Some programs in `tests` directory are not prepared tests, but are test tools. For example, for `Lexer` there is a tool `src/Parsers/tests/lexer` that just do tokenization of stdin and writes colorized result to stdout. You can use these kind of tools as a code examples and for exploration and manual testing.
## 其他测试 {#miscellaneous-tests}
## Miscellaneous Tests {#miscellaneous-tests}
`tests/external_models` 中有机器学习模型的测试. 这些测试不会更新, 必须转移到集成测试.
There are tests for machine learned models in `tests/external_models`. These tests are not updated and must be transferred to integration tests.
仲裁插入有单独的测试. 该测试在不同的服务器上运行 ClickHouse 集群并模拟各种故障情况:网络分裂、丢包(ClickHouse 节点之间、ClickHouse 和 ZooKeeper 之间、ClickHouse 服务器和客户端之间等)、`kill -9`、`kill -STOP` 和 `kill -CONT` , 比如 [Jepsen](https://aphyr.com/tags/Jepsen). 然后测试检查所有已确认的插入是否已写入并且所有被拒绝的插入均未写入.
There is separate test for quorum inserts. This test run ClickHouse cluster on separate servers and emulate various failure cases: network split, packet drop (between ClickHouse nodes, between ClickHouse and ZooKeeper, between ClickHouse server and client, etc.), `kill -9`, `kill -STOP` and `kill -CONT` , like [Jepsen](https://aphyr.com/tags/Jepsen). Then the test checks that all acknowledged inserts was written and all rejected inserts was not.
在 ClickHouse 开源之前, Quorum 测试是由单独的团队编写的. 这个团队不再与ClickHouse合作. 测试碰巧是用Java编写的. 由于这些原因, 必须重写仲裁测试并将其转移到集成测试.
Quorum test was written by separate team before ClickHouse was open-sourced. This team no longer work with ClickHouse. Test was accidentally written in Java. For these reasons, quorum test must be rewritten and moved to integration tests.
## 手动测试 {#manual-testing}
## Manual Testing {#manual-testing}
当您开发一个新特性时, 手动测试它也是合理的. 您可以按照以下步骤进行操作:
When you develop a new feature, it is reasonable to also test it manually. You can do it with the following steps:
构建 ClickHouse. 从终端运行 ClickHouse将目录更改为 `programs/clickhouse-server` 并使用 `./clickhouse-server` 运行它. 默认情况下, 它将使用当前目录中的配置(`config.xml`、`users.xml` 和`config.d` 和`users.d` 目录中的文件). 要连接到 ClickHouse 服务器, 请运行 `programs/clickhouse-client/clickhouse-client` .
Build ClickHouse. Run ClickHouse from the terminal: change directory to `programs/clickhouse-server` and run it with `./clickhouse-server`. It will use configuration (`config.xml`, `users.xml` and files within `config.d` and `users.d` directories) from the current directory by default. To connect to ClickHouse server, run `programs/clickhouse-client/clickhouse-client`.
请注意, 所有 clickhouse 工具(服务器、客户端等)都只是指向名为 `clickhouse` 的单个二进制文件的符号链接. 你可以在 `programs/clickhouse` 找到这个二进制文件. 所有工具也可以作为 `clickhouse tool` 而不是 `clickhouse-tool` 调用.
Note that all clickhouse tools (server, client, etc) are just symlinks to a single binary named `clickhouse`. You can find this binary at `programs/clickhouse`. All tools can also be invoked as `clickhouse tool` instead of `clickhouse-tool`.
或者, 您可以安装 ClickHouse 包: 从 Yandex 存储库稳定发布, 或者您可以在 ClickHouse 源根目录中使用 `./release` 为自己构建包. 然后使用 `sudo service clickhouse-server start` 启动服务器(或停止以停止服务器). 在 `/etc/clickhouse-server/clickhouse-server.log` 中查找日志.
Alternatively you can install ClickHouse package: either stable release from ClickHouse repository or you can build package for yourself with `./release` in ClickHouse sources root. Then start the server with `sudo clickhouse start` (or stop to stop the server). Look for logs at `/etc/clickhouse-server/clickhouse-server.log`.
当您的系统上已经安装了 ClickHouse 时,您可以构建一个新的 `clickhouse` 二进制文件并替换现有的二进制文件:
When ClickHouse is already installed on your system, you can build a new `clickhouse` binary and replace the existing binary:
``` bash
$ sudo service clickhouse-server stop
$ sudo clickhouse stop
$ sudo cp ./clickhouse /usr/bin/
$ sudo service clickhouse-server start
$ sudo clickhouse start
```
您也可以停止系统 clickhouse-server 并使用相同的配置运行您自己的服务器, 但登录到终端:
Also you can stop system clickhouse-server and run your own with the same configuration but with logging to terminal:
``` bash
$ sudo service clickhouse-server stop
$ sudo clickhouse stop
$ sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```
使用 gdb 的示例:
Example with gdb:
``` bash
$ sudo -u clickhouse gdb --args /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```
如果系统 clickhouse-server 已经在运行并且你不想停止它, 你可以在你的 `config.xml` 中更改端口号(或在 `config.d` 目录中的文件中覆盖它们), 提供适当的数据路径, 并运行它.
If the system clickhouse-server is already running and you do not want to stop it, you can change port numbers in your `config.xml` (or override them in a file in `config.d` directory), provide appropriate data path, and run it.
`clickhouse` 二进制文件几乎没有依赖关系, 可以在广泛的 Linux 发行版中使用. 要在服务器上快速而肮脏地测试您的更改, 您可以简单地将新构建的 `clickhouse` 二进制文件 `scp` 到您的服务器, 然后按照上面的示例运行它.
`clickhouse` binary has almost no dependencies and works across wide range of Linux distributions. To quick and dirty test your changes on a server, you can simply `scp` your fresh built `clickhouse` binary to your server and then run it as in examples above.
## 测试环境 {#testing-environment}
## Build Tests {#build-tests}
在发布稳定版之前, 我们将其部署在测试环境中.测试环境是一个集群,处理 [Yandex.Metrica](https://metrica.yandex.com/) 数据的 1/39 部分. 我们与 Yandex.Metrica 团队共享我们的测试环境. ClickHouse无需在现有数据上停机即可升级. 我们首先看到的是, 数据被成功地处理了, 没有滞后于实时, 复制继续工作, Yandex.Metrica 团队没有发现任何问题. 第一次检查可以通过以下方式进行:
Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well.
``` sql
SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h;
```
Examples:
- cross-compile for Darwin x86_64 (Mac OS X)
- cross-compile for FreeBSD x86_64
- cross-compile for Linux AArch64
- build on Ubuntu with libraries from system packages (discouraged)
- build with shared linking of libraries (discouraged)
在某些情况下, 我们还会部署到 Yandex 中我们朋友团队的测试环境Market、Cloud 等. 此外, 我们还有一些用于开发目的的硬件服务器.
For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts.
## 负载测试 {#load-testing}
Though we cannot run all tests on all variant of builds, we want to check at least that various build variants are not broken. For this purpose we use build tests.
部署到测试环境后, 我们使用来自生产集群的查询运行负载测试. 这是手动完成的.
We also test that there are no translation units that are too long to compile or require too much RAM.
确保您在生产集群上启用了 `query_log`.
We also test that there are no too large stack frames.
收集一天或更长时间的查询日志:
## Testing for Protocol Compatibility {#testing-for-protocol-compatibility}
``` bash
$ clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv
```
When we extend ClickHouse network protocol, we test manually that old clickhouse-client works with new clickhouse-server and new clickhouse-client works with old clickhouse-server (simply by running binaries from corresponding packages).
这是一个复杂的例子. `type = 2` 将过滤成功执行的查询. `query LIKE '%ym:%'` 是从 Yandex.Metrica 中选择相关查询. `is_initial_query` 是只选择客户端发起的查询, 而不是 ClickHouse 本身(作为分布式查询处理的一部分).
We also test some cases automatically with integrational tests:
- if data written by old version of ClickHouse can be successfully read by the new version;
- do distributed queries work in a cluster with different ClickHouse versions.
`scp` 将此日志记录到您的测试集群并按如下方式运行它:
## Help from the Compiler {#help-from-the-compiler}
``` bash
$ clickhouse benchmark --concurrency 16 < queries.tsv
```
Main ClickHouse code (that is located in `dbms` directory) is built with `-Wall -Wextra -Werror` and with some additional enabled warnings. Although these options are not enabled for third-party libraries.
(可能你还想指定一个 `--user`)
Clang has even more useful warnings - you can look for them with `-Weverything` and pick something to default build.
然后把它留到晚上或周末, 去休息一下.
For production builds, clang is used, but we also test make gcc builds. For development, clang is usually more convenient to use. You can build on your own machine with debug mode (to save battery of your laptop), but please note that compiler is able to generate more warnings with `-O3` due to better control flow and inter-procedure analysis. When building with clang in debug mode, debug version of `libc++` is used that allows to catch more errors at runtime.
您应该检查 `clickhouse-server` 没有崩溃, 内存占用是有限的, 且性能不会随着时间的推移而降低.
## Sanitizers {#sanitizers}
由于查询和环境的高度可变性, 没有记录和比较精确的查询执行时间.
### Address sanitizer
We run functional, integration, stress and unit tests under ASan on per-commit basis.
## 构建测试 {#build-tests}
### Thread sanitizer
We run functional, integration, stress and unit tests under TSan on per-commit basis.
构建测试允许检查在各种可选配置和一些外部系统上的构建是否被破坏. 这些测试也是自动化的.
### Memory sanitizer
We run functional, integration, stress and unit tests under MSan on per-commit basis.
示例:
- Darwin x86_64 (Mac OS X) 交叉编译
- FreeBSD x86_64 交叉编译
- Linux AArch64 交叉编译
- 使用系统包中的库在 Ubuntu 上构建(不鼓励)
- 使用库的共享链接构建(不鼓励)
例如, 使用系统包构建是不好的做法, 因为我们无法保证系统将拥有哪个确切版本的包. 但这确实是 Debian 维护者所需要的. 出于这个原因, 我们至少必须支持这种构建变体. 另一个例子: 共享链接是一个常见的麻烦来源, 但对于一些爱好者来说是需要的.
虽然我们无法对所有构建变体运行所有测试, 但我们希望至少检查各种构建变体没有被破坏. 为此, 我们使用构建测试.
我们还测试了那些太长而无法编译或需要太多RAM的没有翻译单元.
我们还测试没有太大的堆栈帧.
## 协议兼容性测试 {#testing-for-protocol-compatibility}
当我们扩展 ClickHouse 网络协议时, 我们手动测试旧的 clickhouse-client 与新的 clickhouse-server 一起工作, 而新的 clickhouse-client 与旧的 clickhouse-server 一起工作(只需从相应的包中运行二进制文件).
我们还使用集成测试自动测试一些案例:
- 旧版本ClickHouse写入的数据是否可以被新版本成功读取;
- 在具有不同 ClickHouse 版本的集群中执行分布式查询.
## 编译器的帮助 {#help-from-the-compiler}
主要的 ClickHouse 代码(位于 `dbms` 目录中)是用 `-Wall -Wextra -Werror` 和一些额外的启用警告构建的. 虽然没有为第三方库启用这些选项.
Clang 有更多有用的警告 - 你可以用 `-Weverything` 寻找它们并选择一些东西来默认构建.
对于生产构建, 使用 clang, 但我们也测试 make gcc 构建. 对于开发, clang 通常使用起来更方便. 您可以使用调试模式在自己的机器上构建(以节省笔记本电脑的电池), 但请注意, 由于更好的控制流和过程间分析, 编译器能够使用 `-O3` 生成更多警告. 在调试模式下使用 clang 构建时, 使用调试版本的 `libc++` 允许在运行时捕获更多错误.
## 地址清理器 {#sanitizers}
### 地址清理器
我们在ASan上运行功能测试、集成测试、压力测试和单元测试.
### 线程清理器
我们在TSan下运行功能测试、集成测试、压力测试和单元测试.
### 内存清理器
我们在MSan上运行功能测试、集成测试、压力测试和单元测试.
### 未定义的行为清理器
我们在UBSan下运行功能测试、集成测试、压力测试和单元测试. 某些第三方库的代码未针对 UB 进行清理.
### Undefined behaviour sanitizer
We run functional, integration, stress and unit tests under UBSan on per-commit basis. The code of some third-party libraries is not sanitized for UB.
### Valgrind (Memcheck)
我们曾经在 Valgrind 下通宵运行功能测试, 但不再这样做了. 这需要几个小时. 目前在`re2`库中有一个已知的误报, 见[这篇文章](https://research.swtch.com/sparse).
We used to run functional tests under Valgrind overnight, but don't do it anymore. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse).
## 模糊测试 {#fuzzing}
## Fuzzing {#fuzzing}
ClickHouse 模糊测试是使用 [libFuzzer](https://llvm.org/docs/LibFuzzer.html) 和随机 SQL 查询实现的. 所有模糊测试都应使用sanitizers(地址和未定义)进行.
ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries.
All the fuzz testing should be performed with sanitizers (Address and Undefined).
LibFuzzer 用于库代码的隔离模糊测试. Fuzzer 作为测试代码的一部分实现, 并具有 `_fuzzer` 名称后缀.
Fuzzer 示例可以在 `src/Parsers/tests/lexer_fuzzer.cpp` 中找到. LibFuzzer 特定的配置、字典和语料库存储在 `tests/fuzz`.
我们鼓励您为处理用户输入的每个功能编写模糊测试.
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “_fuzzer” name postfixes.
Fuzzer example can be found at `src/Parsers/fuzzers/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`.
We encourage you to write fuzz tests for every functionality that handles user input.
默认情况下不构建模糊器. 要构建模糊器, 应设置` -DENABLE_FUZZING=1` 和 `-DENABLE_TESTS=1` 选项.
我们建议在构建模糊器时禁用 Jemalloc. 用于将 ClickHouse fuzzing 集成到 Google OSS-Fuzz 的配置可以在 `docker/fuzz` 中找到.
Fuzzers are not built by default. To build fuzzers both `-DENABLE_FUZZING=1` and `-DENABLE_TESTS=1` options should be set.
We recommend to disable Jemalloc while building fuzzers. Configuration used to integrate ClickHouse fuzzing to
Google OSS-Fuzz can be found at `docker/fuzz`.
我们还使用简单的模糊测试来生成随机SQL查询, 并检查服务器在执行这些查询时是否会死亡.
你可以在 `00746_sql_fuzzy.pl` 中找到它. 这个测试应该连续运行(通宵或更长时间).
We also use simple fuzz test to generate random SQL queries and to check that the server does not die executing them.
You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
我们还使用复杂的基于 AST 的查询模糊器, 它能够找到大量的极端情况. 它在查询 AST 中进行随机排列和替换. 它会记住先前测试中的 AST 节点, 以使用它们对后续测试进行模糊测试, 同时以随机顺序处理它们. 您可以在 [这篇博客文章](https://clickhouse.com/blog/en/2021/fuzzing-clickhouse/) 中了解有关此模糊器的更多信息.
We also use sophisticated AST-based query fuzzer that is able to find huge amount of corner cases. It does random permutations and substitutions in queries AST. It remembers AST nodes from previous tests to use them for fuzzing of subsequent tests while processing them in random order. You can learn more about this fuzzer in [this blog article](https://clickhouse.com/blog/en/2021/fuzzing-clickhouse/).
## 压力测试 {#stress-test}
## Stress test
压力测试是另一种模糊测试. 它使用单个服务器以随机顺序并行运行所有功能测试. 不检查测试结果.
Stress tests are another case of fuzzing. It runs all functional tests in parallel in random order with a single server. Results of the tests are not checked.
经检查:
- 服务器不会崩溃,不会触发调试或清理程序陷阱;
- 没有死锁;
- 数据库结构一致;
- 服务器可以在测试后成功停止并重新启动,没有异常;
It is checked that:
- server does not crash, no debug or sanitizer traps are triggered;
- there are no deadlocks;
- the database structure is consistent;
- server can successfully stop after the test and start again without exceptions.
有五种变体 (Debug, ASan, TSan, MSan, UBSan).
There are five variants (Debug, ASan, TSan, MSan, UBSan).
## 线程模糊器 {#thread-fuzzer}
## Thread Fuzzer
Thread Fuzzer(请不要与 Thread Sanitizer 混淆)是另一种允许随机化线程执行顺序的模糊测试. 它有助于找到更多特殊情况.
Thread Fuzzer (please don't mix up with Thread Sanitizer) is another kind of fuzzing that allows to randomize thread order of execution. It helps to find even more special cases.
## 安全审计 {#security-audit}
## Security Audit
Yandex安全团队的人员从安全的角度对ClickHouse的功能做了一些基本的概述.
Our Security Team did some basic overview of ClickHouse capabilities from the security standpoint.
## 静态分析仪 {#static-analyzers}
## Static Analyzers {#static-analyzers}
我们在每次提交的基础上运行 `clang-tidy`. `clang-static-analyzer` 检查也被启用. `clang-tidy` 也用于一些样式检查.
We run `clang-tidy` on per-commit basis. `clang-static-analyzer` checks are also enabled. `clang-tidy` is also used for some style checks.
我们已经评估了 `clang-tidy`、`Coverity`、`cppcheck`、`PVS-Studio`、`tscancode`、`CodeQL`. 您将在 `tests/instructions/` 目录中找到使用说明. 你也可以阅读[俄文文章](https://habr.com/company/yandex/blog/342018/).
We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory.
如果你使用 `CLion` 作为 IDE, 你可以利用一些开箱即用的 `clang-tidy` 检查
If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box.
我们还使用 `shellcheck` 对shell脚本进行静态分析.
We also use `shellcheck` for static analysis of shell scripts.
## 硬化 {#hardening}
## Hardening {#hardening}
在调试版本中, 我们使用自定义分配器执行用户级分配的 ASLR.
In debug build we are using custom allocator that does ASLR of user-level allocations.
我们还手动保护在分配后预期为只读的内存区域.
We also manually protect memory regions that are expected to be readonly after allocation.
在调试构建中, 我们还需要对libc进行自定义, 以确保不会调用 "有害的" (过时的、不安全的、非线程安全的)函数.
In debug build we also involve a customization of libc that ensures that no "harmful" (obsolete, insecure, not thread-safe) functions are called.
Debug 断言被广泛使用.
Debug assertions are used extensively.
在调试版本中,如果抛出带有 "逻辑错误" 代码(暗示错误)的异常, 则程序会过早终止. 它允许在发布版本中使用异常, 但在调试版本中使其成为断言.
In debug build, if exception with "logical error" code (implies a bug) is being thrown, the program is terminated prematurely. It allows to use exceptions in release build but make it an assertion in debug build.
jemalloc 的调试版本用于调试版本.
libc++ 的调试版本用于调试版本.
Debug version of jemalloc is used for debug builds.
Debug version of libc++ is used for debug builds.
## 运行时完整性检查
## Runtime Integrity Checks
对存储在磁盘上的数据是校验和. MergeTree 表中的数据同时以三种方式进行校验和*(压缩数据块、未压缩数据块、跨块的总校验和). 客户端和服务器之间或服务器之间通过网络传输的数据也会进行校验和. 复制确保副本上的数据位相同.
Data stored on disk is checksummed. Data in MergeTree tables is checksummed in three ways simultaneously* (compressed data blocks, uncompressed data blocks, the total checksum across blocks). Data transferred over network between client and server or between servers is also checksummed. Replication ensures bit-identical data on replicas.
需要防止硬件故障(存储介质上的位腐烂、服务器上 RAM 中的位翻转、网络控制器 RAM 中的位翻转、网络交换机 RAM 中的位翻转、客户端 RAM 中的位翻转、线路上的位翻转). 请注意,比特位操作很常见, 即使对于 ECC RAM 和 TCP 校验和(如果您每天设法运行数千台处理 PB 数据的服务器, 也可能发生比特位操作. [观看视频(俄语)](https://www.youtube.com/watch?v=ooBAQIe0KlQ).
It is required to protect from faulty hardware (bit rot on storage media, bit flips in RAM on server, bit flips in RAM of network controller, bit flips in RAM of network switch, bit flips in RAM of client, bit flips on the wire). Note that bit flips are common and likely to occur even for ECC RAM and in presence of TCP checksums (if you manage to run thousands of servers processing petabytes of data each day). [See the video (russian)](https://www.youtube.com/watch?v=ooBAQIe0KlQ).
ClickHouse 提供诊断功能, 可帮助运维工程师找到故障硬件.
ClickHouse provides diagnostics that will help ops engineers to find faulty hardware.
\* 它并不慢.
\* and it is not slow.
## 代码风格 {#code-style}
## Code Style {#code-style}
[此处](style.md)描述了代码样式规则.
Code style rules are described [here](style.md).
要检查一些常见的样式违规,您可以使用 `utils/check-style` 脚本.
To check for some common style violations, you can use `utils/check-style` script.
要强制使用正确的代码样式, 您可以使用 `clang-format`. 文件 `.clang-format` 位于源根目录. 它大多与我们的实际代码风格相对应. 但是不建议将 `clang-format` 应用于现有文件, 因为它会使格式变得更糟. 您可以使用可以在 clang 源代码库中找到的 `clang-format-diff` 工具.
To force proper style of your code, you can use `clang-format`. File `.clang-format` is located at the sources root. It mostly corresponding with our actual code style. But its not recommended to apply `clang-format` to existing files because it makes formatting worse. You can use `clang-format-diff` tool that you can find in clang source repository.
或者, 您可以尝试使用 `uncrustify` 工具来重新格式化您的代码. 配置位于源根目录中的 `uncrustify.cfg` 中. 它比 `clang-format` 测试更少.
Alternatively you can try `uncrustify` tool to reformat your code. Configuration is in `uncrustify.cfg` in the sources root. It is less tested than `clang-format`.
`CLion` 有自己的代码格式化程序, 必须根据我们的代码风格进行调整.
`CLion` has its own code formatter that has to be tuned for our code style.
我们还使用 `codespell` 来查找代码中的拼写错误.它也是自动化的.
We also use `codespell` to find typos in code. It is automated as well.
## Metrica B2B 测试 {#metrica-b2b-tests}
## Test Coverage {#test-coverage}
每个 ClickHouse 版本都使用 Yandex Metrica 和 AppMetrica 引擎进行测试. ClickHouse 的测试版和稳定版部署在 VM 上, 并使用 Metrica 引擎的小副本运行, 该引擎处理输入数据的固定样本. 然后将两个 Metrica 引擎实例的结果放在一起比较.
这些测试由单独的团队自动化. 由于移动部件数量众多, 测试在大多数情况下都因完全不相关的原因而失败, 这些原因很难弄清楚. 这些测试很可能对我们有负面价值. 尽管如此, 这些测试在数百次中被证明是有用的.
## 测试覆盖率 {#test-coverage}
我们还跟踪测试覆盖率, 但仅针对功能测试和 clickhouse-server. 它每天进行.
We also track test coverage but only for functional tests and only for clickhouse-server. It is performed on daily basis.
## Tests for Tests
有自动检测薄片测试. 它运行所有新测试100次(用于功能测试)或10次(用于集成测试). 如果至少有一次测试失败,它就被认为是脆弱的.
There is automated check for flaky tests. It runs all new tests 100 times (for functional tests) or 10 times (for integration tests). If at least single time the test failed, it is considered flaky.
## Testflows
[Testflows](https://testflows.com/) 是一个企业级的测试框架. Altinity 使用它进行一些测试, 我们在 CI 中运行这些测试.
[Testflows](https://testflows.com/) is an enterprise-grade open-source testing framework, which is used to test a subset of ClickHouse.
## Yandex 检查 (only for Yandex employees)
## Test Automation {#test-automation}
这些检查将ClickHouse代码导入到Yandex内部的单一存储库中, 所以ClickHouse代码库可以被Yandex的其他产品(YT和YDB)用作库. 请注意, clickhouse-server本身并不是由内部回购构建的, Yandex应用程序使用的是未经修改的开源构建的.
We run tests with [GitHub Actions](https://github.com/features/actions).
## 测试自动化 {#test-automation}
Build jobs and tests are run in Sandbox on per commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored for several months. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you.
我们使用 Yandex 内部 CI 和名为 "Sandbox" 的作业自动化系统运行测试.
We do not use Travis CI due to the limit on time and computational power.
We do not use Jenkins. It was used before and now we are happy we are not using Jenkins.
在每次提交的基础上, 构建作业和测试都在沙箱中运行. 生成的包和测试结果发布在GitHub上, 可以通过直接链接下载. 产物要保存几个月. 当你在GitHub上发送一个pull请求时, 我们会把它标记为 "可以测试" , 我们的CI系统会为你构建ClickHouse包(发布、调试、使用地址清理器等).
由于时间和计算能力的限制, 我们不使用 Travis CI.
我们不用Jenkins. 以前用过, 现在我们很高兴不用Jenkins了.
[原始文章](https://clickhouse.com/docs/en/development/tests/) <!--hide-->
[Original article](https://clickhouse.com/docs/en/development/tests/) <!--hide-->

View File

@ -55,6 +55,5 @@ ORDER BY id
## 参考
- [高效低基数类型](https://www.altinity.com/blog/2019/3/27/low-cardinality).
- [使用低基数类型减少ClickHouse的存储成本 来自Instana工程师的分享](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/).
- [字符优化 (俄语视频分享)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [英语分享](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf).
- [字符优化 (俄语视频分享)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [英语分享](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf).

View File

@ -121,8 +121,6 @@ ENGINE = <Engine>
...
```
如果指定了编解ec则默认编解码器不适用。 编解码器可以组合在一个流水线中,例如, `CODEC(Delta, ZSTD)`. 要为您的项目选择最佳的编解码器组合请通过类似于Altinity中描述的基准测试 [新编码提高ClickHouse效率](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse) 文章.
!!! warning "警告"
您无法使用外部实用程序解压缩ClickHouse数据库文件`lz4`. 相反,使用特殊的 [ツ环板compressorョツ嘉ッツ偲](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) 实用程序。

View File

@ -6,38 +6,6 @@ sidebar_label: SYSTEM
# SYSTEM Queries {#query-language-system}
- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [DROP REPLICA](#query_language-system-drop-replica)
- [FLUSH LOGS](#query_language-system-flush_logs)
- [RELOAD CONFIG](#query_language-system-reload-config)
- [SHUTDOWN](#query_language-system-shutdown)
- [KILL](#query_language-system-kill)
- [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends)
- [FLUSH DISTRIBUTED](#query_language-system-flush-distributed)
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [SYSTEM UNFREEZE](#query_language-system-unfreeze)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [START REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [STOP REPLICATION QUEUES](#query_language-system-stop-replication-queues)
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES\] {#query_language-system-reload-emdedded-dictionaries}
重新加载所有[内置字典](../../sql-reference/dictionaries/internal-dicts.md)。默认情况下内置字典是禁用的。

View File

@ -1,264 +0,0 @@
---
slug: /zh/whats-new/changelog/2017
---
### ClickHouse 版本 1.1.54327, 2017-12-21 {#clickhouse-release-1-1-54327-2017-12-21}
此版本包含先前版本 1.1.54318 的错误修复:
- 修复了可能导致数据丢失的复制中可能出现的竞争条件的错误. 此问题影响1.1.54310和1.1.54318版本. 如果将这些版本的任意一个与 Replicated 表一起使用,则强烈建议进行更新. 此问题显示在日志中的警告消息中,例如 `Part ... from own log does not exist.` 即使您没有在日志中看到这些消息,该问题也是相关的.
### ClickHouse 版本 1.1.54318, 2017-11-30 {#clickhouse-release-1-1-54318-2017-11-30}
此版本包含先前版本 1.1.54310 的错误修复:
- 修复了在 SummingMergeTree 引擎中合并期间不正确的行删除.
- 修复了未复制的 MergeTree 引擎中的内存泄漏.
- 修复了在 MergeTree 引擎中频繁插入导致性能下降的问题.
- 修复了导致复制队列停止运行的问题.
- 修复了服务器日志的轮换和归档.
### ClickHouse 版本 1.1.54310, 2017-11-01 {#clickhouse-release-1-1-54310-2017-11-01}
#### 新特征: {#new-features}
- MergeTree 系列表引擎的自定义分区键.
- [Kafka](https://clickhouse.com/docs/en/operations/table_engines/kafka/) 表引擎.
- 增加了对加载[CatBoost](https://catboost.yandex/)模型的支持, 并将它们应用到存储在ClickHouse中的数据.
- 添加了对 UTC 非整数偏移时区的支持
- 添加了对具有时间间隔的算术运算的支持.
- Date 和 DateTime 类型的值范围扩展到 2105 年.
- 添加了 `CREATE MATERIALIZED VIEW x TO y` 查询(指定用于存储物化视图数据的现有表).
- 添加了不带参数的`ATTACH TABLE` 查询.
- SummingMergeTree 表中名称以 -Map 结尾的嵌套列的处理逻辑被提取到 sumMap 聚合函数中. 您现在可以明确指定此类列.
- IP 树字典的最大大小增加到 128M 条目.
- 添加了 getSizeOfEnumType 函数.
- 添加了 sumWithOverflow 聚合函数.
- 添加了对 Cap'n Proto 输入格式的支持.
- 您现在可以在使用 zstd 算法时自定义压缩级别.
#### 向后不兼容的变化: {#backward-incompatible-changes}
- 不允许使用内存以外的引擎创建临时表.
- 不允许使用 View 或 MaterializedView 引擎显式创建表.
- 在表创建期间,新的检查验证采样键表达式是否包含在主键中.
#### Bug 修复: {#bug-fixes}
- 修复了同步插入分布式表时的挂断问题.
- 修复了复制表中部件的非原子添加和删除.
- 插入物化视图的数据不会进行不必要的重复数据删除.
- 对本地副本滞后且远程副本不可用的分布式表执行查询不再导致错误.
- 用户不再需要访问 `default` 数据库的权限来创建临时表.
- 修复了指定不带参数的 Array 类型时崩溃的问题.
- 修复了包含服务器日志的磁盘卷已满时的挂断问题.
- 修复了 Unix 纪元第一周 toRelativeWeekNum 函数中的溢出问题.
#### Build 改进: {#build-improvements}
- 更新了多个第三方库(尤其是 Poco)并转换为 git 子模块.
### ClickHouse 版本 1.1.54304, 2017-10-19 {#clickhouse-release-1-1-54304-2017-10-19}
#### 新特征: {#new-features-1}
- 本机协议中的 TLS 支持(要启用,请在 `config.xml` 中设置 `tcp_ssl_port`).
#### Bug 修复: {#bug-fixes-1}
- 复制表的`ALTER` 现在尝试尽快开始运行.
- 修复了使用设置 `preferred_block_size_bytes=0.` 读取数据时崩溃的问题.
- 修复了按下 `Page Down``clickhouse-client` 崩溃的问题.
- 使用 `GLOBAL IN``UNION ALL` 正确解释某些复杂的查询.
- `FREEZE PARTITION` 现在总是以原子方式工作.
- 空 POST 请求现在返回代码为 411 的响应.
- 修正了像 `CAST(1 AS Nullable(UInt8)).` 这样的表达式的解释错误.
- 修正了从 `MergeTree` 表中读取 `Array(Nullable(String))` 列时的错误.
- 修复了在解析诸如 `SELECT dummy AS dummy, dummy AS b` 之类的查询时崩溃的问题.
- 用户使用无效的 `users.xml` 正确更新.
- 可执行字典返回非零响应代码时的正确处理.
### ClickHouse 版本 1.1.54292, 2017-09-20 {#clickhouse-release-1-1-54292-2017-09-20}
#### 新特征: {#new-features-2}
- 添加了用于处理坐标平面上的坐标的 `pointInPolygon` 函数.
- 添加了用于计算数组总和的 `sumMap` 聚合函数, 类似于 `SummingMergeTree` .
- 添加了 `trunc` 功能. 改进了舍入函数(`round`、`floor`、`ceil`、`roundToExp2`)的性能并更正了它们工作方式的逻辑. 更改了分数和负数的 `roundToExp2` 函数的逻辑.
- ClickHouse 可执行文件现在较少依赖于 libc 版本. 同一个 ClickHouse 可执行文件可以在各种 Linux 系统上运行. 使用编译查询时仍然存在依赖性(使用设置 `compile = 1` , 默认情况下不使用).
- 减少动态编译查询所需的时间.
#### Bug 修复: {#bug-fixes-2}
- 修复了有时会产生 `part ... intersects previous part` 消息和削弱副本一致性的错误.
- 修复了关闭期间 ZooKeeper 不可用导致服务器锁定的错误.
- 恢复副本时删除了过多的日志记录.
- 修复了 UNION ALL 实现中的错误.
- 修复了如果块中的第一列具有 Array 类型时在 concat 函数中发生的错误.
- 进度现在在 system.merges 表中可以正确显示.
### ClickHouse 版本 1.1.54289, 2017-09-13 {#clickhouse-release-1-1-54289-2017-09-13}
#### 新特征: {#new-features-3}
- 用于服务器管理的 `SYSTEM` 查询: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`.
- 添加了用于处理数组的函数: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront`.
- 为 ZooKeeper 配置添加了 `root``identity` 参数. 这将允许您隔离同一 ZooKeeper 集群上的各个用户.
- 添加了聚合函数 `groupBitAnd``groupBitOr``groupBitXor` (为了兼容性,它们也可以在名称 `BIT_AND` 、`BIT_OR`和`BIT_XOR` 下使用).
- 可以通过在文件系统中指定套接字来从 MySQL 加载外部字典.
- 可以通过 SSL 从 MySQL 加载外部字典 (`ssl_cert`, `ssl_key`, `ssl_ca` 参数).
- 添加了 `max_network_bandwidth_for_user` 设置以限制每个用户查询的总体带宽使用.
- 支持临时表的 `DROP TABLE`.
- 支持从 `CSV``JSONEachRow` 格式读取 Unix 时间戳格式的 `DateTime` 值.
- 现在默认排除分布式查询中的滞后副本(默认阈值为 5 分钟).
- 在 ALTER 期间使用 FIFO 锁定对于连续运行的查询ALTER 查询不会无限期阻塞.
- 在配置文件中设置 `umask` 的选项.
- 使用 `DISTINCT` 提高查询的性能.
#### Bug 修复: {#bug-fixes-3}
- 改进了在 ZooKeeper 中删除旧节点的过程. 以前, 如果插入非常频繁, 旧节点有时不会被删除, 从而导致服务器关闭缓慢等.
- 修复了为 ZooKeeper 连接选择主机时的随机化问题.
- 如果副本是本地主机, 则修复了在分布式查询中排除滞后副本的问题.
- 修复了在 `嵌套` 结构中的元素上运行 `ALTER MODIFY` 后, `ReplicatedMergeTree` 表中的数据部分可能被破坏的错误.
- 修复了可能导致 SELECT 查询 `hang` 的错误.
- 分布式 DDL 查询的改进.
- 修复了查询 `CREATE TABLE ... AS <materialized view>`.
- 解决了对 Buffer 表的 `ALTER ... CLEAR COLUMN IN PARTITION` 查询中的死锁.
- 修复了使用 `JSONEachRow``TSKV` 格式时 `Enum` 的无效默认值 (0 而不是最小值).
- 解决了使用带有 `可执行` 源的字典时出现僵尸进程的问题.
- 修复了 HEAD 查询的段错误.
#### 改进了开发和组装ClickHouse的工作流: {#improved-workflow-for-developing-and-assembling-clickhouse}
- 您可以使用 `pbuilder` 来构建 ClickHouse.
- 你可以使用 `libc++` 代替 `libstdc++` 在 Linux 上构建.
- 添加了使用静态代码分析工具的说明: `Coverage`, `clang-tidy`, `cppcheck`.
#### 升级时请注意: {#please-note-when-upgrading}
- 现在有更高的 MergeTree 设置默认值 `max_bytes_to_merge_at_max_space_in_pool` (要合并的数据部分的最大总大小, 以字节为单位): 它已从 100 GiB 增加到 150 GiB. 这可能会导致在服务器升级后运行大型合并, 从而导致磁盘子系统负载增加. 如果服务器上的可用空间小于正在运行的合并总量的两倍, 这将导致所有其他合并停止运行, 包括小数据部分的合并. 因此, INSERT 查询将失败并显示消息"合并的处理速度明显慢于插入." , 使用 `SELECT * FROM system.merges` 查询来监控情况. 您还可以在 `system.metrics` 表或 Graphite 中检查 `DiskSpaceReservedForMerge` 指标. 您不需要做任何事情来解决这个问题, 因为一旦大型合并完成, 问题就会自行解决. 如果您发现这不可接受, 您可以恢复 `max_bytes_to_merge_at_max_space_in_pool` 设置的先前值. 为此, 请转到 config.xml 中的 `<merge_tree>` 部分, 设置 ``` <merge_tree>``<max_bytes_to_merge_at_max_space_in_pool>107374182400</max_bytes_to_merge_at_max_space_in_pool> ``` 并重新启动服务器.
### ClickHouse 版本 1.1.54284, 2017-08-29 {#clickhouse-release-1-1-54284-2017-08-29}
- 这是先前 1.1.54282 版本的错误修复版本. 它修复了 ZooKeeper 中部分目录中的泄漏.
### ClickHouse 版本 1.1.54282, 2017-08-23 {#clickhouse-release-1-1-54282-2017-08-23}
此版本包含先前版本 1.1.54276 的错误修复:
- 修复了插入分布式表时的 `DB::Exception: Assertion violation: !_path.empty()`.
- 如果输入数据以 ';' 开头, 则在以 RowBinary 格式插入时固定解析.
- 某些聚合函数 (例如 `groupArray()` ) 的运行时编译期间的错误.
### ClickHouse 版本 1.1.54276, 2017-08-16 {#clickhouse-release-1-1-54276-2017-08-16}
#### 新特征: {#new-features-4}
- SELECT 查询添加了一个可选的 WITH 部分. 示例查询:`WITH 1+1 AS a SELECT a, a*a` .
- NSERT 可以在分布式表中同步执行:只有在所有数据都保存在所有分片上后才返回 OK. 这是通过设置 `insert_distributed_sync=1` 激活的.
- 添加了用于处理 16 字节标识符的 UUID 数据类型.
- 添加了 CHAR、FLOAT 和其他类型的别名以与 Tableau 兼容.
- 新增 toYYYYMM, toYYYYMMDD, toYYYYMMDDhhmmss 时间转数字功能.
- 您可以使用 IP 地址 (与主机名一起) 来识别集群 DDL 查询的服务器.
- 在函数 `substring(str, pos, len)` 中添加了对非常量参数和负偏移量的支持.
- 为 `groupArray(max_size)(column)` 聚合函数增加了max_size参数, 并优化了其性能.
#### Main Changes: {#main-changes}
- 安全改进:所有服务器文件都使用 0640 权限创建(可以通过 `<umask>` 配置参数更改).
- 改进了语法无效查询的错误消息.
- 合并大段 MergeTree 数据时显着减少内存消耗并提高性能.
- 显着提高了 ReplacingMergeTree 引擎的数据合并性能.
- 通过组合多个源插入提高了从分布式表进行异步插入的性能. 要启用此功能, 请使用设置 `distributed_directory_monitor_batch_inserts=1` .
#### Backward Incompatible Changes: {#backward-incompatible-changes-1}
- 更改了数组 `groupArray(array_column)` 函数聚合状态的二进制格式.
#### Complete List of Changes: {#complete-list-of-changes}
- 添加了 `output_format_json_quote_denormals` 设置, 可以以 JSON 格式输出 nan 和 inf 值.
- 从分布式表读取时优化流分配.
- 如果值不变, 可以在只读模式下配置设置.
- 加了检索 MergeTree 引擎的非整数粒度的功能, 以满足对 `preferred_block_size_bytes` 设置中指定的块大小的限制. 目的是在处理来自大列的表的查询时减少RAM的消耗并增加缓存局部性.
- 有效地使用包含像 `toStartOfHour(x)` 这样的表达式的索引来处理像 `toStartOfHour(x) op сonstexpr` 这样的条件.
- 添加了 MergeTree 引擎的新设置(config.xml 中的 merge_tree 部分):
- `replicad_deduplication_window_seconds` 设置允许在复制表中删除重复插入的秒数.
- `cleanup_delay_period` 设置启动清理以删除过时数据的频率.
- `Replicationd_can_become_leader` 可以防止副本成为领导者(并分配合并).
- 加速清理以从 ZooKeeper 中删除过时的数据.
- 集群 DDL 查询的多项改进和修复. 特别有趣的是新设置 `distributed_ddl_task_timeout`, 它限制了等待集群中服务器响应的时间. 如果 ddl 请求没有在所有主机上执行,响应将包含超时错误并且请求将以异步模式执行.
- 改进了服务器日志中堆栈跟踪的显示.
- 为压缩方法添加了 "none" 值.
- 您可以在 config.xml 中使用多个dictionaries_config 部分.
- 可以通过文件系统中的套接字连接到 MySQL.
- `system.parts` 表有一个新列, 其中包含有关标记大小的信息(以字节为单位).
#### Bug 修复: {#bug-fixes-4}
- 使用 Merge 表的分布式表现在可以正确用于带有 `_table` 字段条件的 SELECT 查询.
- 修复了检查数据部分时 ReplicatedMergeTree 中罕见的竞争条件.
- 修复了启动服务器时 `leader election` 可能会冻结的问题.
- 使用数据源的本地副本时,将忽略 `max_replica_delay_for_distributed_queries` 设置. 这已被修复.
- 修复了尝试清理不存在的列时 `ALTER TABLE CLEAR COLUMN IN PARTITION` 的错误行为.
- 修复了 multiIf 函数中使用空数组或字符串时的异常.
- 修复了反序列化本机格式时过多的内存分配.
- 修复了 Trie 词典的错误自动更新.
- 修复了在使用 SAMPLE 时从合并表中使用 GROUP BY 子句运行查询时的异常.
- 修复了 `distributed_aggregation_memory_efficient=1` 时 GROUP BY 的崩溃.
- 现在可以在 IN 和 JOIN 右侧指定 `database.table`.
- 太多线程用于并行聚合. 这已被修复.
- 修复了 `if` 函数如何与 FixedString 参数一起工作.
- 对于权重为 0 的分片, SELECT 在分布式表中工作不正确. 这已得到修复.
- 运行 `CREATE VIEW IF EXISTS 不再导致崩溃` .
- 修复了设置 `input_format_skip_unknown_fields=1` 且存在负数时的错误行为.
- 修复了如果字典中有一些无效数据, `dictGetHierarchy()` 函数中的无限循环.
- 修复了使用 IN 或 JOIN 子句和合并表中的子查询运行分布式查询时的 `Syntax error: unexpected (...)` 错误.
- 修复了对字典表中 SELECT 查询的错误解释.
- 修复了在超过 20 亿元素的 IN 和 JOIN 子句中使用数组时的 "Cannot mremap" 错误.
- 修复了以 MySQL 为源的字典的故障转移.
#### 改进了开发和组装ClickHouse的工作流: {#improved-workflow-for-developing-and-assembling-clickhouse-1}
- 可以在 Arcadia 中组装 Builds.
- 可以使用 gcc 7 编译 ClickHouse.
- 使用 ccache+distcc 的并行构建现在更快了.
### ClickHouse 版本 1.1.54245, 2017-07-04 {#clickhouse-release-1-1-54245-2017-07-04}
#### 新特征: {#new-features-5}
- 分布式 DDL(例如, `REATE TABLE ON CLUSTER`).
- 复制查询 `ALTER TABLE CLEAR COLUMN IN PARTITION.` .
- 字典表引擎 (以表的形式访问字典数据).
- 字典数据库引擎 (这种类型的数据库自动为所有连接的外部字典提供字典表).
- 您可以通过向源发送请求来检查字典的更新.
- 合格的列名.
- 使用双引号引用标识符.
- HTTP 接口中的会话.
- 复制表的 OPTIMIZE 查询不仅可以在领导者上运行.
#### 向后不兼容的变化: {#backward-incompatible-changes-2}
- 删除了 SET GLOBAL.
#### 次要更改: {#minor-changes}
- 目前在触发警报后,日志会打印完整的堆栈跟踪.
- 放宽了启动时损坏/额外数据部分数量的验证(误报太多).
#### Bug 修复: {#bug-fixes-5}
- 修复了插入分布式表时的错误连接"卡住"问题.
- GLOBAL IN 现在适用于来自查看分布式表的合并表的查询.
- 在 Google Compute Engine 虚拟机上检测到的内核数不正确. 这已被修复.
- 缓存外部字典的可执行源的工作方式发生了变化.
- 修正了包含空字符的字符串的比较.
- 修复了 Float32 主键字段与常量的比较.
- 以前,对字段大小的错误估计可能会导致分配过大.
- 修复了使用 ALTER 查询添加到表中的 Nullable 列时发生的崩溃.
- 修复了当行数小于 LIMIT 时按 Nullable 列排序时崩溃的问题.
- 修复了仅由常量值组成的 ORDER BY 子查询.
- 以前,复制表在 DROP TABLE 失败后可能保持无效状态.
- 结果为空的标量子查询的别名不再丢失.
- 现在,如果 .so 文件损坏,使用编译的查询不会因错误而失败.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -47,9 +47,10 @@ CLICKHOUSE_PIDFILE="$CLICKHOUSE_PIDDIR/$PROGRAM.pid"
# Some systems lack "flock"
command -v flock >/dev/null && FLOCK=flock
# Override defaults from optional config file
# Override defaults from optional config file and export them automatically
set -a
test -f /etc/default/clickhouse && . /etc/default/clickhouse
set +a
die()
{

View File

@ -723,7 +723,7 @@ bool Client::processWithFuzzing(const String & full_query)
// queries, for lack of a better solution.
// There is also a problem that fuzzer substitutes positive Int64
// literals or Decimal literals, which are then parsed back as
// UInt64, and suddenly duplicate alias substitition starts or stops
// UInt64, and suddenly duplicate alias substitution starts or stops
// working (ASTWithAlias::formatImpl) or something like that.
// So we compare not even the first and second formatting of the
// query, but second and third.

View File

@ -133,11 +133,11 @@ func TestConfigFileFrameCopy(t *testing.T) {
require.Empty(t, errs)
i := 0
sizes := map[string]int64{
"users.xml": int64(2039),
"users.xml": int64(2017),
"default-password.xml": int64(188),
"config.xml": int64(61282),
"config.xml": int64(61260),
"server-include.xml": int64(168),
"user-include.xml": int64(582),
"user-include.xml": int64(559),
}
var checkedFiles []string
for {

View File

@ -1,4 +1,3 @@
<?xml version="1.0" ?>
<clickhouse>
<test_user>
<networks>

View File

@ -1,4 +1,3 @@
<?xml version="1.0"?>
<!--
NOTE: User and query level settings are set up in "users.xml" file.
If you have accidentally specified user-level settings here, server won't start.

Some files were not shown because too many files have changed in this diff Show More