Merge branch 'master' into memory-overcommit

commit 5070784282
mergify[bot] authored 2021-12-29 10:35:32 +00:00, committed by GitHub
1032 changed files with 24866 additions and 10938 deletions

@ -8,6 +8,10 @@
name: Docker Container Scan (clickhouse-server)
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
"on":
pull_request:
paths:

@ -1,4 +1,9 @@
name: CherryPick
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
concurrency:
group: cherry-pick
on: # yamllint disable-line rule:truthy
@ -9,10 +14,13 @@ jobs:
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/cherry_pick
ROBOT_CLICKHOUSE_SSH_KEY=${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
REPO_OWNER=ClickHouse
REPO_NAME=ClickHouse
REPO_TEAM=core
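The ROBOT_CLICKHOUSE_SSH_KEY lines above use the multiline-value syntax for $GITHUB_ENV described in the workflow-commands documentation linked in the comment: NAME<<DELIMITER, then the value, then the delimiter on a line of its own. A minimal Python sketch of the same idea (the helper name and the random delimiter are illustrative, not part of the workflow):

    import os
    import uuid

    def set_multiline_env(name: str, value: str) -> None:
        # Append "NAME<<DELIMITER\n<value>\nDELIMITER\n" to the file that
        # GitHub Actions exposes via $GITHUB_ENV; the delimiter only has to
        # be a string that cannot occur inside the value.
        delimiter = uuid.uuid4().hex
        with open(os.environ["GITHUB_ENV"], "a", encoding="utf-8") as env_file:
            env_file.write(f"{name}<<{delimiter}\n{value}\n{delimiter}\n")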

@ -1,4 +1,9 @@
name: BackportPR
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
push:
branches:
@ -90,6 +95,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -134,6 +140,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -178,6 +185,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -222,6 +230,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}

@ -1,4 +1,9 @@
name: Cancel
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
workflow_run:
workflows: ["CIGithubActions", "ReleaseCI", "DocsCheck", "BackportPR"]

@ -1,4 +1,9 @@
name: DocsCheck
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
pull_request:
types:
@ -16,7 +21,7 @@ jobs:
steps:
- name: Clear repository
run: |
sudo rm -rf $GITHUB_WORKSPACE
sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Labels check
@ -29,7 +34,7 @@ jobs:
steps:
- name: Clear repository
run: |
sudo rm -rf $GITHUB_WORKSPACE
sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
@ -58,7 +63,7 @@ jobs:
path: ${{ env.TEMP_PATH }}
- name: Clear repository
run: |
sudo rm -rf $GITHUB_WORKSPACE
sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Docs Check

.github/workflows/jepsen.yml (new file)

@ -0,0 +1,44 @@
name: JepsenWorkflow
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
concurrency:
group: jepsen
on: # yamllint disable-line rule:truthy
schedule:
- cron: '0 */6 * * *'
workflow_run:
workflows: ["CIGithubActions"]
types:
- completed
workflow_dispatch:
jobs:
KeeperJepsenRelease:
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/keeper_jepsen
REPO_COPY=${{runner.temp}}/keeper_jepsen/ClickHouse
EOF
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Jepsen Test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 keeper_jepsen_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH

@ -1,4 +1,9 @@
name: CIGithubActions
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
pull_request:
types:
@ -231,6 +236,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -275,6 +281,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -319,6 +326,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -329,6 +337,48 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH $CACHES_PATH
BuilderDebAarch64:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
CHECK_NAME=ClickHouse build check (actions)
BUILD_NAME=package_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'true'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH $CACHES_PATH
BuilderDebAsan:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
@ -363,6 +413,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -407,6 +458,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -451,6 +503,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -495,6 +548,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -539,6 +593,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -586,6 +641,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -630,6 +686,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -674,6 +731,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -718,6 +776,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -762,6 +821,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -806,6 +866,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -850,6 +911,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -867,13 +929,14 @@ jobs:
needs:
- BuilderDebRelease
- BuilderBinRelease
- BuilderDebAarch64
- BuilderDebAsan
- BuilderDebTsan
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
runs-on: [self-hosted, style-checker]
if: always()
if: ${{ success() || failure() }}
steps:
- name: Set envs
run: |
@ -913,7 +976,7 @@ jobs:
- BuilderBinDarwinAarch64
- BuilderBinPPC64
runs-on: [self-hosted, style-checker]
if: always()
if: ${{ success() || failure() }}
steps:
- name: Set envs
run: |

@ -1,4 +1,9 @@
name: MasterCI
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
push:
branches:
@ -152,6 +157,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -196,6 +202,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -241,6 +248,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -285,6 +293,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -329,6 +338,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -373,6 +383,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -417,6 +428,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -461,6 +473,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -509,6 +522,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -554,6 +568,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -599,6 +614,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -644,6 +660,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -689,6 +706,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -734,6 +752,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -779,6 +798,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}

@ -1,4 +1,9 @@
name: DocsReleaseChecks
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
concurrency:
group: master-release
cancel-in-progress: true
@ -35,6 +40,17 @@ jobs:
needs: DockerHubPush
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/docs_release
REPO_COPY=${{runner.temp}}/docs_release/ClickHouse
CLOUDFLARE_TOKEN=${{secrets.CLOUDFLARE}}
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
EOF
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
@ -46,11 +62,6 @@ jobs:
name: changed_images
path: ${{ env.TEMP_PATH }}
- name: Docs Release
env:
TEMP_PATH: ${{runner.temp}}/docs_release
REPO_COPY: ${{runner.temp}}/docs_release/ClickHouse
CLOUDFLARE_TOKEN: ${{secrets.CLOUDFLARE}}
ROBOT_CLICKHOUSE_SSH_KEY: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH

@ -1,4 +1,9 @@
name: ReleaseCI
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
push:
branches:
@ -93,6 +98,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -137,6 +143,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -181,6 +188,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -225,6 +233,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -269,6 +278,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -313,6 +323,7 @@ jobs:
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
@ -402,7 +413,7 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestAsan:
FunctionalStatelessTestAsan0:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
steps:
@ -414,6 +425,8 @@ jobs:
CHECK_NAME=Stateless tests (address, actions)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
EOF
- name: Download json reports
uses: actions/download-artifact@v2
@ -437,7 +450,44 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestTsan:
FunctionalStatelessTestAsan1:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
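The RUN_BY_HASH_NUM / RUN_BY_HASH_TOTAL pairs added above split a single check into fixed shards: the job numbered N out of TOTAL is expected to run only the tests whose stable hash lands in bucket N, so the shards together cover the suite exactly once. A rough Python sketch of that selection (illustrative only; the real logic lives in tests/ci/functional_test_check.py and may differ):

    import zlib

    def tests_for_shard(all_tests, run_by_hash_num: int, run_by_hash_total: int):
        # Keep the tests whose stable hash falls into this job's bucket.
        return [
            name for name in all_tests
            if zlib.crc32(name.encode()) % run_by_hash_total == run_by_hash_num
        ]

    # Example: "Stateless tests (address, actions)" is split in two, so the
    # FunctionalStatelessTestAsan1 job above would take bucket 1 of 2.
    print(tests_for_shard(["00001_select", "00002_insert", "00003_join"], 1, 2))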
FunctionalStatelessTestTsan0:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
steps:
@ -449,6 +499,82 @@ jobs:
CHECK_NAME=Stateless tests (thread, actions)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestTsan1:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestTsan2:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
@ -507,7 +633,7 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestMsan:
FunctionalStatelessTestMsan0:
needs: [BuilderDebMsan]
runs-on: [self-hosted, func-tester]
steps:
@ -519,6 +645,8 @@ jobs:
CHECK_NAME=Stateless tests (memory, actions)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
@ -542,7 +670,81 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestDebug:
FunctionalStatelessTestMsan1:
needs: [BuilderDebMsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestMsan2:
needs: [BuilderDebMsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestDebug0:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
@ -554,6 +756,82 @@ jobs:
CHECK_NAME=Stateless tests (debug, actions)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestDebug1:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestDebug2:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
@ -970,8 +1248,8 @@ jobs:
#############################################################################################
############################# INTEGRATION TESTS #############################################
#############################################################################################
IntegrationTestsAsan:
needs: [BuilderDebAsan, FunctionalStatelessTestAsan]
IntegrationTestsAsan0:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
@ -981,6 +1259,8 @@ jobs:
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
@ -1004,8 +1284,80 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsTsan:
needs: [BuilderDebTsan, FunctionalStatelessTestTsan]
IntegrationTestsAsan1:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsAsan2:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsTsan0:
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
@ -1015,6 +1367,8 @@ jobs:
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=4
EOF
- name: Download json reports
uses: actions/download-artifact@v2
@ -1038,8 +1392,116 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsRelease:
needs: [BuilderDebRelease, FunctionalStatelessTestRelease]
IntegrationTestsTsan1:
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=4
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsTsan2:
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=4
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsTsan3:
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=4
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsRelease0:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
@ -1049,6 +1511,44 @@ jobs:
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsRelease1:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2
EOF
- name: Download json reports
uses: actions/download-artifact@v2
@ -1076,11 +1576,18 @@ jobs:
needs:
- DockerHubPush
- BuilderReport
- FunctionalStatelessTestDebug
- FunctionalStatelessTestDebug0
- FunctionalStatelessTestDebug1
- FunctionalStatelessTestDebug2
- FunctionalStatelessTestRelease
- FunctionalStatelessTestAsan
- FunctionalStatelessTestTsan
- FunctionalStatelessTestMsan
- FunctionalStatelessTestAsan0
- FunctionalStatelessTestAsan1
- FunctionalStatelessTestTsan0
- FunctionalStatelessTestTsan1
- FunctionalStatelessTestTsan2
- FunctionalStatelessTestMsan0
- FunctionalStatelessTestMsan1
- FunctionalStatelessTestMsan2
- FunctionalStatelessTestUBsan
- FunctionalStatefulTestDebug
- FunctionalStatefulTestRelease
@ -1093,9 +1600,15 @@ jobs:
- StressTestTsan
- StressTestMsan
- StressTestUBsan
- IntegrationTestsAsan
- IntegrationTestsRelease
- IntegrationTestsTsan
- IntegrationTestsAsan0
- IntegrationTestsAsan1
- IntegrationTestsAsan2
- IntegrationTestsRelease0
- IntegrationTestsRelease1
- IntegrationTestsTsan0
- IntegrationTestsTsan1
- IntegrationTestsTsan2
- IntegrationTestsTsan3
- CompatibilityCheck
runs-on: [self-hosted, style-checker]
steps:

.github/workflows/woboq.yml (new file)

@ -0,0 +1,42 @@
name: WoboqBuilder
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
concurrency:
group: woboq
on: # yamllint disable-line rule:truthy
schedule:
- cron: '0 */18 * * *'
workflow_dispatch:
jobs:
# don't use dockerhub push because this image updates so rarely
WoboqCodebrowser:
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/codebrowser
REPO_COPY=${{runner.temp}}/codebrowser/ClickHouse
IMAGES_PATH=${{runner.temp}}/images_path
EOF
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'true'
- name: Codebrowser
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 codebrowser_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH

.gitmodules

@ -54,8 +54,8 @@
url = https://github.com/ClickHouse-Extras/Turbo-Base64.git
[submodule "contrib/arrow"]
path = contrib/arrow
url = https://github.com/ClickHouse-Extras/arrow
branch = clickhouse-arrow-2.0.0
url = https://github.com/ClickHouse-Extras/arrow.git
branch = blessed/release-6.0.1
[submodule "contrib/thrift"]
path = contrib/thrift
url = https://github.com/apache/thrift.git
@ -190,8 +190,8 @@
url = https://github.com/xz-mirror/xz
[submodule "contrib/abseil-cpp"]
path = contrib/abseil-cpp
url = https://github.com/ClickHouse-Extras/abseil-cpp.git
branch = lts_2020_02_25
url = https://github.com/abseil/abseil-cpp.git
branch = lts_2021_11_02
[submodule "contrib/dragonbox"]
path = contrib/dragonbox
url = https://github.com/ClickHouse-Extras/dragonbox.git

@ -424,6 +424,11 @@ if (OS_LINUX AND NOT SANITIZE)
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined")
endif ()
# Increase the stack size on Musl. We need a big stack for our recursive-descent parser.
if (USE_MUSL)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,stack-size=2097152")
endif ()
include(cmake/dbms_glob_sources.cmake)
if (OS_LINUX OR OS_ANDROID)
@ -451,6 +456,11 @@ if (MAKE_STATIC_LIBRARIES)
endif ()
else ()
set (CMAKE_POSITION_INDEPENDENT_CODE ON)
# This is required for clang on Arch Linux, which uses PIE by default.
# See enable-SSP-and-PIE-by-default.patch [1].
#
# [1]: https://github.com/archlinux/svntogit-packages/blob/6e681aa860e65ad46a1387081482eb875c2200f2/trunk/enable-SSP-and-PIE-by-default.patch
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie")
endif ()
if (ENABLE_TESTS)

@ -27,8 +27,7 @@ execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH)
if (OS MATCHES "Linux"
AND NOT DEFINED CMAKE_TOOLCHAIN_FILE
AND NOT DISABLE_HERMETIC_BUILD
AND ($ENV{CC} MATCHES ".*clang.*" OR CMAKE_C_COMPILER MATCHES ".*clang.*")
AND (USE_STATIC_LIBRARIES OR NOT DEFINED USE_STATIC_LIBRARIES))
AND ($ENV{CC} MATCHES ".*clang.*" OR CMAKE_C_COMPILER MATCHES ".*clang.*"))
if (ARCH MATCHES "amd64|x86_64")
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-x86_64.cmake" CACHE INTERNAL "" FORCE)

@ -9,7 +9,3 @@ add_subdirectory (pcg-random)
add_subdirectory (widechar_width)
add_subdirectory (readpassphrase)
add_subdirectory (bridge)
if (USE_MYSQL)
add_subdirectory (mysqlxx)
endif ()

@ -1,8 +1,6 @@
set (SRCS
argsToConfig.cpp
coverage.cpp
DateLUT.cpp
DateLUTImpl.cpp
demangle.cpp
getFQDNOrHostName.cpp
getMemoryAmount.cpp
@ -18,7 +16,6 @@ set (SRCS
sleep.cpp
terminalColors.cpp
errnoToString.cpp
getResource.cpp
StringRef.cpp
)

@ -1,8 +1,11 @@
#include <base/getPageSize.h>
#include <unistd.h>
#include <cstdlib>
Int64 getPageSize()
{
return sysconf(_SC_PAGESIZE);
Int64 page_size = sysconf(_SC_PAGESIZE);
if (page_size < 0)
abort();
return page_size;
}

@ -123,6 +123,12 @@ bool hasPHDRCache()
#else
void updatePHDRCache() {}
bool hasPHDRCache() { return false; }
#if defined(USE_MUSL)
/// When statically linked with musl, dl_iterate_phdr is immutable.
bool hasPHDRCache() { return true; }
#else
bool hasPHDRCache() { return false; }
#endif
#endif

@ -182,7 +182,6 @@ TRAP(vlimit)
TRAP(wcsnrtombs)
TRAP(wcsrtombs)
TRAP(wctomb)
TRAP(wordexp)
TRAP(basename)
TRAP(catgets)
TRAP(dbm_clearerr)
@ -195,9 +194,8 @@ TRAP(dbm_nextkey)
TRAP(dbm_open)
TRAP(dbm_store)
TRAP(dirname)
#if !defined(SANITIZER)
TRAP(dlerror) // Used by tsan
#endif
// TRAP(dlerror) // It is not thread-safe. But it is used by dynamic linker to load some name resolution plugins. Also used by TSan.
/// Note: we should better get rid of glibc, dynamic linking and all that sort of annoying garbage altogether.
TRAP(ftw)
TRAP(getc_unlocked)
//TRAP(getenv) // Ok at program startup
@ -245,4 +243,21 @@ TRAP(lgammaf32x)
TRAP(lgammaf64)
TRAP(lgammaf64x)
/// These functions are unused by ClickHouse, and we should be aware if they accidentally get used.
/// Sometimes people report that these functions contain vulnerabilities (these reports are bogus for ClickHouse).
TRAP(mq_close)
TRAP(mq_getattr)
TRAP(mq_setattr)
TRAP(mq_notify)
TRAP(mq_open)
TRAP(mq_receive)
TRAP(mq_send)
TRAP(mq_unlink)
TRAP(mq_timedsend)
TRAP(mq_timedreceive)
/// These functions are also unused by ClickHouse.
TRAP(wordexp)
TRAP(wordfree)
#endif

@ -1,61 +0,0 @@
add_library (mysqlxx
Connection.cpp
Exception.cpp
Query.cpp
ResultBase.cpp
UseQueryResult.cpp
Row.cpp
Value.cpp
Pool.cpp
PoolFactory.cpp
PoolWithFailover.cpp
)
target_include_directories (mysqlxx PUBLIC ..)
if (NOT USE_INTERNAL_MYSQL_LIBRARY)
set(PLATFORM_LIBRARIES ${CMAKE_DL_LIBS})
if (USE_MYSQL)
target_include_directories (mysqlxx SYSTEM PRIVATE ${MYSQL_INCLUDE_DIR})
endif ()
if (APPLE)
find_library (ICONV_LIBRARY iconv)
set (MYSQLCLIENT_LIBRARIES ${MYSQLCLIENT_LIBRARIES} ${STATIC_MYSQLCLIENT_LIB} ${ICONV_LIBRARY})
elseif (USE_STATIC_LIBRARIES AND STATIC_MYSQLCLIENT_LIB)
set (MYSQLCLIENT_LIBRARIES ${STATIC_MYSQLCLIENT_LIB})
endif ()
endif ()
target_link_libraries (mysqlxx
PUBLIC
common
PRIVATE
${MYSQLCLIENT_LIBRARIES}
${ZLIB_LIBRARIES}
)
if(OPENSSL_LIBRARIES)
target_link_libraries(mysqlxx PRIVATE ${OPENSSL_LIBRARIES})
endif()
target_link_libraries(mysqlxx PRIVATE ${PLATFORM_LIBRARIES})
if (NOT USE_INTERNAL_MYSQL_LIBRARY AND OPENSSL_INCLUDE_DIR)
target_include_directories (mysqlxx SYSTEM PRIVATE ${OPENSSL_INCLUDE_DIR})
endif ()
target_no_warning(mysqlxx reserved-macro-identifier)
if (NOT USE_INTERNAL_MYSQL_LIBRARY AND USE_STATIC_LIBRARIES)
message(WARNING "Statically linking with system mysql/mariadb only works "
"if mysql client libraries are built with same openssl version as "
"we are going to use now. It wouldn't work if GnuTLS is used. "
"Try -D\"USE_INTERNAL_MYSQL_LIBRARY\"=ON or -D\"ENABLE_MYSQL\"=OFF or "
"-D\"USE_STATIC_LIBRARIES\"=OFF")
endif ()
if (ENABLE_TESTS)
add_subdirectory (tests)
endif ()

@ -1,8 +1,10 @@
option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES})
option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
"Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
${ENABLE_LIBRARIES})
ON)
if (USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
if (ENABLE_AZURE_BLOB_STORAGE)
set(USE_AZURE_BLOB_STORAGE 1)
set(AZURE_BLOB_STORAGE_LIBRARY azure_sdk)
endif()

@ -32,11 +32,6 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
set (CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND} ${CMAKE_CXX_COMPILER_LAUNCHER})
set (CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND} ${CMAKE_C_COMPILER_LAUNCHER})
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND})
# debian (debhelpers) sets the SOURCE_DATE_EPOCH environment variable, which is
# filled from the debian/changelog or the current time.
#
@ -49,11 +44,14 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required")
set(LAUNCHER ${CCACHE_FOUND})
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH")
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH")
set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND})
endif()
set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER})
set (CMAKE_C_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_C_COMPILER_LAUNCHER})
else ()
message(${RECONFIGURE_MESSAGE_LEVEL} "Not using ${CCACHE_FOUND} ${CCACHE_VERSION} bug: https://bugzilla.samba.org/show_bug.cgi?id=8118")
endif ()

@ -14,9 +14,12 @@ set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-x86_6
set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/x86_64-linux-gnu/libc")
set (CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS_INIT "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

@ -42,6 +42,14 @@ if (CMAKE_CROSSCOMPILING)
message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
endif ()
if (USE_MUSL)
set (USE_SENTRY OFF CACHE INTERNAL "")
set (ENABLE_ODBC OFF CACHE INTERNAL "")
set (ENABLE_GRPC OFF CACHE INTERNAL "")
set (ENABLE_HDFS OFF CACHE INTERNAL "")
set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "")
endif ()
# Don't know why but CXX_STANDARD doesn't work for cross-compilation
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++20")

contrib/NuRaft (submodule)

@ -1 +1 @@
Subproject commit bb69d48e0ee35c87a0f19e509a09a914f71f0cff
Subproject commit ff100a8713146e1ca4b4158dd6cc4eef9af47fc3

contrib/abseil-cpp (submodule)

@ -1 +1 @@
Subproject commit b004a8a02418b83de8b686caa0b0f6e39ac2191f
Subproject commit 215105818dfde3174fe799600bb0f3cae233d0bf

@ -2,6 +2,8 @@ set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
if(NOT EXISTS "${ABSL_ROOT_DIR}/CMakeLists.txt")
message(FATAL_ERROR " submodule third_party/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
endif()
set(BUILD_TESTING OFF)
set(ABSL_PROPAGATE_CXX_STD ON)
add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")
add_library(abseil_swiss_tables INTERFACE)

contrib/arrow (submodule)

@ -1 +1 @@
Subproject commit 078e21bad344747b7656ef2d7a4f7410a0a303eb
Subproject commit aa9a7a698e33e278abe053f4634170b3b026e48e

@ -1,5 +1,22 @@
set (CMAKE_CXX_STANDARD 17)
set(ARROW_VERSION "6.0.1")
string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}")
set(ARROW_VERSION_MAJOR "6")
set(ARROW_VERSION_MINOR "0")
set(ARROW_VERSION_PATCH "1")
if(ARROW_VERSION_MAJOR STREQUAL "0")
# Arrow 0.x.y => SO version is "x", full SO version is "x.y.0"
set(ARROW_SO_VERSION "${ARROW_VERSION_MINOR}")
set(ARROW_FULL_SO_VERSION "${ARROW_SO_VERSION}.${ARROW_VERSION_PATCH}.0")
else()
# Arrow 1.x.y => SO version is "10x", full SO version is "10x.y.0"
math(EXPR ARROW_SO_VERSION "${ARROW_VERSION_MAJOR} * 100 + ${ARROW_VERSION_MINOR}")
set(ARROW_FULL_SO_VERSION "${ARROW_SO_VERSION}.${ARROW_VERSION_PATCH}.0")
endif()
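For the 6.0.1 version pinned above, the non-zero-major branch gives SO version 600 and full SO version 600.1.0; the same arithmetic as a quick Python check (purely a worked example of the CMake math above):

    # Worked example of the SO-version arithmetic for Arrow 6.0.1.
    major, minor, patch = 6, 0, 1
    so_version = major * 100 + minor                # 600
    full_so_version = f"{so_version}.{patch}.0"     # "600.1.0"
    print(so_version, full_so_version)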
# === thrift
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp")
@ -93,6 +110,9 @@ add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}")
message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}")
# === hdfs
set(HDFS_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/")
# The arrow-cmake CMake file calls the ORC CMake subroutine, which detects certain compiler features.
# Apple Clang failed to compile that code without the C++11 standard being specified,
# so those compiler features were detected as absent, and as a result ORC itself failed to compile.
@ -114,6 +134,7 @@ configure_file("${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" "${ORC_BUILD_INCLUDE_DI
configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/Adaptor.hh")
# ARROW_ORC + adapters/orc/CMakefiles
set(ORC_SRCS
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc"
@ -150,28 +171,8 @@ set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow")
configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/cpp/src/arrow/util/config.h")
# arrow/cpp/src/arrow/CMakeLists.txt
# arrow/cpp/src/arrow/CMakeLists.txt (ARROW_SRCS + ARROW_COMPUTE + ARROW_IPC)
set(ARROW_SRCS
"${LIBRARY_DIR}/buffer.cc"
"${LIBRARY_DIR}/builder.cc"
"${LIBRARY_DIR}/chunked_array.cc"
"${LIBRARY_DIR}/compare.cc"
"${LIBRARY_DIR}/datum.cc"
"${LIBRARY_DIR}/device.cc"
"${LIBRARY_DIR}/extension_type.cc"
"${LIBRARY_DIR}/memory_pool.cc"
"${LIBRARY_DIR}/pretty_print.cc"
"${LIBRARY_DIR}/record_batch.cc"
"${LIBRARY_DIR}/result.cc"
"${LIBRARY_DIR}/scalar.cc"
"${LIBRARY_DIR}/sparse_tensor.cc"
"${LIBRARY_DIR}/status.cc"
"${LIBRARY_DIR}/table_builder.cc"
"${LIBRARY_DIR}/table.cc"
"${LIBRARY_DIR}/tensor.cc"
"${LIBRARY_DIR}/type.cc"
"${LIBRARY_DIR}/visitor.cc"
"${LIBRARY_DIR}/array/array_base.cc"
"${LIBRARY_DIR}/array/array_binary.cc"
"${LIBRARY_DIR}/array/array_decimal.cc"
@ -191,25 +192,112 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/array/diff.cc"
"${LIBRARY_DIR}/array/util.cc"
"${LIBRARY_DIR}/array/validate.cc"
"${LIBRARY_DIR}/builder.cc"
"${LIBRARY_DIR}/buffer.cc"
"${LIBRARY_DIR}/chunked_array.cc"
"${LIBRARY_DIR}/compare.cc"
"${LIBRARY_DIR}/config.cc"
"${LIBRARY_DIR}/datum.cc"
"${LIBRARY_DIR}/device.cc"
"${LIBRARY_DIR}/extension_type.cc"
"${LIBRARY_DIR}/memory_pool.cc"
"${LIBRARY_DIR}/pretty_print.cc"
"${LIBRARY_DIR}/record_batch.cc"
"${LIBRARY_DIR}/result.cc"
"${LIBRARY_DIR}/scalar.cc"
"${LIBRARY_DIR}/sparse_tensor.cc"
"${LIBRARY_DIR}/status.cc"
"${LIBRARY_DIR}/table.cc"
"${LIBRARY_DIR}/table_builder.cc"
"${LIBRARY_DIR}/tensor.cc"
"${LIBRARY_DIR}/tensor/coo_converter.cc"
"${LIBRARY_DIR}/tensor/csf_converter.cc"
"${LIBRARY_DIR}/tensor/csx_converter.cc"
"${LIBRARY_DIR}/type.cc"
"${LIBRARY_DIR}/visitor.cc"
"${LIBRARY_DIR}/c/bridge.cc"
"${LIBRARY_DIR}/io/buffered.cc"
"${LIBRARY_DIR}/io/caching.cc"
"${LIBRARY_DIR}/io/compressed.cc"
"${LIBRARY_DIR}/io/file.cc"
"${LIBRARY_DIR}/io/hdfs.cc"
"${LIBRARY_DIR}/io/hdfs_internal.cc"
"${LIBRARY_DIR}/io/interfaces.cc"
"${LIBRARY_DIR}/io/memory.cc"
"${LIBRARY_DIR}/io/slow.cc"
"${LIBRARY_DIR}/io/stdio.cc"
"${LIBRARY_DIR}/io/transform.cc"
"${LIBRARY_DIR}/util/async_util.cc"
"${LIBRARY_DIR}/util/basic_decimal.cc"
"${LIBRARY_DIR}/util/bit_block_counter.cc"
"${LIBRARY_DIR}/util/bit_run_reader.cc"
"${LIBRARY_DIR}/util/bit_util.cc"
"${LIBRARY_DIR}/util/bitmap.cc"
"${LIBRARY_DIR}/util/bitmap_builders.cc"
"${LIBRARY_DIR}/util/bitmap_ops.cc"
"${LIBRARY_DIR}/util/bpacking.cc"
"${LIBRARY_DIR}/util/cancel.cc"
"${LIBRARY_DIR}/util/compression.cc"
"${LIBRARY_DIR}/util/counting_semaphore.cc"
"${LIBRARY_DIR}/util/cpu_info.cc"
"${LIBRARY_DIR}/util/decimal.cc"
"${LIBRARY_DIR}/util/delimiting.cc"
"${LIBRARY_DIR}/util/formatting.cc"
"${LIBRARY_DIR}/util/future.cc"
"${LIBRARY_DIR}/util/int_util.cc"
"${LIBRARY_DIR}/util/io_util.cc"
"${LIBRARY_DIR}/util/logging.cc"
"${LIBRARY_DIR}/util/key_value_metadata.cc"
"${LIBRARY_DIR}/util/memory.cc"
"${LIBRARY_DIR}/util/mutex.cc"
"${LIBRARY_DIR}/util/string.cc"
"${LIBRARY_DIR}/util/string_builder.cc"
"${LIBRARY_DIR}/util/task_group.cc"
"${LIBRARY_DIR}/util/tdigest.cc"
"${LIBRARY_DIR}/util/thread_pool.cc"
"${LIBRARY_DIR}/util/time.cc"
"${LIBRARY_DIR}/util/trie.cc"
"${LIBRARY_DIR}/util/unreachable.cc"
"${LIBRARY_DIR}/util/uri.cc"
"${LIBRARY_DIR}/util/utf8.cc"
"${LIBRARY_DIR}/util/value_parsing.cc"
"${LIBRARY_DIR}/vendored/base64.cpp"
"${LIBRARY_DIR}/vendored/datetime/tz.cpp"
"${LIBRARY_DIR}/vendored/musl/strptime.c"
"${LIBRARY_DIR}/vendored/uriparser/UriCommon.c"
"${LIBRARY_DIR}/vendored/uriparser/UriCompare.c"
"${LIBRARY_DIR}/vendored/uriparser/UriEscape.c"
"${LIBRARY_DIR}/vendored/uriparser/UriFile.c"
"${LIBRARY_DIR}/vendored/uriparser/UriIp4Base.c"
"${LIBRARY_DIR}/vendored/uriparser/UriIp4.c"
"${LIBRARY_DIR}/vendored/uriparser/UriMemory.c"
"${LIBRARY_DIR}/vendored/uriparser/UriNormalizeBase.c"
"${LIBRARY_DIR}/vendored/uriparser/UriNormalize.c"
"${LIBRARY_DIR}/vendored/uriparser/UriParseBase.c"
"${LIBRARY_DIR}/vendored/uriparser/UriParse.c"
"${LIBRARY_DIR}/vendored/uriparser/UriQuery.c"
"${LIBRARY_DIR}/vendored/uriparser/UriRecompose.c"
"${LIBRARY_DIR}/vendored/uriparser/UriResolve.c"
"${LIBRARY_DIR}/vendored/uriparser/UriShorten.c"
"${LIBRARY_DIR}/compute/api_aggregate.cc"
"${LIBRARY_DIR}/compute/api_scalar.cc"
"${LIBRARY_DIR}/compute/api_vector.cc"
"${LIBRARY_DIR}/compute/cast.cc"
"${LIBRARY_DIR}/compute/exec.cc"
"${LIBRARY_DIR}/compute/exec/aggregate_node.cc"
"${LIBRARY_DIR}/compute/exec/exec_plan.cc"
"${LIBRARY_DIR}/compute/exec/expression.cc"
"${LIBRARY_DIR}/compute/exec/filter_node.cc"
"${LIBRARY_DIR}/compute/exec/project_node.cc"
"${LIBRARY_DIR}/compute/exec/source_node.cc"
"${LIBRARY_DIR}/compute/exec/sink_node.cc"
"${LIBRARY_DIR}/compute/exec/order_by_impl.cc"
"${LIBRARY_DIR}/compute/function.cc"
"${LIBRARY_DIR}/compute/function_internal.cc"
"${LIBRARY_DIR}/compute/kernel.cc"
"${LIBRARY_DIR}/compute/registry.cc"
"${LIBRARY_DIR}/compute/exec/exec_plan.cc"
"${LIBRARY_DIR}/compute/exec/expression.cc"
"${LIBRARY_DIR}/compute/exec/key_compare.cc"
"${LIBRARY_DIR}/compute/exec/key_encode.cc"
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
"${LIBRARY_DIR}/compute/exec/key_map.cc"
"${LIBRARY_DIR}/compute/exec/util.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc"
@ -227,28 +315,31 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_fill_null.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
"${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
"${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
"${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
"${LIBRARY_DIR}/compute/kernels/vector_sort.cc"
"${LIBRARY_DIR}/csv/chunker.cc"
"${LIBRARY_DIR}/csv/column_builder.cc"
"${LIBRARY_DIR}/csv/column_decoder.cc"
"${LIBRARY_DIR}/csv/converter.cc"
"${LIBRARY_DIR}/csv/options.cc"
"${LIBRARY_DIR}/csv/parser.cc"
"${LIBRARY_DIR}/csv/reader.cc"
"${LIBRARY_DIR}/csv/writer.cc"
"${LIBRARY_DIR}/compute/kernels/row_encoder.cc"
"${LIBRARY_DIR}/compute/exec/union_node.cc"
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
"${LIBRARY_DIR}/compute/exec/key_map.cc"
"${LIBRARY_DIR}/compute/exec/key_compare.cc"
"${LIBRARY_DIR}/compute/exec/key_encode.cc"
"${LIBRARY_DIR}/compute/exec/util.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
"${LIBRARY_DIR}/compute/exec/hash_join.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
"${LIBRARY_DIR}/compute/exec/task_util.cc"
"${LIBRARY_DIR}/ipc/dictionary.cc"
"${LIBRARY_DIR}/ipc/feather.cc"
@ -258,52 +349,6 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/ipc/reader.cc"
"${LIBRARY_DIR}/ipc/writer.cc"
"${LIBRARY_DIR}/io/buffered.cc"
"${LIBRARY_DIR}/io/caching.cc"
"${LIBRARY_DIR}/io/compressed.cc"
"${LIBRARY_DIR}/io/file.cc"
"${LIBRARY_DIR}/io/interfaces.cc"
"${LIBRARY_DIR}/io/memory.cc"
"${LIBRARY_DIR}/io/slow.cc"
"${LIBRARY_DIR}/io/stdio.cc"
"${LIBRARY_DIR}/io/transform.cc"
"${LIBRARY_DIR}/tensor/coo_converter.cc"
"${LIBRARY_DIR}/tensor/csf_converter.cc"
"${LIBRARY_DIR}/tensor/csx_converter.cc"
"${LIBRARY_DIR}/util/basic_decimal.cc"
"${LIBRARY_DIR}/util/bit_block_counter.cc"
"${LIBRARY_DIR}/util/bit_run_reader.cc"
"${LIBRARY_DIR}/util/bit_util.cc"
"${LIBRARY_DIR}/util/bitmap_builders.cc"
"${LIBRARY_DIR}/util/bitmap_ops.cc"
"${LIBRARY_DIR}/util/bitmap.cc"
"${LIBRARY_DIR}/util/bpacking.cc"
"${LIBRARY_DIR}/util/cancel.cc"
"${LIBRARY_DIR}/util/compression.cc"
"${LIBRARY_DIR}/util/cpu_info.cc"
"${LIBRARY_DIR}/util/decimal.cc"
"${LIBRARY_DIR}/util/delimiting.cc"
"${LIBRARY_DIR}/util/formatting.cc"
"${LIBRARY_DIR}/util/future.cc"
"${LIBRARY_DIR}/util/int_util.cc"
"${LIBRARY_DIR}/util/io_util.cc"
"${LIBRARY_DIR}/util/key_value_metadata.cc"
"${LIBRARY_DIR}/util/logging.cc"
"${LIBRARY_DIR}/util/memory.cc"
"${LIBRARY_DIR}/util/mutex.cc"
"${LIBRARY_DIR}/util/string_builder.cc"
"${LIBRARY_DIR}/util/string.cc"
"${LIBRARY_DIR}/util/task_group.cc"
"${LIBRARY_DIR}/util/tdigest.cc"
"${LIBRARY_DIR}/util/thread_pool.cc"
"${LIBRARY_DIR}/util/time.cc"
"${LIBRARY_DIR}/util/trie.cc"
"${LIBRARY_DIR}/util/utf8.cc"
"${LIBRARY_DIR}/util/value_parsing.cc"
"${LIBRARY_DIR}/vendored/base64.cpp"
${ORC_SRCS}
)
@ -373,6 +418,7 @@ target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_INCLUDE_D
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_ADDITION_SOURCE_DIR})
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ARROW_SRC_DIR})
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${HDFS_INCLUDE_DIR})
# === parquet
@ -446,7 +492,7 @@ set (HAVE_STRERROR_R 1)
set (HAVE_SCHED_GET_PRIORITY_MAX 1)
set (HAVE_SCHED_GET_PRIORITY_MIN 1)
if (OS_LINUX)
if (OS_LINUX AND NOT USE_MUSL)
set (STRERROR_R_CHAR_P 1)
endif ()

View File

@ -46,14 +46,17 @@ include("${AZURE_DIR}/cmake-modules/AzureTransportAdapters.cmake")
add_library(azure_sdk ${AZURE_SDK_UNIFIED_SRC})
if (COMPILER_CLANG)
target_compile_options(azure_sdk PUBLIC
target_compile_options(azure_sdk PRIVATE
-Wno-deprecated-copy-dtor
-Wno-extra-semi
-Wno-suggest-destructor-override
-Wno-inconsistent-missing-destructor-override
-Wno-error=unknown-warning-option
-Wno-reserved-identifier
)
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13)
target_compile_options(azure_sdk PRIVATE -Wno-reserved-identifier)
endif()
endif()
# Originally, on Windows azure-core is built with bcrypt and crypt32 by default
@ -68,4 +71,4 @@ endif()
target_link_libraries(azure_sdk PRIVATE ${LIBXML2_LIBRARIES})
target_include_directories(azure_sdk PUBLIC ${AZURE_SDK_INCLUDES})
target_include_directories(azure_sdk SYSTEM PUBLIC ${AZURE_SDK_INCLUDES})

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit fcb058e1459ac273ecfe7cdf72791cb1479115af
Subproject commit c0807e83f2824e8dd67a15b355496a9b784cdcd5

View File

@ -1,9 +1,7 @@
option (USE_INTERNAL_BOOST_LIBRARY "Use internal Boost library" ON)
if (NOT USE_INTERNAL_BOOST_LIBRARY)
# 1.70 like in contrib/boost
# 1.71 on CI
set(BOOST_VERSION 1.71)
set(BOOST_VERSION 1.78)
find_package(Boost ${BOOST_VERSION} COMPONENTS
system
@ -66,9 +64,11 @@ if (NOT EXTERNAL_BOOST_FOUND)
set (SRCS_FILESYSTEM
"${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp"
"${LIBRARY_DIR}/libs/filesystem/src/directory.cpp"
"${LIBRARY_DIR}/libs/filesystem/src/exception.cpp"
"${LIBRARY_DIR}/libs/filesystem/src/operations.cpp"
"${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp"
"${LIBRARY_DIR}/libs/filesystem/src/path.cpp"
"${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp"
"${LIBRARY_DIR}/libs/filesystem/src/portability.cpp"
"${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp"
"${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp"
@ -126,24 +126,11 @@ if (NOT EXTERNAL_BOOST_FOUND)
# regex
set (SRCS_REGEX
"${LIBRARY_DIR}/libs/regex/src/c_regex_traits.cpp"
"${LIBRARY_DIR}/libs/regex/src/cpp_regex_traits.cpp"
"${LIBRARY_DIR}/libs/regex/src/cregex.cpp"
"${LIBRARY_DIR}/libs/regex/src/fileiter.cpp"
"${LIBRARY_DIR}/libs/regex/src/icu.cpp"
"${LIBRARY_DIR}/libs/regex/src/instances.cpp"
"${LIBRARY_DIR}/libs/regex/src/internals.hpp"
"${LIBRARY_DIR}/libs/regex/src/posix_api.cpp"
"${LIBRARY_DIR}/libs/regex/src/regex_debug.cpp"
"${LIBRARY_DIR}/libs/regex/src/regex_raw_buffer.cpp"
"${LIBRARY_DIR}/libs/regex/src/regex_traits_defaults.cpp"
"${LIBRARY_DIR}/libs/regex/src/regex.cpp"
"${LIBRARY_DIR}/libs/regex/src/static_mutex.cpp"
"${LIBRARY_DIR}/libs/regex/src/usinstances.cpp"
"${LIBRARY_DIR}/libs/regex/src/w32_regex_traits.cpp"
"${LIBRARY_DIR}/libs/regex/src/wc_regex_traits.cpp"
"${LIBRARY_DIR}/libs/regex/src/wide_posix_api.cpp"
"${LIBRARY_DIR}/libs/regex/src/winstances.cpp"
)
add_library (_boost_regex ${SRCS_REGEX})
@ -166,7 +153,6 @@ if (NOT EXTERNAL_BOOST_FOUND)
set (SRCS_CONTEXT
"${LIBRARY_DIR}/libs/context/src/dummy.cpp"
"${LIBRARY_DIR}/libs/context/src/execution_context.cpp"
"${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp"
)

2
contrib/cassandra vendored

@ -1 +1 @@
Subproject commit eb9b68dadbb4417a2c132ad4a1c2fa76e65e6fc1
Subproject commit f4a31e92a25c34c02c7291ff97c7813bc83b0e09

2
contrib/jemalloc vendored

@ -1 +1 @@
Subproject commit e6891d9746143bf2cf617493d880ba5a0b9a3efd
Subproject commit a1404807211b1612539f840b3dcb1bf38d1a269e

View File

@ -1,17 +1,8 @@
# This file is a modified version of contrib/libuv/CMakeLists.txt
include(CMakeDependentOption)
set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/libuv")
set (BINARY_DIR "${CMAKE_BINARY_DIR}/contrib/libuv")
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
list(APPEND uv_cflags -fvisibility=hidden --std=gnu89)
list(APPEND uv_cflags -Wall -Wextra -Wstrict-prototypes)
list(APPEND uv_cflags -Wno-unused-parameter)
endif()
set(uv_sources
src/fs-poll.c
src/idna.c
@ -76,7 +67,7 @@ endif()
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
list(APPEND uv_defines _GNU_SOURCE _POSIX_C_SOURCE=200112)
list(APPEND uv_libraries dl rt)
list(APPEND uv_libraries rt)
list(APPEND uv_sources
src/unix/linux-core.c
src/unix/linux-inotify.c

View File

@ -236,8 +236,7 @@ set(LIBMARIADB_SOURCES ${LIBMARIADB_SOURCES} ${CC_SOURCE_DIR}/libmariadb/mariadb
add_library(mariadbclient STATIC ${LIBMARIADB_SOURCES})
target_link_libraries(mariadbclient ${SYSTEM_LIBS})
target_include_directories(mariadbclient
PRIVATE ${CC_BINARY_DIR}/include-private
PUBLIC ${CC_BINARY_DIR}/include-public ${CC_SOURCE_DIR}/include ${CC_SOURCE_DIR}/libmariadb)
target_include_directories(mariadbclient PRIVATE ${CC_BINARY_DIR}/include-private)
target_include_directories(mariadbclient SYSTEM PUBLIC ${CC_BINARY_DIR}/include-public ${CC_SOURCE_DIR}/include ${CC_SOURCE_DIR}/libmariadb)
set_target_properties(mariadbclient PROPERTIES IMPORTED_INTERFACE_LINK_LIBRARIES "${SYSTEM_LIBS}")

2
contrib/protobuf vendored

@ -1 +1 @@
Subproject commit c1c5d02026059f4c3cb51aaa08e82288d3e08b89
Subproject commit 6bb70196c5360268d9f021bb7936fb0b551724c2

2
contrib/s2geometry vendored

@ -1 +1 @@
Subproject commit 38b7a290f927cc372218c2094602b83e35b18c05
Subproject commit 471fe9dc931a4bb560333545186e9b5da168ac83

View File

@ -1,8 +1,12 @@
set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src")
set(ABSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
if(NOT EXISTS "${ABSL_SOURCE_DIR}/CMakeLists.txt")
message(FATAL_ERROR " submodule contrib/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
endif()
set(S2_SRCS
"${S2_SOURCE_DIR}/s2/base/stringprintf.cc"
"${S2_SOURCE_DIR}/s2/base/strtoint.cc"
"${S2_SOURCE_DIR}/s2/encoded_s2cell_id_vector.cc"
"${S2_SOURCE_DIR}/s2/encoded_s2point_vector.cc"
"${S2_SOURCE_DIR}/s2/encoded_s2shape_index.cc"
@ -14,11 +18,14 @@ set(S2_SRCS
"${S2_SOURCE_DIR}/s2/s1chord_angle.cc"
"${S2_SOURCE_DIR}/s2/s1interval.cc"
"${S2_SOURCE_DIR}/s2/s2boolean_operation.cc"
"${S2_SOURCE_DIR}/s2/s2buffer_operation.cc"
"${S2_SOURCE_DIR}/s2/s2builder.cc"
"${S2_SOURCE_DIR}/s2/s2builder_graph.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_closed_set_normalizer.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_find_polygon_degeneracies.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_get_snapped_winding_delta.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_lax_polygon_layer.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_lax_polyline_layer.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_s2point_vector_layer.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_s2polygon_layer.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_s2polyline_layer.cc"
@ -44,7 +51,6 @@ set(S2_SRCS
"${S2_SOURCE_DIR}/s2/s2edge_crossings.cc"
"${S2_SOURCE_DIR}/s2/s2edge_distances.cc"
"${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc"
"${S2_SOURCE_DIR}/s2/s2error.cc"
"${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc"
"${S2_SOURCE_DIR}/s2/s2latlng.cc"
"${S2_SOURCE_DIR}/s2/s2latlng_rect.cc"
@ -55,6 +61,7 @@ set(S2_SRCS
"${S2_SOURCE_DIR}/s2/s2loop.cc"
"${S2_SOURCE_DIR}/s2/s2loop_measures.cc"
"${S2_SOURCE_DIR}/s2/s2measures.cc"
"${S2_SOURCE_DIR}/s2/s2memory_tracker.cc"
"${S2_SOURCE_DIR}/s2/s2metrics.cc"
"${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc"
"${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc"
@ -82,28 +89,15 @@ set(S2_SRCS
"${S2_SOURCE_DIR}/s2/s2shapeutil_build_polygon_boundaries.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_range_iterator.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc"
"${S2_SOURCE_DIR}/s2/s2text_format.cc"
"${S2_SOURCE_DIR}/s2/s2wedge_relations.cc"
"${S2_SOURCE_DIR}/s2/strings/ostringstream.cc"
"${S2_SOURCE_DIR}/s2/s2winding_operation.cc"
"${S2_SOURCE_DIR}/s2/strings/serialize.cc"
# ClickHouse doesn't use strings from abseil.
# So, there is no duplicate symbols.
"${S2_SOURCE_DIR}/s2/third_party/absl/base/dynamic_annotations.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/raw_logging.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/throw_delegate.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/numeric/int128.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/ascii.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/match.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/numbers.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_cat.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_split.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/string_view.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/strip.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/internal/memutil.cc"
"${S2_SOURCE_DIR}/s2/util/bits/bit-interleave.cc"
"${S2_SOURCE_DIR}/s2/util/bits/bits.cc"
"${S2_SOURCE_DIR}/s2/util/coding/coder.cc"
@ -111,17 +105,41 @@ set(S2_SRCS
"${S2_SOURCE_DIR}/s2/util/math/exactfloat/exactfloat.cc"
"${S2_SOURCE_DIR}/s2/util/math/mathutil.cc"
"${S2_SOURCE_DIR}/s2/util/units/length-units.cc"
)
add_library(s2 ${S2_SRCS})
set_property(TARGET s2 PROPERTY CXX_STANDARD 11)
set_property(TARGET s2 PROPERTY CXX_STANDARD 17)
if (OPENSSL_FOUND)
target_link_libraries(s2 PRIVATE ${OPENSSL_LIBRARIES})
endif()
# Copied from contrib/s2geometry/CMakeLists
target_link_libraries(s2 PRIVATE
absl::base
absl::btree
absl::config
absl::core_headers
absl::dynamic_annotations
absl::endian
absl::fixed_array
absl::flat_hash_map
absl::flat_hash_set
absl::hash
absl::inlined_vector
absl::int128
absl::log_severity
absl::memory
absl::span
absl::str_format
absl::strings
absl::type_traits
absl::utility
)
target_include_directories(s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/")
target_include_directories(s2 SYSTEM PUBLIC "${ABSL_SOURCE_DIR}")
if(M_LIBRARY)
target_link_libraries(s2 PRIVATE ${M_LIBRARY})

2
contrib/sysroot vendored

@ -1 +1 @@
Subproject commit 410845187f582c5e6692b53dddbe43efbb728734
Subproject commit bbcac834526d90d1e764164b861be426891d1743

14
debian/rules vendored
View File

@ -45,6 +45,10 @@ ifdef DEB_CXX
ifeq ($(DEB_BUILD_GNU_TYPE),$(DEB_HOST_GNU_TYPE))
CC := $(DEB_CC)
CXX := $(DEB_CXX)
else ifeq (clang,$(findstring clang,$(DEB_CXX)))
# If we crosscompile with clang, it knows what to do
CC := $(DEB_CC)
CXX := $(DEB_CXX)
else
CC := $(DEB_HOST_GNU_TYPE)-$(DEB_CC)
CXX := $(DEB_HOST_GNU_TYPE)-$(DEB_CXX)
@ -77,10 +81,6 @@ else
THREADS_COUNT = 1
endif
ifneq ($(THREADS_COUNT),)
THREADS_COUNT:=-j$(THREADS_COUNT)
endif
%:
dh $@ $(DH_FLAGS) --buildsystem=cmake
@ -89,11 +89,11 @@ override_dh_auto_configure:
override_dh_auto_build:
# Fix for ninja. Do not add -O.
$(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET)
$(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET)
override_dh_auto_test:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V
cd $(BUILDDIR) && ctest -j$(THREADS_COUNT) -V
endif
override_dh_clean:
@ -120,7 +120,7 @@ override_dh_install:
dh_install --list-missing --sourcedir=$(DESTDIR)
override_dh_auto_install:
env DESTDIR=$(DESTDIR) $(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) install
env DESTDIR=$(DESTDIR) $(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) install
override_dh_shlibdeps:
true # We depend only on libc and dh_shlibdeps gives us wrong (too strict) dependency.

View File

@ -46,7 +46,6 @@
"name": "clickhouse/stateless-test",
"dependent": [
"docker/test/stateful",
"docker/test/coverage",
"docker/test/unit"
]
},
@ -56,10 +55,6 @@
"docker/test/stress"
]
},
"docker/test/coverage": {
"name": "clickhouse/test-coverage",
"dependent": []
},
"docker/test/unit": {
"name": "clickhouse/unit-test",
"dependent": []

View File

@ -24,40 +24,34 @@ RUN apt-get update \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list
/etc/apt/sources.list \
&& apt-get clean
# initial packages
RUN apt-get update \
&& apt-get install \
bash \
fakeroot \
ccache \
curl \
software-properties-common \
--yes --no-install-recommends
RUN apt-get update \
&& apt-get install \
bash \
build-essential \
ccache \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
cmake \
curl \
fakeroot \
gdb \
git \
gperf \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
libicu-dev \
moreutils \
ninja-build \
pigz \
rename \
software-properties-common \
tzdata \
--yes --no-install-recommends
--yes --no-install-recommends \
&& apt-get clean
# This symlink required by gcc to find lld compiler
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
@ -66,7 +60,7 @@ ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}
# libtapi is required to support .tbh format from recent MacOS SDKs
RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
RUN git clone --depth 1 https://github.com/tpoechtrager/apple-libtapi.git \
&& cd apple-libtapi \
&& INSTALLPREFIX=/cctools ./build.sh \
&& ./install.sh \
@ -74,7 +68,7 @@ RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
&& rm -rf apple-libtapi
# Build and install tools for cross-linking to Darwin (x86-64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
RUN git clone --depth 1 https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=x86_64-apple-darwin \
@ -83,7 +77,7 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& rm -rf cctools-port
# Build and install tools for cross-linking to Darwin (aarch64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
RUN git clone --depth 1 https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=aarch64-apple-darwin \
@ -97,7 +91,8 @@ RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacO
# NOTE: Seems like gcc-11 is too new for ubuntu20 repository
RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
&& apt-get update \
&& apt-get install gcc-11 g++-11 --yes
&& apt-get install gcc-11 g++-11 --yes \
&& apt-get clean
COPY build.sh /

View File

@ -64,8 +64,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
&& apt-get install gcc-11 g++-11 --yes
# This symlink required by gcc to find lld compiler
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
# These symlinks are required:
# /usr/bin/ld.lld: by gcc to find lld compiler
# /usr/bin/aarch64-linux-gnu-obj*: for debug symbols stripping
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld \
&& ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-strip \
&& ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-objcopy \
&& ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objdump /usr/bin/aarch64-linux-gnu-objdump
COPY build.sh /

View File

@ -29,7 +29,13 @@ def pull_image(image_name):
return False
def build_image(image_name, filepath):
subprocess.check_call("docker build --network=host -t {} -f {} .".format(image_name, filepath), shell=True)
context = os.path.dirname(filepath)
subprocess.check_call(
"docker build --network=host -t {} -f {} {}".format(
image_name, filepath, context
),
shell=True,
)
def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version):
env_part = " -e ".join(env_variables)
@ -90,6 +96,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
elif is_cross_arm:
cc = compiler[:-len(ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake")
result.append("DEB_ARCH_FLAG=-aarm64")
elif is_cross_freebsd:
cc = compiler[:-len(FREEBSD_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake")
@ -98,6 +105,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake")
else:
cc = compiler
result.append("DEB_ARCH_FLAG=-aamd64")
cxx = cc.replace('gcc', 'g++').replace('clang', 'clang++')

View File

@ -17,6 +17,8 @@ $ docker run -d --name some-clickhouse-server --ulimit nofile=262144:262144 clic
By default ClickHouse will be accessible only via docker network. See the [networking section below](#networking).
By default, the server instance started above runs as the default user without a password.
### connect to it from a native client
```bash
$ docker run -it --rm --link some-clickhouse-server:clickhouse-server clickhouse/clickhouse-client --host clickhouse-server

View File

@ -6,7 +6,7 @@ FROM clickhouse/binary-builder
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libllvm9 libclang-9-dev
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev
# repo versions don't work correctly with C++17
# also we push reports to s3, so we add index.html to subfolder urls
@ -23,12 +23,12 @@ ENV SOURCE_DIRECTORY=/repo_folder
ENV BUILD_DIRECTORY=/build
ENV HTML_RESULT_DIRECTORY=$BUILD_DIRECTORY/html_report
ENV SHA=nosha
ENV DATA="data"
ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data"
CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-13 -DCMAKE_C_COMPILER=/usr/bin/clang-13 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
mkdir -p $HTML_RESULT_DIRECTORY && \
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\
$CODEINDEX $HTML_RESULT_DIRECTORY -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
$CODEINDEX $HTML_RESULT_DIRECTORY -d "$DATA" | ts '%Y-%m-%d %H:%M:%S' && \
mv $HTML_RESULT_DIRECTORY /test_output

View File

@ -1,18 +0,0 @@
# docker build -t clickhouse/test-coverage .
FROM clickhouse/stateless-test
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \
cmake
COPY s3downloader /s3downloader
COPY run.sh /run.sh
ENV DATASETS="hits visits"
ENV COVERAGE_DIR=/coverage_reports
ENV SOURCE_DIR=/build
ENV OUTPUT_DIR=/output
ENV IGNORE='.*contrib.*'
CMD ["/bin/bash", "/run.sh"]

View File

@ -1,112 +0,0 @@
#!/bin/bash
kill_clickhouse () {
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
pkill -f "clickhouse-server" 2>/dev/null
for _ in {1..120}
do
if ! pkill -0 -f "clickhouse-server" ; then break ; fi
echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
sleep 1
done
if pkill -0 -f "clickhouse-server"
then
pstree -apgT
jobs
echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
return 1
fi
}
start_clickhouse () {
LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt 120 ]
then
echo "Cannot start clickhouse-server"
cat /var/log/clickhouse-server/stdout.log
tail -n1000 /var/log/clickhouse-server/stderr.log
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
break
fi
sleep 0.5
counter=$((counter + 1))
done
}
chmod 777 /
dpkg -i package_folder/clickhouse-common-static_*.deb; \
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \
dpkg -i package_folder/clickhouse-server_*.deb; \
dpkg -i package_folder/clickhouse-client_*.deb; \
dpkg -i package_folder/clickhouse-test_*.deb
mkdir -p /var/lib/clickhouse
mkdir -p /var/log/clickhouse-server
chmod 777 -R /var/log/clickhouse-server/
# install test configs
/usr/share/clickhouse-test/config/install.sh
start_clickhouse
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
if ! /s3downloader --dataset-names $DATASETS; then
echo "Cannot download datatsets"
exit 1
fi
chmod 777 -R /var/lib/clickhouse
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW DATABASES"
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "CREATE DATABASE test"
kill_clickhouse
start_clickhouse
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-test -j 8 --testname --shard --zookeeper --print-time 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_result.txt
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "/test_result.txt")
kill_clickhouse
sleep 3
if [[ -n "${FAILED_TESTS[*]}" ]]
then
# Clean the data so that there is no interference from the previous test run.
rm -rf /var/lib/clickhouse/{{meta,}data,user_files} ||:
start_clickhouse
echo "Going to run again: ${FAILED_TESTS[*]}"
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-test --order=random --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a /test_result.txt
else
echo "No failed tests"
fi
mkdir -p "$COVERAGE_DIR"
mv /*.profraw "$COVERAGE_DIR"
mkdir -p "$SOURCE_DIR"/obj-x86_64-linux-gnu
cd "$SOURCE_DIR"/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. && cd /
llvm-profdata-11 merge -sparse "${COVERAGE_DIR}"/* -o clickhouse.profdata
llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex "$IGNORE" > output.lcov
genhtml output.lcov --ignore-errors source --output-directory "${OUTPUT_DIR}"

View File

@ -1,101 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import time
import tarfile
import logging
import argparse
import requests
import tempfile
DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net'
AVAILABLE_DATASETS = {
'hits': 'hits_v1.tar',
'visits': 'visits_v1.tar',
}
RETRIES_COUNT = 5
def _get_temp_file_name():
return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
def build_url(base_url, dataset):
return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
def dowload_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(RETRIES_COUNT):
try:
with open(path, 'wb') as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get('content-length')
if total_length is None or int(total_length) == 0:
logging.info("No content-length, will download file without progress")
f.write(response.content)
else:
dl = 0
total_length = int(total_length)
logging.info("Content length is %ld bytes", total_length)
for data in response.iter_content(chunk_size=4096):
dl += len(data)
f.write(data)
if sys.stdout.isatty():
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
sys.stdout.flush()
break
except Exception as ex:
sys.stdout.write("\n")
time.sleep(3)
logging.info("Exception while downloading %s, retry %s", ex, i + 1)
if os.path.exists(path):
os.remove(path)
else:
raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
sys.stdout.write("\n")
logging.info("Downloading finished")
def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
with tarfile.open(tar_path, 'r') as comp_file:
comp_file.extractall(path=clickhouse_path)
logging.info("Unpack finished")
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(
description="Simple tool for dowloading datasets for clickhouse from S3")
parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys()))
parser.add_argument('--url-prefix', default=DEFAULT_URL)
parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')
args = parser.parse_args()
datasets = args.dataset_names
logging.info("Will fetch following datasets: %s", ', '.join(datasets))
for dataset in datasets:
logging.info("Processing %s", dataset)
temp_archive_path = _get_temp_file_name()
try:
download_url_for_dataset = build_url(args.url_prefix, dataset)
dowload_with_progress(download_url_for_dataset, temp_archive_path)
unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path)
except Exception as ex:
logging.info("Some exception occured %s", str(ex))
raise
finally:
logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path)
if os.path.exists(temp_archive_path):
os.remove(temp_archive_path)
logging.info("Processing of %s finished", dataset)
logging.info("Fetch finished, enjoy your tables!")

View File

@ -173,6 +173,8 @@ function clone_submodules
contrib/dragonbox
contrib/fast_float
contrib/NuRaft
contrib/jemalloc
contrib/replxx
)
git submodule sync
@ -193,6 +195,8 @@ function run_cmake
"-DENABLE_THINLTO=0"
"-DUSE_UNWIND=1"
"-DENABLE_NURAFT=1"
"-DENABLE_JEMALLOC=1"
"-DENABLE_REPLXX=1"
)
# TODO remove this? we don't use ccache anyway. An option would be to download it
@ -253,7 +257,13 @@ function run_tests
start_server
set +e
time clickhouse-test --hung-check -j 8 --order=random \
local NPROC
NPROC=$(nproc)
NPROC=$((NPROC / 2))
if [[ $NPROC == 0 ]]; then
NPROC=1
fi
time clickhouse-test --hung-check -j "${NPROC}" --order=random \
--fast-tests-only --no-long --testname --shard --zookeeper --check-zookeeper-session \
-- "$FASTTEST_FOCUS" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \

View File

@ -52,9 +52,21 @@ function clone
}
function wget_with_retry
{
for _ in 1 2 3 4; do
if wget -nv -nd -c "$1";then
return 0
else
sleep 0.5
fi
done
return 1
}
function download
{
wget -nv -nd -c "$BINARY_URL_TO_DOWNLOAD"
wget_with_retry "$BINARY_URL_TO_DOWNLOAD"
chmod +x clickhouse
ln -s ./clickhouse ./clickhouse-server
@ -175,6 +187,15 @@ info signals
continue
backtrace full
info locals
info registers
disassemble /s
up
info locals
disassemble /s
up
info locals
disassemble /s
p \"done\"
detach
quit
" > script.gdb
@ -185,8 +206,8 @@ quit
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
# Check connectivity after we attach gdb, because it might cause the server
# to freeze and the fuzzer will fail.
for _ in {1..60}
# to freeze and the fuzzer will fail. In debug build it can take a lot of time.
for _ in {1..180}
do
sleep 1
if clickhouse-client --query "select 1"

View File

@ -72,11 +72,13 @@ RUN python3 -m pip install \
grpcio-tools \
kafka-python \
kazoo \
lz4 \
minio \
protobuf \
psycopg2-binary==2.8.6 \
pymongo==3.11.0 \
pytest \
pytest-order==1.0.0 \
pytest-timeout \
pytest-xdist \
pytest-repeat \

View File

@ -8,8 +8,8 @@ echo '{
"ip-forward": true,
"log-level": "debug",
"storage-driver": "overlay2",
"insecure-registries" : ["dockerhub-proxy.sas.yp-c.yandex.net:5000"],
"registry-mirrors" : ["http://dockerhub-proxy.sas.yp-c.yandex.net:5000"]
"insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"],
"registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
}' | dd of=/etc/docker/daemon.json 2>/dev/null
dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log &

View File

@ -11,6 +11,20 @@ if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
COMMON_BUILD_PREFIX=""
fi
# Sometimes AWS responds with a DNS error and it's impossible to retry it with
# the current curl version options.
function curl_with_retry
{
for _ in 1 2 3 4; do
if curl --fail --head "$1";then
return 0
else
sleep 0.5
fi
done
return 1
}
# Use the packaged repository to find the revision we will compare to.
function find_reference_sha
{
@ -55,7 +69,7 @@ function find_reference_sha
)
for path in "${urls_to_try[@]}"
do
if curl --fail --retry 5 --retry-delay 1 --retry-max-time 15 --head "$path"
if curl_with_retry "$path"
then
found="$path"
break
@ -76,7 +90,7 @@ chmod 777 workspace output
cd workspace
# Download the package for the version we are going to test.
if curl --fail --retry 5 --retry-delay 1 --retry-max-time 15 --head "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
then
right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
fi

View File

@ -42,7 +42,7 @@ ENV CCACHE_DIR=/test_output/ccache
CMD echo "Running PVS version $PKG_VERSION" && mkdir -p $CCACHE_DIR && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"DISABLE_HERMETIC_BUILD"=ON -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \
&& ninja re2_st clickhouse_grpc_protos \
&& pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \
&& pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j "$(nproc)" -l ./licence.lic; \
cp /repo_folder/pvs-studio.log /test_output; \
plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \
plog-converter -a GA:1,2 -t tasklist -o /test_output/pvs-studio-task-report.txt pvs-studio.log

View File

@ -148,6 +148,15 @@ info signals
continue
backtrace full
info locals
info registers
disassemble /s
up
info locals
disassemble /s
up
info locals
disassemble /s
p \"done\"
detach
quit
" > script.gdb

View File

@ -5,8 +5,8 @@ echo "Configure to use Yandex dockerhub-proxy"
mkdir -p /etc/docker/
cat > /etc/docker/daemon.json << EOF
{
"insecure-registries" : ["dockerhub-proxy.sas.yp-c.yandex.net:5000"],
"registry-mirrors" : ["http://dockerhub-proxy.sas.yp-c.yandex.net:5000"]
"insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"],
"registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
}
EOF

View File

@ -106,20 +106,20 @@ Build ClickHouse. Run ClickHouse from the terminal: change directory to `program
Note that all clickhouse tools (server, client, etc) are just symlinks to a single binary named `clickhouse`. You can find this binary at `programs/clickhouse`. All tools can also be invoked as `clickhouse tool` instead of `clickhouse-tool`.
Alternatively you can install ClickHouse package: either stable release from Yandex repository or you can build package for yourself with `./release` in ClickHouse sources root. Then start the server with `sudo service clickhouse-server start` (or stop to stop the server). Look for logs at `/etc/clickhouse-server/clickhouse-server.log`.
Alternatively, you can install the ClickHouse package: either a stable release from the ClickHouse repository, or a package you build yourself with `./release` in the ClickHouse sources root. Then start the server with `sudo clickhouse start` (or `stop` to stop the server). Look for logs at `/etc/clickhouse-server/clickhouse-server.log`.
When ClickHouse is already installed on your system, you can build a new `clickhouse` binary and replace the existing binary:
``` bash
$ sudo service clickhouse-server stop
$ sudo clickhouse stop
$ sudo cp ./clickhouse /usr/bin/
$ sudo service clickhouse-server start
$ sudo clickhouse start
```
Also you can stop system clickhouse-server and run your own with the same configuration but with logging to terminal:
``` bash
$ sudo service clickhouse-server stop
$ sudo clickhouse stop
$ sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```
@ -257,9 +257,9 @@ There are five variants (Debug, ASan, TSan, MSan, UBSan).
Thread Fuzzer (not to be confused with Thread Sanitizer) is another kind of fuzzing that randomizes the order of thread execution. It helps to find even more special cases.
## Security Audit {#security-audit}
## Security Audit
People from Yandex Security Team do some basic overview of ClickHouse capabilities from the security standpoint.
People from the Yandex Security Team did a basic review of ClickHouse capabilities from a security standpoint.
## Static Analyzers {#static-analyzers}
@ -326,15 +326,11 @@ There is automated check for flaky tests. It runs all new tests 100 times (for f
## Testflows
[Testflows](https://testflows.com/) is an enterprise-grade testing framework. It is used by Altinity for some of the tests and we run these tests in our CI.
## Yandex Checks (only for Yandex employees)
These checks are importing ClickHouse code into Yandex internal monorepository, so ClickHouse codebase can be used as a library by other products at Yandex (YT and YDB). Note that clickhouse-server itself is not being build from internal repo and unmodified open-source build is used for Yandex applications.
[Testflows](https://testflows.com/) is an enterprise-grade open-source testing framework, which is used to test a subset of ClickHouse.
## Test Automation {#test-automation}
We run tests with Yandex internal CI and job automation system named “Sandbox”.
We run tests with [GitHub Actions](https://github.com/features/actions).
Build jobs and tests are run in Sandbox on a per-commit basis. Resulting packages and test results are published in GitHub and can be downloaded via direct links. Artifacts are stored for several months. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you.

View File

@ -83,6 +83,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](
| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
| BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) |
| BIT | [UInt64](../../sql-reference/data-types/int-uint.md) |
[Nullable](../../sql-reference/data-types/nullable.md) is supported.
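As a quick illustration of the new `BIT` mapping above, a minimal sketch (the table and column names are hypothetical, not from this documentation):

```sql
-- On the MySQL side the column might be declared as:
--   CREATE TABLE db.flags (id INT PRIMARY KEY, mask BIT(8));
-- Inside the MaterializedMySQL database in ClickHouse the same column is exposed as UInt64:
SELECT id, toTypeName(mask) FROM db.flags LIMIT 1;  -- toTypeName(mask) returns 'UInt64'
```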
@ -150,20 +151,38 @@ Table overrides can be used to customize the ClickHouse DDL queries, allowing yo
application. This is especially useful for controlling partitioning, which is important for the overall performance of
MaterializedMySQL.
These are the schema conversion manipulations you can do with table overrides for MaterializedMySQL:
* Modify column type. Must be compatible with the original type, or replication will fail. For example,
you can modify a UInt32 column to UInt64, but you cannot modify a String column to Array(String).
* Modify [column TTL](../table-engines/mergetree-family/mergetree/#mergetree-column-ttl).
* Modify [column compression codec](../../sql-reference/statements/create/table/#codecs).
* Add [ALIAS columns](../../sql-reference/statements/create/table/#alias).
* Add [skipping indexes](../table-engines/mergetree-family/mergetree/#table_engine-mergetree-data_skipping-indexes)
* Add [projections](../table-engines/mergetree-family/mergetree/#projections). Note that projection optimizations are
disabled when using `SELECT ... FINAL` (which MaterializedMySQL does by default), so their utility is limited here.
`INDEX ... TYPE hypothesis` as [described in the v21.12 blog post](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/)
may be more useful in this case.
* Modify [PARTITION BY](../table-engines/mergetree-family/custom-partitioning-key/)
* Modify [ORDER BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
* Modify [PRIMARY KEY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
* Add [SAMPLE BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
* Add [table TTL](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
```sql
CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
[SETTINGS ...]
[TABLE OVERRIDE table_name (
[COLUMNS (
[name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], ...]
[INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1, ...]
[PROJECTION projection_name_1 (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]), ...]
)]
[ORDER BY expr]
[PRIMARY KEY expr]
[PARTITION BY expr]
[SAMPLE BY expr]
[TTL expr]
[col_name [datatype] [ALIAS expr] [CODEC(...)] [TTL expr], ...]
[INDEX index_name expr TYPE indextype[(...)] GRANULARITY val, ...]
[PROJECTION projection_name (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]), ...]
)]
[ORDER BY expr]
[PRIMARY KEY expr]
[PARTITION BY expr]
[SAMPLE BY expr]
[TTL expr]
), ...]
```
@ -173,34 +192,34 @@ Example:
CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
TABLE OVERRIDE table1 (
COLUMNS (
userid UUID,
category LowCardinality(String),
timestamp DateTime CODEC(Delta, Default)
userid UUID,
category LowCardinality(String),
timestamp DateTime CODEC(Delta, Default)
)
PARTITION BY toYear(timestamp)
),
TABLE OVERRIDE table2 (
COLUMNS (
ip_hash UInt32 MATERIALIZED xxHash32(client_ip),
client_ip String TTL created + INTERVAL 72 HOUR
)
SAMPLE BY ip_hash
client_ip String TTL created + INTERVAL 72 HOUR
)
SAMPLE BY ip_hash
)
```
The `COLUMNS` list is sparse; it contains only modified or extra (MATERIALIZED or ALIAS) columns. Modified columns with
a different type must be assignable from the original type. There is currently no validation of this or similar issues
when the `CREATE DATABASE` query executes, so extra care needs to be taken.
The `COLUMNS` list is sparse; existing columns are modified as specified, extra ALIAS columns are added. It is not
possible to add ordinary or MATERIALIZED columns. Modified columns with a different type must be assignable from the
original type. There is currently no validation of this or similar issues when the `CREATE DATABASE` query executes, so
extra care needs to be taken.
You may specify overrides for tables that do not exist yet.
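As an illustration of the rules above, here is a minimal sketch of an override that widens a column and adds an ALIAS column (the table and column names, including `created_at`, are hypothetical):

```sql
CREATE DATABASE mysql_db ENGINE = MaterializedMySQL(...)
TABLE OVERRIDE events (
    COLUMNS (
        -- UInt64 is assignable from the original UInt32, so this modification is allowed
        id UInt64,
        -- extra ALIAS columns may be added; ordinary or MATERIALIZED columns may not
        event_date Date ALIAS toDate(created_at)
    )
    PARTITION BY toYYYYMM(created_at)
)
```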
!!! note "Warning"
It is easy to break replication with TABLE OVERRIDEs if not used with care. For example:
!!! warning "Warning"
It is easy to break replication with table overrides if not used with care. For example:
* If a column is added with a table override, but then later added to the source MySQL table, the converted ALTER TABLE
query in ClickHouse will fail because the column already exists.
* If an ALIAS column is added with a table override, and a column with the same name is later added to the source
MySQL table, the converted ALTER TABLE query in ClickHouse will fail and replication stops.
* It is currently possible to add overrides that reference nullable columns where not-nullable are required, such as in
`ORDER BY` or `PARTITION BY`.
`ORDER BY` or `PARTITION BY`. This will cause CREATE TABLE queries to fail, which in turn stops replication.
## Examples of Use {#examples-of-use}
@ -217,11 +236,9 @@ mysql> SELECT * FROM test;
```
```text
+---+------+------+
| a | b | c |
+---+------+------+
| 2 | 222 | Wow! |
+---+------+------+
┌─a─┬───b─┬─c────┐
│ 2 │ 222 │ Wow! │
└───┴─────┴──────┘
```
Database in ClickHouse, exchanging data with the MySQL server:

View File

@ -5,15 +5,15 @@ toc_title: MaterializedPostgreSQL
# [experimental] MaterializedPostgreSQL {#materialize-postgresql}
Creates ClickHouse database with an initial data dump of PostgreSQL database tables and starts replication process, i.e. executes background job to apply new changes as they happen on PostgreSQL database tables in the remote PostgreSQL database.
Creates a ClickHouse database with tables from a PostgreSQL database. First, a database with the `MaterializedPostgreSQL` engine creates a snapshot of the PostgreSQL database and loads the required tables. The required tables can include any subset of tables from any subset of schemas in the specified database. Along with the snapshot, the database engine acquires the LSN, and once the initial dump of the tables is performed it starts pulling updates from the WAL. After the database is created, tables newly added to the PostgreSQL database are not automatically added to replication. They have to be added manually with the `ATTACH TABLE db.table` query.
ClickHouse server works as PostgreSQL replica. It reads WAL and performs DML queries. DDL is not replicated, but can be handled (described below).
Replication is implemented with the PostgreSQL Logical Replication Protocol, which does not allow replicating DDL, but makes it possible to know when replication-breaking changes happen (column type changes, adding/removing columns). Such changes are detected and the corresponding tables stop receiving updates. Those tables can be reloaded automatically in the background if the required setting is turned on. The safest way for now is to use `ATTACH`/`DETACH` queries to reload a table completely. If DDL does not break replication (for example, renaming a column), the table will still receive updates (insertion is done by position).
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
ENGINE = MaterializedPostgreSQL('host:port', 'database', 'user', 'password') [SETTINGS ...]
```
**Engine Parameters**
@ -23,51 +23,39 @@ ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'p
- `user` — PostgreSQL user.
- `password` — User password.
## Example of Use {#example-of-use}
``` sql
CREATE DATABASE postgres_db
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password');
SHOW TABLES FROM postgres_db;
┌─name───┐
│ table1 │
└────────┘
SELECT * FROM postgres_db.table1;
```
## Dynamically adding new tables to replication {#dynamically-adding-table-to-replication}
After `MaterializedPostgreSQL` database is created, it does not automatically detect new tables in according PostgreSQL database. Such tables can be added manually:
``` sql
ATTACH TABLE postgres_database.new_table;
```
When specifying a specific list of tables in the database using the setting [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list), it will be updated to the current state, taking into account the tables which were added by the `ATTACH TABLE` query.
Warning: before version 21.13, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in a ClickHouse version before 21.13, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. The issue is fixed in 21.13.
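A sketch of the manual cleanup described above, run on the PostgreSQL side (replace `db_name` with the actual database name):

``` sql
-- find leftover temporary slots created by versions before 21.13
SELECT slot_name, active FROM pg_replication_slots
WHERE slot_name LIKE '%_ch_replication_slot_tmp';

-- drop the leftover slot
SELECT pg_drop_replication_slot('db_name_ch_replication_slot_tmp');
```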
## Dynamically removing tables from replication {#dynamically-removing-table-from-replication}
It is possible to remove specific tables from replication:
``` sql
DETACH TABLE postgres_database.table_to_remove;
```
## Settings {#settings}
- [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list)
- [materialized_postgresql_schema](../../operations/settings/settings.md#materialized-postgresql-schema)
- [materialized_postgresql_schema_list](../../operations/settings/settings.md#materialized-postgresql-schema-list)
- [materialized_postgresql_allow_automatic_update](../../operations/settings/settings.md#materialized-postgresql-allow-automatic-update)
- [materialized_postgresql_max_block_size](../../operations/settings/settings.md#materialized-postgresql-max-block-size)
- [materialized_postgresql_replication_slot](../../operations/settings/settings.md#materialized-postgresql-replication-slot)
- [materialized_postgresql_snapshot](../../operations/settings/settings.md#materialized-postgresql-snapshot)
``` sql
CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3';
SELECT * FROM database1.table1;
```
The settings can be changed, if necessary, using a DDL query. But it is impossible to change the setting `materialized_postgresql_tables_list`. To update the list of tables in this setting use the `ATTACH TABLE` query.
``` sql
ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = <new_size>;
```
## PostgreSQL schema {#schema}
PostgreSQL [schema](https://www.postgresql.org/docs/9.1/ddl-schemas.html) can be configured in 3 ways (starting from version 21.12).
@ -150,13 +138,63 @@ WHERE oid = 'postgres_table'::regclass;
!!! warning "Warning"
Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used.
## Example of Use {#example-of-use}
## Settings {#settings}
1. materialized_postgresql_tables_list {#materialized-postgresql-tables-list}
Sets a comma-separated list of PostgreSQL database tables, which will be replicated via [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine.
Default value: empty list — means whole PostgreSQL database will be replicated.
2. materialized_postgresql_schema {#materialized-postgresql-schema}
Default value: empty string. (Default schema is used)
3. materialized_postgresql_schema_list {#materialized-postgresql-schema-list}
Default value: empty list. (Default schema is used)
4. materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update}
Allows reloading a table in the background when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via the ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because this is not allowed by the PostgreSQL logical replication protocol, but the fact of a DDL change is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then instead of stopping the replication of those tables, they will be reloaded in the background via a database snapshot without data loss, and replication will continue for them.
Possible values:
- 0 — The table is not automatically updated in the background, when schema changes are detected.
- 1 — The table is automatically updated in the background, when schema changes are detected.
Default value: `0`.
5. materialized_postgresql_max_block_size {#materialized-postgresql-max-block-size}
Sets the number of rows collected in memory before flushing data into PostgreSQL database table.
Possible values:
- Positive integer.
Default value: `65536`.
6. materialized_postgresql_replication_slot {#materialized-postgresql-replication-slot}
A user-created replication slot. Must be used together with `materialized_postgresql_snapshot`.
7. materialized_postgresql_snapshot {#materialized-postgresql-snapshot}
A text string identifying a snapshot, from which [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. Must be used together with `materialized_postgresql_replication_slot`.
``` sql
CREATE DATABASE postgresql_db
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password');
CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3';
SELECT * FROM postgresql_db.postgres_table;
SELECT * FROM database1.table1;
```
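A sketch of using `materialized_postgresql_replication_slot` together with `materialized_postgresql_snapshot`, assuming a slot named `clickhouse_slot` and its exported snapshot identifier were created manually on the PostgreSQL side (both values below are placeholders):

``` sql
CREATE DATABASE database2
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS
    materialized_postgresql_replication_slot = 'clickhouse_slot',
    materialized_postgresql_snapshot = '00000003-0000001B-1';
```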
The settings can be changed, if necessary, using a DDL query. But it is impossible to change the setting `materialized_postgresql_tables_list`. To update the list of tables in this setting use the `ATTACH TABLE` query.
``` sql
ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = <new_size>;
```
## Notes {#notes}
@ -165,11 +203,11 @@ SELECT * FROM postgresql_db.postgres_table;
Logical Replication Slots which exist on the primary are not available on standby replicas.
So if there is a failover, the new primary (the old physical standby) won't be aware of any slots that existed on the old primary. This will lead to broken replication from PostgreSQL.
A solution to this is to manage replication slots yourself and define a permanent replication slot (some information can be found [here](https://patroni.readthedocs.io/en/latest/SETTINGS.html)). You'll need to pass slot name via [materialized_postgresql_replication_slot](../../operations/settings/settings.md#materialized-postgresql-replication-slot) setting, and it has to be exported with `EXPORT SNAPSHOT` option. The snapshot identifier needs to be passed via [materialized_postgresql_snapshot](../../operations/settings/settings.md#materialized-postgresql-snapshot) setting.
A solution to this is to manage replication slots yourself and define a permanent replication slot (some information can be found [here](https://patroni.readthedocs.io/en/latest/SETTINGS.html)). You'll need to pass the slot name via the `materialized_postgresql_replication_slot` setting, and it has to be exported with the `EXPORT SNAPSHOT` option. The snapshot identifier needs to be passed via the `materialized_postgresql_snapshot` setting.
Please note that this should be used only if it is actually needed. If there is no real need for it, or no full understanding of why it is needed, it is better to let the table engine create and manage its own replication slot.
**Example (from [@bchrobot](https://github.com/bchrobot))**
**Example (from [@bchrobot](https://github.com/bchrobot))**
1. Configure replication slot in PostgreSQL.
@ -214,3 +252,23 @@ SETTINGS
```bash
kubectl exec acid-demo-cluster-0 -c postgres -- su postgres -c 'patronictl failover --candidate acid-demo-cluster-1 --force'
```
### Required permissions
1. [CREATE PUBLICATION](https://postgrespro.ru/docs/postgresql/14/sql-createpublication) -- create query privilege.
2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privilege.
3. [pg_drop_replication_slot](https://postgrespro.ru/docs/postgrespro/9.5/functions-admin#functions-replication) -- replication privilege or superuser.
4. [DROP PUBLICATION](https://postgrespro.ru/docs/postgresql/10/sql-droppublication) -- owner of publication (`username` in MaterializedPostgreSQL engine itself).
It is possible to avoid executing commands `2` and `3` and having those permissions by using the settings `materialized_postgresql_replication_slot` and `materialized_postgresql_snapshot`, but this requires much care.
Access to tables:
1. pg_publication
2. pg_replication_slots
3. pg_publication_tables
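A hedged sketch of setting up a dedicated PostgreSQL user with the privileges listed above (the role name, database name and schema are placeholders; exact grants may differ per setup):

``` sql
-- LOGIN + REPLICATION cover CREATE_REPLICATION_SLOT and pg_drop_replication_slot
CREATE ROLE clickhouse_repl WITH LOGIN REPLICATION PASSWORD 'secret';

-- CREATE on the database allows creating (and, as owner, dropping) publications
GRANT CREATE ON DATABASE postgres_database TO clickhouse_repl;

-- read access to the tables that will be replicated
GRANT SELECT ON ALL TABLES IN SCHEMA public TO clickhouse_repl;
```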

View File

@ -5,8 +5,7 @@ toc_title: HDFS
# HDFS {#table_engines-hdfs}
This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) via ClickHouse. This engine is similar
to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
This engine provides integration with the [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) via ClickHouse. This engine is similar to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
## Usage {#usage}
@ -14,12 +13,13 @@ to the [File](../../../engines/table-engines/special/file.md#table_engines-file)
ENGINE = HDFS(URI, format)
```
The `URI` parameter is the whole file URI in HDFS.
The `format` parameter specifies one of the available file formats. To perform
**Engine Parameters**
- `URI` - whole file URI in HDFS. The path part of `URI` may contain globs. In this case the table would be readonly.
- `format` - specifies one of the available file formats. To perform
`SELECT` queries, the format must be supported for input, and to perform
`INSERT` queries for output. The available formats are listed in the
[Formats](../../../interfaces/formats.md#formats) section.
The path part of `URI` may contain globs. In this case the table would be readonly.
**Example:**
@ -71,12 +71,12 @@ Constructions with `{}` are similar to the [remote](../../../sql-reference/table
1. Suppose we have several files in TSV format with the following URIs on HDFS:
- 'hdfs://hdfs1:9000/some_dir/some_file_1'
- 'hdfs://hdfs1:9000/some_dir/some_file_2'
- 'hdfs://hdfs1:9000/some_dir/some_file_3'
- 'hdfs://hdfs1:9000/another_dir/some_file_1'
- 'hdfs://hdfs1:9000/another_dir/some_file_2'
- 'hdfs://hdfs1:9000/another_dir/some_file_3'
- 'hdfs://hdfs1:9000/some_dir/some_file_1'
- 'hdfs://hdfs1:9000/some_dir/some_file_2'
- 'hdfs://hdfs1:9000/some_dir/some_file_3'
- 'hdfs://hdfs1:9000/another_dir/some_file_1'
- 'hdfs://hdfs1:9000/another_dir/some_file_2'
- 'hdfs://hdfs1:9000/another_dir/some_file_3'
1. There are several ways to make a table consisting of all six files:
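A sketch of one such way, using the glob constructions mentioned above (the column definitions are hypothetical):

``` sql
CREATE TABLE hdfs_table (name String, value UInt32)
ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV')
```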
@ -132,6 +132,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
| **parameter** | **default value** |
| - | - |
| rpc\_client\_connect\_tcpnodelay | true |
| dfs\_client\_read\_shortcircuit | true |
| output\_replace-datanode-on-failure | true |
@ -181,25 +182,26 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
#### ClickHouse extras {#clickhouse-extras}
| **parameter** | **default value** |
| - | - |
|hadoop\_kerberos\_keytab | "" |
|hadoop\_kerberos\_principal | "" |
|hadoop\_kerberos\_kinit\_command | kinit |
|libhdfs3\_conf | "" |
### Limitations {#limitations}
* hadoop\_security\_kerberos\_ticket\_cache\_path and libhdfs3\_conf can be global only, not user specific
* `hadoop_security_kerberos_ticket_cache_path` and `libhdfs3_conf` can be global only, not user specific
## Kerberos support {#kerberos-support}
If hadoop\_security\_authentication parameter has value 'kerberos', ClickHouse authentifies via Kerberos facility.
Parameters [here](#clickhouse-extras) and hadoop\_security\_kerberos\_ticket\_cache\_path may be of help.
If the `hadoop_security_authentication` parameter has the value `kerberos`, ClickHouse authenticates via Kerberos.
Parameters are [here](#clickhouse-extras) and `hadoop_security_kerberos_ticket_cache_path` may be of help.
Note that due to libhdfs3 limitations only the old-fashioned approach is supported,
datanode communications are not secured by SASL (HADOOP\_SECURE\_DN\_USER is a reliable indicator of such
security approach). Use tests/integration/test\_storage\_kerberized\_hdfs/hdfs_configs/bootstrap.sh for reference.
datanode communications are not secured by SASL (`HADOOP_SECURE_DN_USER` is a reliable indicator of such
security approach). Use `tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh` for reference.
If hadoop\_kerberos\_keytab, hadoop\_kerberos\_principal or hadoop\_kerberos\_kinit\_command is specified, kinit will be invoked. hadoop\_kerberos\_keytab and hadoop\_kerberos\_principal are mandatory in this case. kinit tool and krb5 configuration files are required.
If `hadoop_kerberos_keytab`, `hadoop_kerberos_principal` or `hadoop_kerberos_kinit_command` is specified, `kinit` will be invoked. `hadoop_kerberos_keytab` and `hadoop_kerberos_principal` are mandatory in this case. `kinit` tool and krb5 configuration files are required.
## HDFS Namenode HA support{#namenode-ha}
## HDFS Namenode HA support {#namenode-ha}
libhdfs3 supports HDFS namenode HA.

View File

@ -7,7 +7,7 @@ toc_title: MaterializedPostgreSQL
Creates ClickHouse table with an initial data dump of PostgreSQL table and starts replication process, i.e. executes background job to apply new changes as they happen on PostgreSQL table in the remote PostgreSQL database.
If more than one table is required, it is highly recommended to use the [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) database engine instead of the table engine and use the [materialized_postgresql_tables_list](../../../operations/settings/settings.md#materialized-postgresql-tables-list) setting, which specifies the tables to be replicated. It will be much better in terms of CPU, fewer connections and fewer replication slots inside the remote PostgreSQL database.
If more than one table is required, it is highly recommended to use the [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) database engine instead of the table engine and use the `materialized_postgresql_tables_list` setting, which specifies the tables to be replicated (it will also be possible to add a database `schema`). It will be much better in terms of CPU, fewer connections and fewer replication slots inside the remote PostgreSQL database.
## Creating a Table {#creating-a-table}
@ -38,7 +38,7 @@ PRIMARY KEY key;
- `_version` — Transaction counter. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `_sign` — Deletion mark. Type: [Int8](../../../sql-reference/data-types/int-uint.md). Possible values:
- `1` — Row is not deleted,
- `-1` — Row is deleted.
These columns do not need to be added when a table is created. They are always accessible in a `SELECT` query.
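For illustration, a minimal sketch of reading these virtual columns (the table and column names here are assumptions, not from the example above):

``` sql
-- virtual columns are selected explicitly; they are not part of the declared schema
SELECT key, value, _version, _sign FROM postgresql_replica_table;
```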

View File

@ -36,6 +36,31 @@ The table structure can differ from the original PostgreSQL table structure:
- `schema` — Non-default table schema. Optional.
- `on conflict ...` — example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient.
or via config (since version 21.11):
```
<named_collections>
<postgres1>
<host></host>
<port></port>
<username></username>
<password></password>
<table></table>
</postgres1>
<postgres2>
<host></host>
<port></port>
<username></username>
<password></password>
</postgres2>
</named_collections>
```
Some parameters can be overridden by key-value arguments:
``` sql
SELECT * FROM postgresql(postgres1, schema='schema1', table='table1');
```
## Implementation Details {#implementation-details}
`SELECT` queries on the PostgreSQL side run as `COPY (SELECT ...) TO STDOUT` inside a read-only PostgreSQL transaction with a commit after each `SELECT` query.

View File

@ -37,6 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[rabbitmq_skip_broken_messages = N,]
[rabbitmq_max_block_size = N,]
[rabbitmq_flush_interval_ms = N]
[rabbitmq_queue_settings_list = 'x-dead-letter-exchange=my-dlx,x-max-length=10,x-overflow=reject-publish']
```
Required parameters:
@ -59,6 +60,7 @@ Optional parameters:
- `rabbitmq_skip_broken_messages` RabbitMQ message parser tolerance to schema-incompatible messages per block. Default: `0`. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data).
- `rabbitmq_max_block_size`
- `rabbitmq_flush_interval_ms`
- `rabbitmq_queue_settings_list` - allows setting RabbitMQ settings when creating a queue (see the sketch below). Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue.
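As an illustration, a hedged sketch of a table definition that passes queue settings (the host, exchange, format and column names are assumptions):

``` sql
CREATE TABLE rabbitmq_queue (key UInt64, value String)
ENGINE = RabbitMQ
SETTINGS rabbitmq_host_port = 'localhost:5672',
         rabbitmq_exchange_name = 'exchange1',
         rabbitmq_format = 'JSONEachRow',
         -- queue-level settings applied when the queue is created
         rabbitmq_queue_settings_list = 'x-dead-letter-exchange=my-dlx,x-max-length=10';
```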
SSL connection:

View File

@ -66,9 +66,9 @@ WHERE table = 'visits'
└───────────┴────────────────┴────────┘
```
The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](#alter_manipulations-with-partitions) queries.
The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md) queries.
The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](#alter_attach-partition) query.
The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query.
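For example, a minimal sketch of using a value from the `partition` column in an `ALTER` query, assuming the `visits` table from this example:

``` sql
-- detach the whole 201901 partition listed above
ALTER TABLE visits DETACH PARTITION 201901;
```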
Let's break down the name of the first part: `201901_1_3_1`:

View File

@ -39,10 +39,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
- `policy_name` - (optional) policy name; it will be used to store temporary files for async send
See also:
**See Also**
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
**Distributed Settings**

View File

@ -0,0 +1,15 @@
---
title: How do I contribute code to ClickHouse?
toc_hidden: true
toc_priority: 120
---
# How do I contribute code to ClickHouse? {#how-do-i-contribute-code-to-clickhouse}
ClickHouse is an open-source project [developed on GitHub](https://github.com/ClickHouse/ClickHouse).
As customary, contribution instructions are published in [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md) file in the root of the source code repository.
If you want to suggest a substantial change to ClickHouse, consider [opening a GitHub issue](https://github.com/ClickHouse/ClickHouse/issues/new/choose) explaining what you want to do, to discuss it with maintainers and community first. [Examples of such RFC issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc).
If your contributions are security related, please check out [our security policy](https://github.com/ClickHouse/ClickHouse/security/policy/) too.

View File

@ -17,6 +17,7 @@ Questions:
- [What is OLAP?](../../faq/general/olap.md)
- [What is a columnar database?](../../faq/general/columnar-database.md)
- [Why not use something like MapReduce?](../../faq/general/mapreduce.md)
- [How do I contribute code to ClickHouse?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md)
!!! info "Dont see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.

View File

@ -11,6 +11,7 @@ Questions:
- [Which ClickHouse version to use in production?](../../faq/operations/production.md)
- [Is it possible to delete old records from a ClickHouse table?](../../faq/operations/delete-old-data.md)
- [Does ClickHouse support multi-region replication?](../../faq/operations/multi-region-replication.md)
!!! info "Dont see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.

View File

@ -0,0 +1,13 @@
---
title: Does ClickHouse support multi-region replication?
toc_hidden: true
toc_priority: 30
---
# Does ClickHouse support multi-region replication? {#does-clickhouse-support-multi-region-replication}
The short answer is "yes". However, we recommend keeping latency between all regions/datacenters in the two-digit range, otherwise write performance will suffer as it goes through a distributed consensus protocol. For example, replication between US coasts will likely work fine, but between the US and Europe it won't.
Configuration-wise there's no difference compared to single-region replication, simply use hosts that are located in different locations for replicas.
For more information, see [full article on data replication](../../engines/table-engines/mergetree-family/replication.md).

View File

@ -142,6 +142,12 @@ On Gentoo, you can just use `emerge clickhouse` to install ClickHouse from sourc
To start the server as a daemon, run:
``` bash
$ sudo clickhouse start
```
There are also other ways to run ClickHouse:
``` bash
$ sudo service clickhouse-server start
```
@ -152,6 +158,12 @@ If you do not have `service` command, run as
$ sudo /etc/init.d/clickhouse-server start
```
If you have `systemctl` command, run as
``` bash
$ sudo systemctl start clickhouse-server.service
```
See the logs in the `/var/log/clickhouse-server/` directory.
If the server does not start, check the configurations in the file `/etc/clickhouse-server/config.xml`.
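Once the server is up, a quick sanity check run from `clickhouse-client` might look like this (a minimal sketch):

``` sql
-- confirms the server accepts queries and reports its version
SELECT version();
```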

View File

@ -204,7 +204,7 @@ When parsing with this format, tabs or linefeeds are not allowed in each field.
This format is also available under the name `TSVRawWithNames`.
## TabSeparatedWithNamesAndTypes {#tabseparatedrawwithnamesandtypes}
## TabSeparatedRawWithNamesAndTypes {#tabseparatedrawwithnamesandtypes}
Differs from `TabSeparatedWithNamesAndTypes` format in that the rows are written without escaping.
When parsing with this format, tabs or linefeeds are not allowed in each field.
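A hedged usage sketch of this format (the table name is an assumption):

``` sql
-- rows are written without escaping, with a header of names and types
SELECT * FROM some_table FORMAT TabSeparatedRawWithNamesAndTypes;
```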

View File

@ -424,7 +424,10 @@ Next are the configuration methods for different `type`.
`query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration.
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully.
!!! note "Warning"
To keep the default `handlers` such as `query`, `play`, `ping`, use the `<defaults/>` rule.
Example:
@ -443,13 +446,14 @@ Example:
<query>SELECT name, value FROM system.settings WHERE name = {name_2:String}</query>
</handler>
</rule>
<defaults/>
</http_handlers>
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2'
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_alter_threads 2
max_final_threads 2
```
!!! note "caution"
@ -461,7 +465,7 @@ In `dynamic_query_handler`, the query is written in the form of param of the HTT
ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and `queries` whether the settings were set successfully.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` and queries whether the settings were set successfully.
Example:
@ -475,13 +479,14 @@ Example:
<query_param_name>query_param</query_param_name>
</handler>
</rule>
<defaults/>
</http_handlers>
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2&param_name_1=max_threads&param_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2&param_name_1=max_threads&param_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
max_threads 1
max_alter_threads 2
max_final_threads 2
```
### static {#static}
@ -505,6 +510,7 @@ Return a message.
<response_content>Say Hi!</response_content>
</handler>
</rule>
<defaults/>
</http_handlers>
```

View File

@ -6,7 +6,7 @@ toc_title: Client Libraries
# Client Libraries from Third-party Developers {#client-libraries-from-third-party-developers}
!!! warning "Disclaimer"
Yandex does **not** maintain the libraries listed below and hasn't done any extensive testing to ensure their quality.
ClickHouse Inc does **not** maintain the libraries listed below and hasn't done any extensive testing to ensure their quality.
- Python
- [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)

View File

@ -220,4 +220,24 @@ SeekTable is [free](https://www.seektable.com/help/cloud-pricing) for personal/i
[Chadmin](https://github.com/bun4uk/chadmin) is a simple UI for visualizing the currently running queries on your ClickHouse cluster, viewing information about them, and killing them if needed.
### TABLUM.IO {#tablum_io}
[TABLUM.IO](https://tablum.io/) — an online query and analytics tool for ETL and visualization. It allows connecting to ClickHouse, querying data via a versatile SQL console, and loading data from static files and third-party services. TABLUM.IO can visualize data results as charts and tables.
Features:
- ETL: data loading from popular databases, local and remote files, API invocations.
- Versatile SQL console with syntax highlight and visual query builder.
- Data visualization as charts and tables.
- Data materialization and sub-queries.
- Data reporting to Slack, Telegram or email.
- Data pipelining via proprietary API.
- Data export in JSON, CSV, SQL, HTML formats.
- Web-based interface.
TABLUM.IO can be run as a self-hosted solution (as a docker image) or in the cloud.
License: [commercial](https://tablum.io/pricing) product with 3-month free period.
Try it out for free [in the cloud](https://tablum.io/try).
Learn more about the product at [TABLUM.IO](https://tablum.io/).
[Original article](https://clickhouse.com/docs/en/interfaces/third-party/gui/) <!--hide-->

View File

@ -87,7 +87,7 @@ toc_title: Adopters
| <a href="https://kontur.ru" class="favicon">Kontur</a> | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
| <a href="https://www.kuaishou.com/" class="favicon">Kuaishou</a> | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.com/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) |
| <a href="https://www.kgk-global.com/en/" class="favicon">KGK Global</a> | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) |
| <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a> | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
| <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a> | Research | Traffic analysis | 5 servers | 55 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
| <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| <a href="https://maxilect.com/" class="favicon">MAXILECT</a> | Ad Tech, Blockchain, ML, AI | — | — | — | [Job advertisement, 2021](https://www.linkedin.com/feed/update/urn:li:activity:6780842017229430784/) |
@ -178,5 +178,9 @@ toc_title: Adopters
| <a href="https://promo.croc.ru/digitalworker" class="favicon">Цифровой Рабочий</a> | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) |
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://futurragroup.com/" class="favicon">Futurra Group</a> | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) |
| <a href="https://usetech.com/" class="favicon">UseTech</a> | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) |
| <a href="https://lookforsale.ru/" class="favicon">Lookforsale</a> | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) |
| <a href="https://rvision.pro/en/" class="favicon">R-Vision</a> | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->

View File

@ -16,6 +16,11 @@ ZooKeeper is one of the first well-known open-source coordination systems. It's
By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (linearizable writes, non-linearizable reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with ClickHouse Keeper. Snapshots and logs have an incompatible format with ZooKeeper, but the `clickhouse-keeper-converter` tool allows converting ZooKeeper data to a ClickHouse Keeper snapshot. The interserver protocol in ClickHouse Keeper is also incompatible with ZooKeeper, so a mixed ZooKeeper / ClickHouse Keeper cluster is impossible.
ClickHouse Keeper supports Access Control Lists (ACLs) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and has identical built-in schemes: `world`, `auth`, `digest`, `host` and `ip`. The digest authentication scheme uses the pair `username:password`; the password is encoded in Base64.
!!! info "Note"
External integrations are not supported.
## Configuration
ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server, but in both cases configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is `<keeper_server>`. Keeper configuration has the following parameters:
@ -118,13 +123,13 @@ echo mntr | nc localhost 9181
Below are the detailed 4lw commands:
- ruok : Tests if server is running in a non-error state. The server will respond with imok if it is running. Otherwise it will not respond at all. A response of "imok" does not necessarily indicate that the server has joined the quorum, just that the server process is active and bound to the specified client port. Use "stat" for details on state wrt quorum and client connection information.
- `ruok`: Tests if server is running in a non-error state. The server will respond with imok if it is running. Otherwise it will not respond at all. A response of "imok" does not necessarily indicate that the server has joined the quorum, just that the server process is active and bound to the specified client port. Use "stat" for details on state wrt quorum and client connection information.
```
imok
```
- mntr : Outputs a list of variables that could be used for monitoring the health of the cluster.
- `mntr`: Outputs a list of variables that could be used for monitoring the health of the cluster.
```
zk_version v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
@ -146,12 +151,11 @@ zk_followers 0
zk_synced_followers 0
```
- srvr : Lists full details for the server.
- `srvr`: Lists full details for the server.
```
ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
Latency min/avg/max: 0/0/0
Received: 2
Sent : 2
Connections: 1
@ -161,16 +165,14 @@ Mode: leader
Node count: 4
```
- stat : Lists brief details for the server and connected clients.
- `stat`: Lists brief details for the server and connected clients.
```
ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
Clients:
192.168.1.1:52852(recved=0,sent=0)
192.168.1.1:52042(recved=24,sent=48)
Latency min/avg/max: 0/0/0
Received: 4
Sent : 4
Connections: 1
@ -178,16 +180,15 @@ Outstanding: 0
Zxid: 36
Mode: leader
Node count: 4
```
- srst : Reset server statistics. The command will affect the result of `srvr`, `mntr` and `stat`.
- `srst`: Reset server statistics. The command will affect the result of `srvr`, `mntr` and `stat`.
```
Server stats reset.
```
- conf : Print details about serving configuration.
- `conf`: Print details about serving configuration.
```
server_id=1
@ -220,20 +221,20 @@ compress_snapshots_with_zstd_format=true
configuration_change_tries_count=20
```
- cons : List full connection/session details for all clients connected to this server. Includes information on numbers of packets received/sent, session id, operation latencies, last operation performed, etc...
- `cons`: List full connection/session details for all clients connected to this server. Includes information on numbers of packets received/sent, session id, operation latencies, last operation performed, etc...
```
192.168.1.1:52163(recved=0,sent=0,sid=0xffffffffffffffff,lop=NA,est=1636454787393,to=30000,lzxid=0xffffffffffffffff,lresp=0,llat=0,minlat=0,avglat=0,maxlat=0)
192.168.1.1:52042(recved=9,sent=18,sid=0x0000000000000001,lop=List,est=1636454739887,to=30000,lcxid=0x0000000000000005,lzxid=0x0000000000000005,lresp=1636454739892,llat=0,minlat=0,avglat=0,maxlat=0)
```
- crst : Reset connection/session statistics for all connections.
- `crst`: Reset connection/session statistics for all connections.
```
Connection stats reset.
```
- envi : Print details about serving environment
- `envi`: Print details about serving environment
```
Environment:
@ -250,41 +251,41 @@ user.tmp=/var/folders/b4/smbq5mfj7578f2jzwn602tt40000gn/T/
```
- dirs : Shows the total size of snapshot and log files in bytes
- `dirs`: Shows the total size of snapshot and log files in bytes
```
snapshot_dir_size: 0
log_dir_size: 3875
```
- isro: Tests if server is running in read-only mode. The server will respond with "ro" if in read-only mode or "rw" if not in read-only mode.
- `isro`: Tests if server is running in read-only mode. The server will respond with "ro" if in read-only mode or "rw" if not in read-only mode.
```
rw
```
- wchs : Lists brief information on watches for the server.
- `wchs`: Lists brief information on watches for the server.
```
1 connections watching 1 paths
Total watches:1
```
- wchc : Lists detailed information on watches for the server, by session. This outputs a list of sessions(connections) with associated watches (paths). Note, depending on the number of watches this operation may be expensive (ie impact server performance), use it carefully.
- `wchc`: Lists detailed information on watches for the server, by session. This outputs a list of sessions (connections) with associated watches (paths). Note, depending on the number of watches this operation may be expensive (ie impact server performance), use it carefully.
```
0x0000000000000001
/clickhouse/task_queue/ddl
```
- wchp : Lists detailed information on watches for the server, by path. This outputs a list of paths (znodes) with associated sessions. Note, depending on the number of watches this operation may be expensive (ie impact server performance), use it carefully.
- `wchp`: Lists detailed information on watches for the server, by path. This outputs a list of paths (znodes) with associated sessions. Note, depending on the number of watches this operation may be expensive (i.e. impact server performance), use it carefully.
```
/clickhouse/task_queue/ddl
0x0000000000000001
```
- dump : Lists the outstanding sessions and ephemeral nodes. This only works on the leader.
- `dump`: Lists the outstanding sessions and ephemeral nodes. This only works on the leader.
```
Sessions dump (2):

View File

@ -14,11 +14,11 @@ To enable Kerberos, one should include `kerberos` section in `config.xml`. This
#### Parameters:
- `principal` - canonical service principal name that will be acquired and used when accepting security contexts.
- This parameter is optional; if omitted, the default principal will be used.
- `realm` - a realm, that will be used to restrict authentication to only those requests whose initiator's realm matches it.
- This parameter is optional; if omitted, no additional filtering by realm will be applied.
Example (goes into `config.xml`):
@ -75,7 +75,7 @@ In order to enable Kerberos authentication for the user, specify `kerberos` sect
Parameters:
- `realm` - a realm that will be used to restrict authentication to only those requests whose initiator's realm matches it.
- This parameter is optional; if omitted, no additional filtering by realm will be applied.
Example (goes into `users.xml`):

View File

@ -435,26 +435,58 @@ Similar to `interserver_http_host`, except that this hostname can be used by oth
## interserver_http_credentials {#server-settings-interserver-http-credentials}
The username and password used to authenticate during [replication](../../engines/table-engines/mergetree-family/replication.md) with the Replicated\* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster.
By default, the authentication is not used.
A username and a password used to connect to other servers during [replication](../../engines/table-engines/mergetree-family/replication.md). The server also authenticates other replicas using these credentials, so `interserver_http_credentials` must be the same for all replicas in a cluster.
By default, if the `interserver_http_credentials` section is omitted, authentication is not used during replication.
!!! note "Note"
These credentials are common for replication through `HTTP` and `HTTPS`.
`interserver_http_credentials` settings do not relate to a ClickHouse client credentials [configuration](../../interfaces/cli.md#configuration_files).
This section contains the following parameters:
!!! note "Note"
These credentials are common for replication via `HTTP` and `HTTPS`.
- `user` — username.
- `password` — password.
The section contains the following parameters:
**Example**
- `user` — Username.
- `password` — Password.
- `allow_empty` — If `true`, then other replicas are allowed to connect without authentication even if credentials are set. If `false`, then connections without authentication are refused. Default value: `false`.
- `old` — Contains old `user` and `password` used during credential rotation. Several `old` sections can be specified.
**Credentials Rotation**
ClickHouse supports dynamic interserver credentials rotation without stopping all replicas at the same time to update their configuration. Credentials can be changed in several steps.
To enable authentication, set `interserver_http_credentials.allow_empty` to `true` and add credentials. This allows connections with authentication and without it.
``` xml
<interserver_http_credentials>
<user>admin</user>
<password>111</password>
<allow_empty>true</allow_empty>
</interserver_http_credentials>
```
After configuring all replicas, set `allow_empty` to `false` or remove this setting. This makes authentication with new credentials mandatory.
To change existing credentials, move the username and the password to the `interserver_http_credentials.old` section and update `user` and `password` with new values. At this point the server uses the new credentials to connect to other replicas and accepts connections with either new or old credentials.
``` xml
<interserver_http_credentials>
<user>admin</user>
<password>222</password>
<old>
<user>admin</user>
<password>111</password>
</old>
<old>
<user>temp</user>
<password>000</password>
</old>
</interserver_http_credentials>
```
When new credentials are applied to all replicas, old credentials may be removed.
## keep_alive_timeout {#keep-alive-timeout}
The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 10 seconds.

View File

@ -817,9 +817,19 @@ If the number of rows to be read from a file of a [MergeTree](../../engines/tabl
Possible values:
- Any positive integer.
- Positive integer.
Default value: 163840.
Default value: `163840`.
## merge_tree_min_rows_for_concurrent_read_for_remote_filesystem {#merge-tree-min-rows-for-concurrent-read-for-remote-filesystem}
The minimum number of rows to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading when reading from a remote filesystem.
Possible values:
- Positive integer.
Default value: `163840`.
## merge_tree_min_bytes_for_concurrent_read {#setting-merge-tree-min-bytes-for-concurrent-read}
@ -827,9 +837,19 @@ If the number of bytes to read from one file of a [MergeTree](../../engines/tabl
Possible value:
- Any positive integer.
- Positive integer.
Default value: 251658240.
Default value: `251658240`.
## merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem {#merge-tree-min-bytes-for-concurrent-read-for-remote-filesystem}
The minimum number of bytes to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading when reading from a remote filesystem.
Possible values:
- Positive integer.
Default value: `251658240`.
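For instance, a minimal sketch of tuning the two remote-filesystem settings above for the current session (the values shown are simply the documented defaults):

``` sql
SET merge_tree_min_rows_for_concurrent_read_for_remote_filesystem = 163840;
SET merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem = 251658240;
```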
## merge_tree_min_rows_for_seek {#setting-merge-tree-min-rows-for-seek}
@ -1469,7 +1489,7 @@ Possible values:
Default value: `1`.
**See Also**
- [min_count_to_compile_aggregate_expression](#min_count_to_compile_aggregate_expression)
@ -2095,7 +2115,7 @@ Possible values:
- 0 — Optimization disabled.
- 1 — Optimization enabled.
Default value: `1`.
See also:
@ -3134,6 +3154,12 @@ Possible values:
Default value: `0`.
!!! warning "Warning"
Nullable primary key usually indicates bad design. It is forbidden in almost all mainstream DBMSs. The feature is mainly for [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) and is not heavily tested. Use with care.
!!! warning "Warning"
Do not enable this feature in versions `<= 21.8`. It is not properly implemented and may lead to a server crash.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
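A minimal sketch of the effect, assuming the rewrite applies to `sum` as described:

``` sql
SET aggregate_functions_null_for_empty = 1;
-- sum() is rewritten to sumOrNull(), so an empty result set yields NULL instead of 0
SELECT sum(number) FROM numbers(10) WHERE number > 100;
```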
@ -3682,49 +3708,6 @@ Possible values:
Default value: `0`.
## materialized_postgresql_max_block_size {#materialized-postgresql-max-block-size}
Sets the number of rows collected in memory before flushing data into PostgreSQL database table.
Possible values:
- Positive integer.
Default value: `65536`.
## materialized_postgresql_tables_list {#materialized-postgresql-tables-list}
Sets a comma-separated list of PostgreSQL database tables, which will be replicated via [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine.
Default value: empty list — means whole PostgreSQL database will be replicated.
## materialized_postgresql_schema {#materialized-postgresql-schema}
Default value: empty string. (Default schema is used)
## materialized_postgresql_schema_list {#materialized-postgresql-schema-list}
Default value: empty list. (Default schema is used)
## materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update}
Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them.
Possible values:
- 0 — The table is not automatically updated in the background, when schema changes are detected.
- 1 — The table is automatically updated in the background, when schema changes are detected.
Default value: `0`.
## materialized_postgresql_replication_slot {#materialized-postgresql-replication-slot}
A user-created replication slot. Must be used together with [materialized_postgresql_snapshot](#materialized-postgresql-snapshot).
## materialized_postgresql_snapshot {#materialized-postgresql-snapshot}
A text string identifying a snapshot, from which [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. Must be used together with [materialized_postgresql_replication_slot](#materialized-postgresql-replication-slot).
## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}
Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md#projections) optimization when processing `SELECT` queries.
@ -3993,8 +3976,8 @@ If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will
Possible values:
- 0 — Insertions are made synchronously, one after another.
- 1 — Multiple asynchronous insertions enabled.
Default value: `0`.
@ -4066,7 +4049,7 @@ Default value: `0`.
## alter_partition_verbose_result {#alter-partition-verbose-result}
Enables or disables the display of information about the parts to which the manipulation operations with partitions and parts have been successfully applied.
Applicable to [ATTACH PARTITION|PART](../../sql-reference/statements/alter/partition.md#alter_attach-partition) and to [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition).
Possible values:

View File

@ -6,7 +6,7 @@ You can use this table to get information similar to the [DESCRIBE TABLE](../../
Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in `system.columns` only in the sessions where they have been created. They are shown with an empty `database` field.
Columns:
The `system.columns` table contains the following columns (the column type is shown in brackets):
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
@ -86,21 +86,4 @@ numeric_scale: ᴺᵁᴸᴸ
datetime_precision: ᴺᵁᴸᴸ
```
The `system.columns` table contains the following columns (the column type is shown in brackets):
- `database` (String) — Database name.
- `table` (String) — Table name.
- `name` (String) — Column name.
- `type` (String) — Column type.
- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined.
- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes.
- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes.
- `marks_bytes` (UInt64) — The size of marks, in bytes.
- `comment` (String) — Comment on the column, or an empty string if it is not defined.
- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression.
- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression.
- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression.
- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression.
[Original article](https://clickhouse.com/docs/en/operations/system-tables/columns) <!--hide-->

View File

@ -35,7 +35,7 @@ SELECT * FROM system.metrics LIMIT 10
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
[Original article](https://clickhouse.com/docs/en/operations/system-tables/metrics) <!--hide-->

View File

@ -24,9 +24,7 @@ Function:
- Calculates a hash for all parameters in the aggregate, then uses it in calculations.
- Uses an adaptive sampling algorithm. For the calculation state, the function uses a sample of element hash values up to 65536.
This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions.
- Uses an adaptive sampling algorithm. For the calculation state, the function uses a sample of element hash values up to 65536. This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions.
- Provides the result deterministically (it does not depend on the query processing order).
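A hedged usage sketch (the table and column names are assumptions):

``` sql
-- approximate count of distinct users
SELECT uniq(UserID) FROM visits;
```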

View File

@ -11,9 +11,7 @@ Aggregate functions can have an implementation-defined intermediate state that c
**Parameters**
- Name of the aggregate function.
If the function is parametric, specify its parameters too.
- Name of the aggregate function. If the function is parametric, specify its parameters too.
- Types of the aggregate function arguments.
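For illustration, a minimal sketch of a table that stores such intermediate states (the table and column names are assumptions):

``` sql
CREATE TABLE agg_states
(
    uniq_users     AggregateFunction(uniq, UInt64),           -- state of uniq over UInt64 values
    time_quantiles AggregateFunction(quantiles(0.5, 0.9), UInt64)  -- parametric function with its parameters
)
ENGINE = AggregatingMergeTree()
ORDER BY tuple();
```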

View File

@ -41,7 +41,7 @@ Example of a polygon dictionary configuration:
</dictionary>
```
Tne corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query):
The corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query):
``` sql
CREATE DICTIONARY polygon_dict_name (
key Array(Array(Array(Array(Float64)))),

View File

@ -122,7 +122,12 @@ Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file has `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false.
- `execute_direct` - Executable source file will be searched inside `user_scripts` folder and executed directly. Arguments are passed using spaces. Example: `test_script arg_1 arg_2`. Default value is false. Optional parameter.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
That dictionary source can be configured only via XML configuration. Creating dictionaries with an executable source via DDL is disabled; otherwise, the DB user would be able to execute an arbitrary binary on the ClickHouse node.
@ -150,10 +155,14 @@ Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
- `pool_size` — Size of pool. If 0 is specified as `pool_size` then there is no pool size restrictions.
- `command_termination_timeout`Executable pool script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter.
- `pool_size` — Size of pool. If 0 is specified as `pool_size` then there is no pool size restrictions. Default value is `16`.
- `command_termination_timeout`executable script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter.
- `max_command_execution_time` — Maximum executable script command execution time for processing block of data. Specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. Optional parameter.
- `execute_direct` - Executable source file will be searched inside `user_scripts` folder and executed directly. Additional arguments can be specified. Example: `test_script arg_1 arg_2`. Default value is false. Optional parameter.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
That dictionary source can be configured only via XML configuration. Creating dictionaries with an executable source via DDL is disabled; otherwise, the DB user would be able to execute an arbitrary binary on the ClickHouse node.

View File

@ -73,16 +73,19 @@ User defined function configurations are searched relative to the path specified
A function configuration contains the following settings:
- `name` - a function name.
- `command` - a command or a script to execute.
- `command` - the script name to execute, or a command if `execute_direct` is false.
- `argument` - argument description with the `type` of an argument. Each argument is described in a separate setting.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `pool_size` - the size of a command pool. Optional. Default value is `16`.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
- `execute_direct` - Executable source file will be searched inside `user_scripts` folder and executed directly. Additional arguments can be specified. Example: `test_script arg_1 arg_2`. Default value is true. Optional parameter.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is after processing a chunk of arguments it must wait for the next chunk.
@ -102,7 +105,6 @@ Creating `test_function` using XML configuration:
</argument>
<format>TabSeparated</format>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table"</command>
<lifetime>0</lifetime>
</function>
</functions>
```
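A hedged usage sketch, assuming the `test_function` defined above has been loaded:

``` sql
-- the external command computes x + y, so this is expected to return 4
SELECT test_function(toUInt64(2), toUInt64(2));
```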

View File

@ -31,7 +31,7 @@ CREATE ROLE accountant;
GRANT SELECT ON db.* TO accountant;
```
This sequence of queries creates the role `accountant` that has the privilege of reading data from the `accounting` database.
This sequence of queries creates the role `accountant` that has the privilege of reading data from the `db` database.
Assigning the role to the user `mira`:

View File

@ -21,7 +21,7 @@ GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.ta
- `user` — ClickHouse user account.
The `WITH GRANT OPTION` clause grants `user` or `role` permission to execute the `GRANT` query. Users can grant privileges of the same scope they have or less.
The `WITH REPLACE OPTION` clause replace old privileges by new privileges for the `user` or `role`, if not specified it is append privileges.
The `WITH REPLACE OPTION` clause replaces old privileges with new privileges for the `user` or `role`; if it is not specified, privileges are appended.
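For example, a minimal sketch (the user name is an assumption):

``` sql
-- replaces john's previously granted privileges with SELECT on db.*
GRANT SELECT ON db.* TO john WITH REPLACE OPTION;
```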
## Assigning Role Syntax {#assign-role-syntax}
@ -33,7 +33,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US
- `user` — ClickHouse user account.
The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`.
The `WITH REPLACE OPTION` clause replace old roles by new role for the `user` or `role`, if not specified it is append roles.
The `WITH REPLACE OPTION` clause replaces old roles with new roles for the `user` or `role`; if it is not specified, roles are appended.
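For example, a minimal sketch (the role and user names are assumptions):

``` sql
-- mira receives the accountant role and may grant it to others
GRANT accountant TO mira WITH ADMIN OPTION;
```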
## Usage {#grant-usage}

View File

@ -55,13 +55,13 @@ The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_
- [join_on_disk_max_files_to_merge](../../../operations/settings/settings.md#join_on_disk_max_files_to_merge)
- [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys)
## ON Section Conditions {on-section-conditions}
## ON Section Conditions {#on-section-conditions}
An `ON` section can contain several conditions combined using the `AND` and `OR` operators. Conditions specifying join keys must refer both left and right tables and must use the equality operator. Other conditions may use other logical operators but they must refer either the left or the right table of a query.
An `ON` section can contain several conditions combined using the `AND` operator. Conditions specifying join keys must refer both left and right tables and must use the equality operator. Other conditions may use other logical operators but they must refer either the left or the right table of a query.
Rows are joined if the whole complex condition is met. If the conditions are not met, rows may still be included in the result depending on the `JOIN` type. Note that if the same conditions are placed in a `WHERE` section and they are not met, then rows are always filtered out from the result.
!!! note "Note"
The `OR` operator inside an `ON` section is not supported yet.
The `OR` operator inside the `ON` clause works using the hash join algorithm: for each `OR` argument with join keys for `JOIN`, a separate hash table is created, so memory consumption and query execution time grow linearly with the number of `OR` expressions in the `ON` clause.
!!! note "Note"
If a condition refers to columns from different tables, then only the equality operator (`=`) is supported so far.
@ -109,7 +109,47 @@ Result:
│ B │ Text B │ 15 │
└──────┴────────┴────────┘
```
Query with `INNER` type of a join and condition with `OR`:
``` sql
CREATE TABLE t1 (`a` Int64, `b` Int64) ENGINE = MergeTree() ORDER BY a;
CREATE TABLE t2 (`key` Int32, `val` Int64) ENGINE = MergeTree() ORDER BY key;
INSERT INTO t1 SELECT number as a, -a as b from numbers(5);
INSERT INTO t2 SELECT if(number % 2 == 0, toInt64(number), -number) as key, number as val from numbers(5);
SELECT a, b, val FROM t1 INNER JOIN t2 ON t1.a = t2.key OR t1.b = t2.key;
```
Result:
```
┌─a─┬──b─┬─val─┐
│ 0 │ 0 │ 0 │
│ 1 │ -1 │ 1 │
│ 2 │ -2 │ 2 │
│ 3 │ -3 │ 3 │
│ 4 │ -4 │ 4 │
└───┴────┴─────┘
```
Query with `INNER` type of a join and conditions with `OR` and `AND`:
``` sql
SELECT a, b, val FROM t1 INNER JOIN t2 ON t1.a = t2.key OR t1.b = t2.key AND t2.val > 3;
```
Result:
```
┌─a─┬──b─┬─val─┐
│ 0 │ 0 │ 0 │
│ 2 │ -2 │ 2 │
│ 4 │ -4 │ 4 │
└───┴────┴─────┘
```
## ASOF JOIN Usage {#asof-join-usage}
`ASOF JOIN` is useful when you need to join records that have no exact match.

View File

@ -0,0 +1,58 @@
---
toc_priority: 55
toc_title: hdfsCluster
---
# hdfsCluster Table Function {#hdfsCluster-table-function}
Allows processing files from HDFS in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster, expands the asterisks in the HDFS file path, and dispatches each file dynamically. On a worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
**Syntax**
``` sql
hdfsCluster(cluster_name, URI, format, structure)
```
**Arguments**
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
**Returned value**
A table with the specified structure for reading data in the specified file.
**Examples**
1. Suppose that we have a ClickHouse cluster named `cluster_simple`, and several files with following URIs on HDFS:
- hdfs://hdfs1:9000/some_dir/some_file_1
- hdfs://hdfs1:9000/some_dir/some_file_2
- hdfs://hdfs1:9000/some_dir/some_file_3
- hdfs://hdfs1:9000/another_dir/some_file_1
- hdfs://hdfs1:9000/another_dir/some_file_2
- hdfs://hdfs1:9000/another_dir/some_file_3
2. Query the number of rows in these files:
``` sql
SELECT count(*)
FROM hdfsCluster('cluster_simple', 'hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32')
```
3. Query the number of rows in all files of these two directories:
``` sql
SELECT count(*)
FROM hdfsCluster('cluster_simple', 'hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32')
```
!!! warning "Warning"
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
**See Also**
- [HDFS engine](../../engines/table-engines/integrations/hdfs.md)
- [HDFS table function](../../sql-reference/table-functions/hdfs.md)

View File

@ -0,0 +1 @@
../../../en/faq/general/how-do-i-contribute-code-to-clickhouse.md

View File

@ -0,0 +1 @@
../../../en/faq/operations/multi-region-replication.md

View File

@ -15,13 +15,7 @@ toc_title: OnTime
Downloading the data:
``` bash
for s in `seq 1987 2018`
do
for m in `seq 1 12`
do
wget https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_${s}_${m}.zip
done
done
wget --no-check-certificate --continue https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_{1987..2021}_{1..12}.zip
```
(from https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh)
@ -40,7 +34,7 @@ CREATE TABLE `ontime`
`Reporting_Airline` String,
`DOT_ID_Reporting_Airline` Int32,
`IATA_CODE_Reporting_Airline` String,
`Tail_Number` Int32,
`Tail_Number` String,
`Flight_Number_Reporting_Airline` String,
`OriginAirportID` Int32,
`OriginAirportSeqID` Int32,

View File

@ -397,7 +397,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
The `<query>` value is a predefined query of `<predefined_query_handler>`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration.
The following example defines the values of the `max_threads` and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
The following example defines the values of the `max_threads` and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully.
Example:
@ -420,9 +420,9 @@ $ curl -v 'http://localhost:8123/predefined_query'
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2'
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_alter_threads 2
max_final_threads 2
```
!!! note "注意"
@ -434,7 +434,7 @@ max_alter_threads 2
ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `<query_param_name>` is `/query`. It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
To experiment with this functionality, the example defines the values of max_threads and max_alter_threads and queries whether the settings were set successfully.
To experiment with this functionality, the example defines the values of max_threads and max_final_threads and queries whether the settings were set successfully.
Example:
@ -452,9 +452,9 @@ max_alter_threads 2
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2&param_name_1=max_threads&param_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2&param_name_1=max_threads&param_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
max_threads 1
max_alter_threads 2
max_final_threads 2
```
## Static {#static}

View File

@ -5,7 +5,7 @@ toc_title: HDFS
# HDFS {#table_engines-hdfs}
Manages data on HDFS. This engine is similar to the [File](../special/file.md#table_engines-file) and [URL](../special/url.md#table_engines-url) engines.
This engine provides integration with the [Apache Hadoop](https://ru.wikipedia.org/wiki/Hadoop) ecosystem by letting you manage data on HDFS via ClickHouse. It is similar to the [File](../special/file.md#table_engines-file) and [URL](../special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
## Usage {#usage}
@ -13,9 +13,11 @@ toc_title: HDFS
ENGINE = HDFS(URI, format)
```
The `URI` parameter must be the whole file URI in HDFS.
**Engine parameters**
The `URI` parameter must be the whole file URI in HDFS. The path part of the URI may contain globs; in this case the table is read-only.
The `format` parameter must be a format that ClickHouse can use in both `INSERT` and `SELECT` queries. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats).
The path part of the URI may contain globs; in this case the table is read-only.
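For illustration, a minimal sketch of such a table (the URI, column names, and format below are assumptions, not taken from this page):

``` sql
-- Hypothetical table backed by a single TSV file in HDFS
CREATE TABLE hdfs_engine_table (name String, value UInt32)
ENGINE = HDFS('hdfs://hdfs1:9000/other_storage', 'TSV')
```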
**Example:**
@ -67,12 +69,12 @@ SELECT * FROM hdfs_engine_table LIMIT 2
1. Suppose we have several files with the following URIs in HDFS:
- 'hdfs://hdfs1:9000/some_dir/some_file_1'
- 'hdfs://hdfs1:9000/some_dir/some_file_2'
- 'hdfs://hdfs1:9000/some_dir/some_file_3'
- 'hdfs://hdfs1:9000/another_dir/some_file_1'
- 'hdfs://hdfs1:9000/another_dir/some_file_2'
- 'hdfs://hdfs1:9000/another_dir/some_file_3'
- 'hdfs://hdfs1:9000/some_dir/some_file_1'
- 'hdfs://hdfs1:9000/some_dir/some_file_2'
- 'hdfs://hdfs1:9000/some_dir/some_file_3'
- 'hdfs://hdfs1:9000/another_dir/some_file_1'
- 'hdfs://hdfs1:9000/another_dir/some_file_2'
- 'hdfs://hdfs1:9000/another_dir/some_file_3'
1. There are several ways to create a table consisting of all six files (see the sketch below):
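One such way, sketched with glob patterns over the path (the table name is hypothetical; the columns follow the surrounding examples):

``` sql
-- Hypothetical: one table covering all six files via path globs
CREATE TABLE table_with_files (name String, value UInt32)
ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV')
```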
@ -128,6 +130,7 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9
| **parameter** | **default value** |
| - | - |
| rpc\_client\_connect\_tcpnodelay | true |
| dfs\_client\_read\_shortcircuit | true |
| output\_replace-datanode-on-failure | true |
@ -177,22 +180,23 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9
#### Extended parameters for ClickHouse {#clickhouse-extras}
| **parameter** | **default value** |
| - | - |
|hadoop\_kerberos\_keytab | "" |
|hadoop\_kerberos\_principal | "" |
|hadoop\_kerberos\_kinit\_command | kinit |
### Limitations {#limitations}
* hadoop\_security\_kerberos\_ticket\_cache\_path can be set only at the global level
* `hadoop_security_kerberos_ticket_cache_path` and `libhdfs3_conf` can be set only at the global level, not per user
## Kerberos support {#kerberos-support}
If the hadoop\_security\_authentication parameter has the value 'kerberos', ClickHouse authenticates via Kerberos.
The [extended parameters](#clickhouse-extras) and hadoop\_security\_kerberos\_ticket\_cache\_path help with this.
If the `hadoop_security_authentication` parameter has the value `kerberos`, ClickHouse authenticates via Kerberos.
The [extended parameters](#clickhouse-extras) and `hadoop_security_kerberos_ticket_cache_path` help with this.
Note that due to libhdfs3 limitations only the legacy authentication approach is supported;
datanode communication is not secured by SASL (HADOOP\_SECURE\_DN\_USER is a reliable indicator of such
a security approach). Use tests/integration/test\_storage\_kerberized\_hdfs/hdfs_configs/bootstrap.sh as a reference for the settings.
datanode communication is not secured by SASL (`HADOOP_SECURE_DN_USER` is a reliable indicator of such
a security approach). Use `tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh` as a reference for the settings.
If hadoop\_kerberos\_keytab, hadoop\_kerberos\_principal or hadoop\_kerberos\_kinit\_command is specified in the settings, kinit will be invoked. hadoop\_kerberos\_keytab and hadoop\_kerberos\_principal are mandatory in that case. kinit and the krb5 configuration files also need to be installed.
If `hadoop_kerberos_keytab`, `hadoop_kerberos_principal` or `hadoop_kerberos_kinit_command` is specified in the settings, `kinit` will be invoked. `hadoop_kerberos_keytab` and `hadoop_kerberos_principal` are mandatory in that case. `kinit` and the krb5 configuration files also need to be installed.
## Virtual columns {#virtual-columns}

View File

@ -191,5 +191,5 @@ ClickHouse может поддерживать учетные данные Kerbe
**See Also**
- [Virtual columns](index.md#table_engines-virtual_columns)
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [background_message_broker_schedule_pool_size](../../../operations/settings/settings.md#background_message_broker_schedule_pool_size)

View File

@ -0,0 +1 @@
../../../en/faq/general/how-do-i-contribute-code-to-clickhouse.md

View File

@ -0,0 +1 @@
../../../en/faq/operations/multi-region-replication.md

Some files were not shown because too many files have changed in this diff.