Merge branch 'master' into memory-overcommit

Commit c0970b75ee by Dmitry Novik, 2022-01-18 15:30:24 +03:00, committed by GitHub.
875 changed files with 31376 additions and 9589 deletions.

.github/CODEOWNERS (4 changed lines)

@ -1,3 +1 @@
docs/* @ClickHouse/docs
docs/zh/* @ClickHouse/docs-zh
website/* @ClickHouse/docs

.github/actionlint.yml (new file, 8 lines)

@ -0,0 +1,8 @@
self-hosted-runner:
labels:
- builder
- fuzzer-unit-tester
- stress-tester
- style-checker
- func-tester-aarch64
- func-tester
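This new actionlint configuration declares the labels of the self-hosted runners so the linter accepts `runs-on: [self-hosted, builder]` and similar lines in the workflows below. A minimal sketch of checking the workflows locally, assuming the `actionlint` binary is installed (the invocation is an illustration, not part of this commit):

```
# actionlint picks up .github/actionlint.yml automatically and
# validates the self-hosted runner labels used in runs-on entries.
actionlint .github/workflows/*.yml
```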


@ -33,11 +33,11 @@ jobs:
- name: Cherry pick
run: |
sudo pip install GitPython
cd $GITHUB_WORKSPACE/tests/ci
cd "$GITHUB_WORKSPACE/tests/ci"
python3 cherry_pick.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
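The recurring change across these workflow diffs is quoting `$GITHUB_WORKSPACE`, `$TEMP_PATH` and command substitutions so that word splitting cannot mangle the paths, while the trailing `||:` keeps cleanup steps from failing the job. A small sketch of the difference (the path with a space is hypothetical):

```
TEMP_PATH="/tmp/build check"        # hypothetical path containing a space
sudo rm -fr $TEMP_PATH              # unquoted: splits into /tmp/build and check
sudo rm -fr "$TEMP_PATH"            # quoted: removes exactly the intended directory
docker kill "$(docker ps -q)" ||:   # '||:' is a no-op fallback, so an empty container list is not an error
```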


@ -9,23 +9,68 @@ on: # yamllint disable-line rule:truthy
branches:
- 'backport/**'
jobs:
DockerHubPush:
runs-on: [self-hosted, style-checker]
DockerHubPushAarch64:
runs-on: [self-hosted, func-tester-aarch64]
steps:
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 docker_images_check.py
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_images_check.py --suffix aarch64
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images_aarch64
path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
DockerHubPushAmd64:
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_images_check.py --suffix amd64
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images_amd64
path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
DockerHubPush:
needs: [DockerHubPushAmd64, DockerHubPushAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Download changed aarch64 images
uses: actions/download-artifact@v2
with:
name: changed_images_aarch64
path: ${{ runner.temp }}
- name: Download changed amd64 images
uses: actions/download-artifact@v2
with:
name: changed_images_amd64
path: ${{ runner.temp }}
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docker_images_check/changed_images.json
path: ${{ runner.temp }}/changed_images.json
CompatibilityCheck:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
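The former single DockerHubPush job is now split into per-architecture jobs (amd64 on `style-checker`, aarch64 on `func-tester-aarch64`), and `docker_manifests_merge.py` combines their image lists into one `changed_images.json`. Conceptually this mirrors Docker's multi-arch manifest lists; a hedged sketch of the idea with the plain Docker CLI (image names are placeholders, and the CI script may do this differently):

```
# Illustrative only: merge per-arch tags into one multi-arch manifest list.
docker manifest create example/image:tag \
    example/image:tag-amd64 \
    example/image:tag-aarch64
docker manifest push example/image:tag
```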
@ -39,7 +84,7 @@ jobs:
EOF
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Download json reports
@ -48,16 +93,16 @@ jobs:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheck
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 compatibility_check.py
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
@ -82,7 +127,7 @@ jobs:
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
@ -90,10 +135,10 @@ jobs:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
@ -103,9 +148,50 @@ jobs:
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH $CACHES_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
CHECK_NAME=ClickHouse build check (actions)
BUILD_NAME=package_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'true'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
@ -127,7 +213,7 @@ jobs:
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
@ -135,10 +221,10 @@ jobs:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
@ -148,9 +234,9 @@ jobs:
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH $CACHES_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
@ -172,7 +258,7 @@ jobs:
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
@ -180,10 +266,10 @@ jobs:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
@ -193,9 +279,9 @@ jobs:
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH $CACHES_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
@ -217,7 +303,7 @@ jobs:
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
@ -225,10 +311,10 @@ jobs:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
@ -238,15 +324,16 @@ jobs:
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH $CACHES_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
needs:
- BuilderDebRelease
- BuilderDebAarch64
- BuilderDebAsan
- BuilderDebTsan
- BuilderDebDebug
@ -265,21 +352,21 @@ jobs:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cd $GITHUB_WORKSPACE/tests/ci
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cd "$GITHUB_WORKSPACE/tests/ci"
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################
@ -302,22 +389,22 @@ jobs:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
############################ FUNCTIONAL STATEFUL TESTS #######################################
##############################################################################################
@ -340,22 +427,22 @@ jobs:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
######################################### STRESS TESTS #######################################
##############################################################################################
@ -381,22 +468,22 @@ jobs:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Stress test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 stress_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
#############################################################################################
############################# INTEGRATION TESTS #############################################
#############################################################################################
@ -418,22 +505,22 @@ jobs:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
FinishCheck:
needs:
- DockerHubPush
@ -447,10 +534,10 @@ jobs:
steps:
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Finish label
run: |
cd $GITHUB_WORKSPACE/tests/ci
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py


@ -6,7 +6,7 @@ env:
on: # yamllint disable-line rule:truthy
workflow_run:
workflows: ["CIGithubActions", "ReleaseCI", "DocsCheck", "BackportPR"]
workflows: ["PullRequestCI", "ReleaseCI", "DocsCheck", "BackportPR"]
types:
- requested
jobs:


@ -21,31 +21,77 @@ jobs:
steps:
- name: Clear repository
run: |
sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Labels check
run: |
cd $GITHUB_WORKSPACE/tests/ci
cd "$GITHUB_WORKSPACE/tests/ci"
python3 run_check.py
DockerHubPush:
DockerHubPushAarch64:
needs: CheckLabels
runs-on: [self-hosted, func-tester-aarch64]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_images_check.py --suffix aarch64
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images_aarch64
path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
DockerHubPushAmd64:
needs: CheckLabels
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 docker_images_check.py
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_images_check.py --suffix amd64
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images_amd64
path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
DockerHubPush:
needs: [DockerHubPushAmd64, DockerHubPushAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Download changed aarch64 images
uses: actions/download-artifact@v2
with:
name: changed_images_aarch64
path: ${{ runner.temp }}
- name: Download changed amd64 images
uses: actions/download-artifact@v2
with:
name: changed_images_amd64
path: ${{ runner.temp }}
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docker_images_check/changed_images.json
path: ${{ runner.temp }}/changed_images.json
DocsCheck:
needs: DockerHubPush
runs-on: [self-hosted, func-tester]
@ -63,17 +109,17 @@ jobs:
path: ${{ env.TEMP_PATH }}
- name: Clear repository
run: |
sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Docs Check
run: |
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 docs_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"


@ -8,7 +8,7 @@ on: # yamllint disable-line rule:truthy
schedule:
- cron: '0 */6 * * *'
workflow_run:
workflows: ["CIGithubActions"]
workflows: ["PullRequestCI"]
types:
- completed
workflow_dispatch:
@ -24,21 +24,21 @@ jobs:
EOF
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Jepsen Test
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 keeper_jepsen_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@ -19,23 +19,68 @@ on: # yamllint disable-line rule:truthy
- '.github/**'
workflow_dispatch:
jobs:
DockerHubPush:
runs-on: [self-hosted, style-checker]
DockerHubPushAarch64:
runs-on: [self-hosted, func-tester-aarch64]
steps:
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 docker_images_check.py
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_images_check.py --suffix aarch64
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images_aarch64
path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
DockerHubPushAmd64:
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_images_check.py --suffix amd64
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images_amd64
path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
DockerHubPush:
needs: [DockerHubPushAmd64, DockerHubPushAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Download changed aarch64 images
uses: actions/download-artifact@v2
with:
name: changed_images_aarch64
path: ${{ runner.temp }}
- name: Download changed amd64 images
uses: actions/download-artifact@v2
with:
name: changed_images_amd64
path: ${{ runner.temp }}
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docker_images_check/changed_images.json
path: ${{ runner.temp }}/changed_images.json
DocsRelease:
needs: DockerHubPush
runs-on: [self-hosted, func-tester]
@ -53,7 +98,7 @@ jobs:
EOF
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Download changed images
@ -63,14 +108,14 @@ jobs:
path: ${{ env.TEMP_PATH }}
- name: Docs Release
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 docs_release.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"

File diff suppressed because it is too large.


@ -23,20 +23,20 @@ jobs:
EOF
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'true'
- name: Codebrowser
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 codebrowser_check.py
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 codebrowser_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"

.gitmodules (3 changed lines)

@ -247,6 +247,9 @@
[submodule "contrib/sysroot"]
path = contrib/sysroot
url = https://github.com/ClickHouse-Extras/sysroot.git
[submodule "contrib/hive-metastore"]
path = contrib/hive-metastore
url = https://github.com/ClickHouse-Extras/hive-metastore
[submodule "contrib/azure"]
path = contrib/azure
url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git

File diff suppressed because it is too large.


@ -511,6 +511,7 @@ include (cmake/find/h3.cmake)
include (cmake/find/libxml2.cmake)
include (cmake/find/brotli.cmake)
include (cmake/find/protobuf.cmake)
include (cmake/find/thrift.cmake)
include (cmake/find/grpc.cmake)
include (cmake/find/pdqsort.cmake)
include (cmake/find/miniselect.cmake)
@ -520,7 +521,7 @@ include (cmake/find/curl.cmake)
include (cmake/find/s3.cmake)
include (cmake/find/blob_storage.cmake)
include (cmake/find/base64.cmake)
include (cmake/find/parquet.cmake)
include (cmake/find/parquet.cmake) # uses protobuf and thrift
include (cmake/find/simdjson.cmake)
include (cmake/find/fast_float.cmake)
include (cmake/find/rapidjson.cmake)
@ -548,6 +549,7 @@ include (cmake/find/cassandra.cmake)
include (cmake/find/sentry.cmake)
include (cmake/find/datasketches.cmake)
include (cmake/find/libprotobuf-mutator.cmake)
include (cmake/find/hive-metastore.cmake)
set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "")
find_contrib_lib(cityhash)


@ -21,9 +21,10 @@ The following versions of ClickHouse server are currently being supported with s
| 21.6 | :x: |
| 21.7 | :x: |
| 21.8 | ✅ |
| 21.9 | |
| 21.9 | :x: |
| 21.10 | ✅ |
| 21.11 | ✅ |
| 21.12 | ✅ |
## Reporting a Vulnerability


@ -48,7 +48,9 @@ struct StringRef
std::string toString() const { return std::string(data, size); }
explicit operator std::string() const { return toString(); }
constexpr explicit operator std::string_view() const { return {data, size}; }
std::string_view toView() const { return std::string_view(data, size); }
constexpr explicit operator std::string_view() const { return std::string_view(data, size); }
};
/// Here constexpr doesn't implicate inline, see https://www.viva64.com/en/w/v1043/


@ -125,11 +125,11 @@ public:
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
struct common_type<wide::integer<Bits, Signed>, wide::integer<Bits2, Signed2>>
{
using type = std::conditional_t < Bits == Bits2,
wide::integer<
Bits,
std::conditional_t<(std::is_same_v<Signed, Signed2> && std::is_same_v<Signed2, signed>), signed, unsigned>>,
std::conditional_t<Bits2<Bits, wide::integer<Bits, Signed>, wide::integer<Bits2, Signed2>>>;
using type = std::conditional_t<Bits == Bits2,
wide::integer<
Bits,
std::conditional_t<(std::is_same_v<Signed, Signed2> && std::is_same_v<Signed2, signed>), signed, unsigned>>,
std::conditional_t<Bits2<Bits, wide::integer<Bits, Signed>, wide::integer<Bits2, Signed2>>>;
};
template <size_t Bits, typename Signed, typename Arithmetic>
@ -827,7 +827,7 @@ public:
CompilerUInt128 a = (CompilerUInt128(numerator.items[1]) << 64) + numerator.items[0];
CompilerUInt128 b = (CompilerUInt128(denominator.items[1]) << 64) + denominator.items[0];
CompilerUInt128 c = a / b;
CompilerUInt128 c = a / b; // NOLINT
integer<Bits, Signed> res;
res.items[0] = c;
@ -1020,8 +1020,15 @@ constexpr integer<Bits, Signed>::integer(std::initializer_list<T> il) noexcept
{
auto it = il.begin();
for (size_t i = 0; i < _impl::item_count; ++i)
{
if (it < il.end())
{
items[i] = *it;
++it;
}
else
items[i] = 0;
}
}
}


@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54458)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 13)
SET(VERSION_REVISION 54459)
SET(VERSION_MAJOR 22)
SET(VERSION_MINOR 2)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 4cc45c1e15912ee300bca7cc8b8da2b888a70e2a)
SET(VERSION_DESCRIBE v21.13.1.1-prestable)
SET(VERSION_STRING 21.13.1.1)
SET(VERSION_GITHASH dfe64a2789bbf51046bb6b5476f874f7b59d124c)
SET(VERSION_DESCRIBE v22.2.1.1-prestable)
SET(VERSION_STRING 22.2.1.1)
# end of autochange


@ -1,30 +1,29 @@
option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES})
option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
"Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
ON)
if (ENABLE_AZURE_BLOB_STORAGE)
option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
"Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
ON)
set(USE_AZURE_BLOB_STORAGE 1)
set(AZURE_BLOB_STORAGE_LIBRARY azure_sdk)
if ((NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/sdk"
OR NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/cmake-modules")
AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (WARNING "submodule contrib/azure is missing. to fix try run: \n git submodule update --init")
set(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY OFF)
set(USE_AZURE_BLOB_STORAGE 0)
endif ()
if (NOT USE_INTERNAL_SSL_LIBRARY AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal SSL library")
endif()
if (NOT USE_INTERNAL_CURL AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal curl library")
endif()
endif()
if ((NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/sdk"
OR NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/cmake-modules")
AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (WARNING "submodule contrib/azure is missing. to fix try run: \n git submodule update --init")
set(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY OFF)
set(USE_AZURE_BLOB_STORAGE 0)
endif ()
if (NOT USE_INTERNAL_SSL_LIBRARY AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal SSL library")
endif()
if (NOT USE_INTERNAL_CURL AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal curl library")
endif()
if (USE_AZURE_BLOB_STORAGE)
message (STATUS "Using Azure Blob Storage - ${USE_AZURE_BLOB_STORAGE}")
endif()
message (STATUS "Using Azure Blob Storage - ${USE_AZURE_BLOB_STORAGE}")


@ -31,6 +31,7 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
set(LAUNCHER ${CCACHE_FOUND})
# debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is
# filled from the debian/changelog or current time.
@ -39,13 +40,8 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
# of the manifest, which do not allow to use previous cache,
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__
#
# So for:
# - 4.2+ does not require any sloppiness
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required")
set(LAUNCHER ${CCACHE_FOUND})
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
# Exclude SOURCE_DATE_EPOCH env for ccache versions between [4.0, 4.2).
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2")
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND})
endif()
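The simplified logic keeps the SOURCE_DATE_EPOCH workaround only for ccache versions in [4.0, 4.2): for those, the compiler launcher unsets the variable so Debian's reproducible-build timestamp does not leak into the cache manifest and invalidate earlier cache hits. Roughly what the generated launcher does, as a sketch (the compiler and source file are illustrative):

```
# ccache 4.0 and 4.1: strip SOURCE_DATE_EPOCH from the environment before compiling
env -u SOURCE_DATE_EPOCH ccache clang++ -c example.cpp -o example.o
# ccache 4.2+ ignores SOURCE_DATE_EPOCH on its own, so a plain 'ccache <compiler>' launcher suffices
```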


@ -0,0 +1,26 @@
option(ENABLE_HIVE "Enable Hive" ${ENABLE_LIBRARIES})
if (NOT ENABLE_HIVE)
message("Hive disabled")
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hive-metastore")
message(WARNING "submodule contrib/hive-metastore is missing. to fix try run: \n git submodule update --init")
set(USE_HIVE 0)
elseif (NOT USE_THRIFT)
message(WARNING "Thrift is not found, which is needed by Hive")
set(USE_HIVE 0)
elseif (NOT USE_HDFS)
message(WARNING "HDFS is not found, which is needed by Hive")
set(USE_HIVE 0)
elseif (NOT USE_ORC OR NOT USE_ARROW OR NOT USE_PARQUET)
message(WARNING "ORC/Arrow/Parquet is not found, which are needed by Hive")
set(USE_HIVE 0)
else()
set(USE_HIVE 1)
set(HIVE_METASTORE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore)
set(HIVE_METASTORE_LIBRARY hivemetastore)
endif()
message (STATUS "Using_Hive=${USE_HIVE}: ${HIVE_METASTORE_INCLUDE_DIR} : ${HIVE_METASTORE_LIBRARY}")
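USE_HIVE is set only when the hive-metastore submodule is checked out and Thrift, HDFS, ORC, Arrow and Parquet are all available. A hedged sketch of satisfying the prerequisites and toggling the feature at configure time (run from a ClickHouse checkout; the flag name comes from this file):

```
# Fetch the submodules the Hive integration needs
git submodule update --init contrib/hive-metastore contrib/thrift
# Configure with Hive support on (or pass -DENABLE_HIVE=0 to disable it)
cmake -S . -B build -DENABLE_HIVE=1
```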


@ -34,7 +34,6 @@ endif()
if(NOT USE_INTERNAL_PARQUET_LIBRARY)
find_package(Arrow)
find_package(Parquet)
find_library(THRIFT_LIBRARY thrift)
find_library(UTF8_PROC_LIBRARY utf8proc)
find_package(BZip2)
@ -145,12 +144,10 @@ if(NOT EXTERNAL_PARQUET_FOUND AND NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT O
set(FLATBUFFERS_LIBRARY flatbuffers)
set(ARROW_LIBRARY arrow_static)
set(PARQUET_LIBRARY parquet_static)
set(THRIFT_LIBRARY thrift_static)
else()
set(FLATBUFFERS_LIBRARY flatbuffers_shared)
set(ARROW_LIBRARY arrow_shared)
set(PARQUET_LIBRARY parquet_shared)
set(THRIFT_LIBRARY thrift)
endif()
set(USE_PARQUET 1)

cmake/find/thrift.cmake (new file, 34 lines)

@ -0,0 +1,34 @@
option(ENABLE_THRIFT "Enable Thrift" ${ENABLE_LIBRARIES})
if (NOT ENABLE_THRIFT)
message (STATUS "thrift disabled")
set(USE_INTERNAL_THRIFT_LIBRARY 0)
return()
endif()
option(USE_INTERNAL_THRIFT_LIBRARY "Set to FALSE to use system thrift library instead of bundled" ON)
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/thrift")
if (USE_INTERNAL_THRIFT_LIBRARY)
message (WARNING "submodule contrib/thrift is missing. to fix try run: \n git submodule update --init --recursive")
set(USE_INTERNAL_THRIFT_LIBRARY 0)
endif ()
endif()
if (USE_INTERNAL_THRIFT_LIBRARY)
if (MAKE_STATIC_LIBRARIES)
set(THRIFT_LIBRARY thrift_static)
else()
set(THRIFT_LIBRARY thrift)
endif()
set (THRIFT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src")
set(USE_THRIFT 1)
else()
find_library(THRIFT_LIBRARY thrift)
if (NOT THRIFT_LIBRARY)
set(USE_THRIFT 0)
else()
set(USE_THRIFT 1)
endif()
endif ()
message (STATUS "Using thrift=${USE_THRIFT}: ${THRIFT_INCLUDE_DIR} : ${THRIFT_LIBRARY}")
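By default the bundled Thrift from contrib/thrift is built (static or shared depending on MAKE_STATIC_LIBRARIES); turning USE_INTERNAL_THRIFT_LIBRARY off makes the build look for a system libthrift via find_library instead. A short sketch using the option names from this file (a usable system thrift is an assumption):

```
# Prefer a system-provided libthrift over the bundled contrib/thrift
cmake -S . -B build -DENABLE_THRIFT=1 -DUSE_INTERNAL_THRIFT_LIBRARY=OFF
```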


@ -150,7 +150,6 @@ if (USE_INTERNAL_PARQUET_LIBRARY)
# The library is large - avoid bloat.
target_compile_options (${ARROW_LIBRARY} PRIVATE -g0)
target_compile_options (${THRIFT_LIBRARY} PRIVATE -g0)
target_compile_options (${PARQUET_LIBRARY} PRIVATE -g0)
endif()
@ -206,6 +205,10 @@ if (USE_INTERNAL_PROTOBUF_LIBRARY)
add_subdirectory(protobuf-cmake)
endif ()
if (USE_INTERNAL_THRIFT_LIBRARY)
add_subdirectory(thrift-cmake)
endif ()
if (USE_INTERNAL_HDFS3_LIBRARY)
add_subdirectory(libhdfs3-cmake)
endif ()
@ -299,6 +302,10 @@ if (USE_S2_GEOMETRY)
add_subdirectory(s2geometry-cmake)
endif()
if (USE_HIVE)
add_subdirectory (hive-metastore-cmake)
endif()
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
# in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,

contrib/NuRaft (submodule)

@ -1 +1 @@
Subproject commit ff100a8713146e1ca4b4158dd6cc4eef9af47fc3
Subproject commit c2043aa250e53ad5cf75e596e319d587af4dcb3c

contrib/arrow (submodule)

@ -1 +1 @@
Subproject commit aa9a7a698e33e278abe053f4634170b3b026e48e
Subproject commit 1d9cc51daa4e7e9fc6926320ef73759818bd736e


@ -17,57 +17,8 @@ else()
set(ARROW_FULL_SO_VERSION "${ARROW_SO_VERSION}.${ARROW_VERSION_PATCH}.0")
endif()
# === thrift
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp")
# contrib/thrift/lib/cpp/CMakeLists.txt
set(thriftcpp_SOURCES
"${LIBRARY_DIR}/src/thrift/TApplicationException.cpp"
"${LIBRARY_DIR}/src/thrift/TOutput.cpp"
"${LIBRARY_DIR}/src/thrift/async/TAsyncChannel.cpp"
"${LIBRARY_DIR}/src/thrift/async/TAsyncProtocolProcessor.cpp"
"${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.h"
"${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/ThreadManager.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/TimerManager.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/Util.cpp"
"${LIBRARY_DIR}/src/thrift/processor/PeekProcessor.cpp"
"${LIBRARY_DIR}/src/thrift/protocol/TBase64Utils.cpp"
"${LIBRARY_DIR}/src/thrift/protocol/TDebugProtocol.cpp"
"${LIBRARY_DIR}/src/thrift/protocol/TJSONProtocol.cpp"
"${LIBRARY_DIR}/src/thrift/protocol/TMultiplexedProtocol.cpp"
"${LIBRARY_DIR}/src/thrift/protocol/TProtocol.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TTransportException.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TFDTransport.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TSimpleFileTransport.cpp"
"${LIBRARY_DIR}/src/thrift/transport/THttpTransport.cpp"
"${LIBRARY_DIR}/src/thrift/transport/THttpClient.cpp"
"${LIBRARY_DIR}/src/thrift/transport/THttpServer.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TSocket.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TSocketPool.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TServerSocket.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TTransportUtils.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TBufferTransports.cpp"
"${LIBRARY_DIR}/src/thrift/server/TConnectedClient.cpp"
"${LIBRARY_DIR}/src/thrift/server/TServerFramework.cpp"
"${LIBRARY_DIR}/src/thrift/server/TSimpleServer.cpp"
"${LIBRARY_DIR}/src/thrift/server/TThreadPoolServer.cpp"
"${LIBRARY_DIR}/src/thrift/server/TThreadedServer.cpp"
)
set(thriftcpp_threads_SOURCES
"${LIBRARY_DIR}/src/thrift/concurrency/ThreadFactory.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/Thread.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/Monitor.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/Mutex.cpp"
)
add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES})
set_target_properties(${THRIFT_LIBRARY} PROPERTIES CXX_STANDARD 14) # REMOVE after https://github.com/apache/thrift/pull/1641
target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src")
target_link_libraries (${THRIFT_LIBRARY} PRIVATE boost::headers_only)
# === orc
set(ORC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/orc/c++")
set(ORC_INCLUDE_DIR "${ORC_SOURCE_DIR}/include")
set(ORC_SOURCE_SRC_DIR "${ORC_SOURCE_DIR}/src")
@ -463,49 +414,6 @@ set(PARQUET_SRCS
#list(TRANSFORM PARQUET_SRCS PREPEND "${LIBRARY_DIR}/") # cmake 3.12
add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS})
target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src" PRIVATE ${OPENSSL_INCLUDE_DIR})
set (HAVE_ARPA_INET_H 1)
set (HAVE_FCNTL_H 1)
set (HAVE_GETOPT_H 1)
set (HAVE_INTTYPES_H 1)
set (HAVE_NETDB_H 1)
set (HAVE_NETINET_IN_H 1)
set (HAVE_SIGNAL_H 1)
set (HAVE_STDINT_H 1)
set (HAVE_UNISTD_H 1)
set (HAVE_PTHREAD_H 1)
set (HAVE_SYS_IOCTL_H 1)
set (HAVE_SYS_PARAM_H 1)
set (HAVE_SYS_RESOURCE_H 1)
set (HAVE_SYS_SOCKET_H 1)
set (HAVE_SYS_STAT_H 1)
set (HAVE_SYS_TIME_H 1)
set (HAVE_SYS_UN_H 1)
set (HAVE_POLL_H 1)
set (HAVE_SYS_POLL_H 1)
set (HAVE_SYS_SELECT_H 1)
set (HAVE_SCHED_H 1)
set (HAVE_STRING_H 1)
set (HAVE_STRINGS_H 1)
set (HAVE_GETHOSTBYNAME 1)
set (HAVE_STRERROR_R 1)
set (HAVE_SCHED_GET_PRIORITY_MAX 1)
set (HAVE_SCHED_GET_PRIORITY_MIN 1)
if (OS_LINUX AND NOT USE_MUSL)
set (STRERROR_R_CHAR_P 1)
endif ()
#set(PACKAGE ${PACKAGE_NAME})
#set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
#set(VERSION ${thrift_VERSION})
# generate a config.h file
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build/cmake/config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/thrift/config.h")
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex ${OPENSSL_LIBRARIES})
if (SANITIZE STREQUAL "undefined")

contrib/hive-metastore (new submodule)

@ -0,0 +1 @@
Subproject commit 809a77d435ce218d9b000733f19489c606fc567b


@ -0,0 +1,9 @@
set (SRCS
${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/hive_metastore_constants.cpp
${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/hive_metastore_types.cpp
${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/ThriftHiveMetastore.cpp
)
add_library(${HIVE_METASTORE_LIBRARY} ${SRCS})
target_link_libraries(${HIVE_METASTORE_LIBRARY} PUBLIC ${THRIFT_LIBRARY})
target_include_directories(${HIVE_METASTORE_LIBRARY} SYSTEM PUBLIC ${HIVE_METASTORE_INCLUDE_DIR})


@ -0,0 +1,87 @@
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp")
set(thriftcpp_SOURCES
"${LIBRARY_DIR}/src/thrift/TApplicationException.cpp"
"${LIBRARY_DIR}/src/thrift/TOutput.cpp"
"${LIBRARY_DIR}/src/thrift/async/TAsyncChannel.cpp"
"${LIBRARY_DIR}/src/thrift/async/TAsyncProtocolProcessor.cpp"
"${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.h"
"${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/ThreadManager.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/TimerManager.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/Util.cpp"
"${LIBRARY_DIR}/src/thrift/processor/PeekProcessor.cpp"
"${LIBRARY_DIR}/src/thrift/protocol/TBase64Utils.cpp"
"${LIBRARY_DIR}/src/thrift/protocol/TDebugProtocol.cpp"
"${LIBRARY_DIR}/src/thrift/protocol/TJSONProtocol.cpp"
"${LIBRARY_DIR}/src/thrift/protocol/TMultiplexedProtocol.cpp"
"${LIBRARY_DIR}/src/thrift/protocol/TProtocol.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TTransportException.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TFDTransport.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TSimpleFileTransport.cpp"
"${LIBRARY_DIR}/src/thrift/transport/THttpTransport.cpp"
"${LIBRARY_DIR}/src/thrift/transport/THttpClient.cpp"
"${LIBRARY_DIR}/src/thrift/transport/THttpServer.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TSocket.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TSocketPool.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TServerSocket.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TTransportUtils.cpp"
"${LIBRARY_DIR}/src/thrift/transport/TBufferTransports.cpp"
"${LIBRARY_DIR}/src/thrift/server/TConnectedClient.cpp"
"${LIBRARY_DIR}/src/thrift/server/TServerFramework.cpp"
"${LIBRARY_DIR}/src/thrift/server/TSimpleServer.cpp"
"${LIBRARY_DIR}/src/thrift/server/TThreadPoolServer.cpp"
"${LIBRARY_DIR}/src/thrift/server/TThreadedServer.cpp"
)
set(thriftcpp_threads_SOURCES
"${LIBRARY_DIR}/src/thrift/concurrency/ThreadFactory.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/Thread.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/Monitor.cpp"
"${LIBRARY_DIR}/src/thrift/concurrency/Mutex.cpp"
)
include("${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake") # makes config.h
set (HAVE_ARPA_INET_H 1)
set (HAVE_FCNTL_H 1)
set (HAVE_GETOPT_H 1)
set (HAVE_INTTYPES_H 1)
set (HAVE_NETDB_H 1)
set (HAVE_NETINET_IN_H 1)
set (HAVE_SIGNAL_H 1)
set (HAVE_STDINT_H 1)
set (HAVE_UNISTD_H 1)
set (HAVE_PTHREAD_H 1)
set (HAVE_SYS_IOCTL_H 1)
set (HAVE_SYS_PARAM_H 1)
set (HAVE_SYS_RESOURCE_H 1)
set (HAVE_SYS_SOCKET_H 1)
set (HAVE_SYS_STAT_H 1)
set (HAVE_SYS_TIME_H 1)
set (HAVE_SYS_UN_H 1)
set (HAVE_POLL_H 1)
set (HAVE_SYS_POLL_H 1)
set (HAVE_SYS_SELECT_H 1)
set (HAVE_SCHED_H 1)
set (HAVE_STRING_H 1)
set (HAVE_STRINGS_H 1)
set (HAVE_GETHOSTBYNAME 1)
set (HAVE_STRERROR_R 1)
set (HAVE_SCHED_GET_PRIORITY_MAX 1)
set (HAVE_SCHED_GET_PRIORITY_MIN 1)
if (OS_LINUX AND NOT USE_MUSL)
set (STRERROR_R_CHAR_P 1)
endif ()
#set(PACKAGE ${PACKAGE_NAME})
#set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
#set(VERSION ${thrift_VERSION})
# generate a config.h file
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build/cmake/config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/thrift/config.h")
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES})
target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC "${THRIFT_INCLUDE_DIR}" ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries (${THRIFT_LIBRARY} PUBLIC boost::headers_only)


@ -0,0 +1 @@
../../../thrift/build/cmake/config.h.in

debian/changelog (2 changed lines)

@ -1,4 +1,4 @@
clickhouse (21.13.1.1) unstable; urgency=low
clickhouse (22.1.1.1) unstable; urgency=low
* Modified source code


@ -1,46 +0,0 @@
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] http://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list
RUN apt-get update \
&& apt-get install \
bash \
ccache \
cmake \
curl \
expect \
g++ \
gcc \
ninja-build \
perl \
pkg-config \
python3 \
python3-lxml \
python3-requests \
python3-termcolor \
tzdata \
llvm-${LLVM_VERSION} \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
lld-${LLVM_VERSION} \
lldb-${LLVM_VERSION} \
--yes --no-install-recommends
COPY build.sh /
CMD ["/bin/bash", "/build.sh"]


@ -1,12 +0,0 @@
build: image
mkdir -p $(HOME)/.ccache
docker run --network=host --rm --workdir /server --volume $(realpath ../..):/server --cap-add=SYS_PTRACE --mount=type=bind,source=$(HOME)/.ccache,destination=/ccache -e CCACHE_DIR=/ccache -it yandex/clickhouse-builder
pull:
docker pull yandex/clickhouse-builder
image:
docker build --network=host -t yandex/clickhouse-builder .
image_push:
docker push yandex/clickhouse-builder


@ -1,33 +0,0 @@
Allows building ClickHouse in Docker.
This is useful if you have an old OS distribution and you don't want to build fresh gcc or clang from sources.
Usage:
Prepare image:
```
make image
```
Run build:
```
make build
```
Before running, ensure that your user has access to Docker:
To check that you have access to Docker, run `docker ps`.
If not, add your user to the `docker` group: `sudo usermod -aG docker $USER` and re-login.
(You must close all your sessions. For example, restart your computer.)
Build results are available in the `build_docker` directory at the top level of your working copy.
It builds only binaries, not packages.
For example, run server:
```
cd $(git rev-parse --show-toplevel)/src/Server
$(git rev-parse --show-toplevel)/docker/builder/programs/clickhouse server --config-file $(git rev-parse --show-toplevel)/programs/server/config.xml
```
Run client:
```
$(git rev-parse --show-toplevel)/docker/builder/programs/clickhouse client
```


@ -1,15 +0,0 @@
#!/usr/bin/env bash
set -e
#ccache -s # uncomment to display CCache statistics
mkdir -p /server/build_docker
cd /server/build_docker
cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v clang-13)" "-DCMAKE_CXX_COMPILER=$(command -v clang++-13)"
# Set the number of build jobs to half the number of virtual CPU cores (rounded up).
# By default, ninja uses all virtual CPU cores, which leads to very high memory consumption without much improvement in build time.
# Note that modern x86_64 CPUs use two-way hyper-threading (as of 2018).
# Without this option my laptop with 16 GiB RAM failed to execute the build due to a full system freeze.
NUM_JOBS=$(( ($(nproc || grep -c ^processor /proc/cpuinfo) + 1) / 2 ))
ninja -j $NUM_JOBS && env TEST_OPT="--skip long compile $TEST_OPT" ctest -V -j $NUM_JOBS
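As a quick check of the removed script's arithmetic, the formula halves the virtual core count and rounds up:

```
# With 16 virtual cores: (16 + 1) / 2 = 8 parallel ninja jobs
cores=16
echo $(( (cores + 1) / 2 ))   # prints 8
```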


@ -5,7 +5,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG version=21.13.1.*
ARG version=22.1.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \


@ -1,4 +1,5 @@
# docker build -t clickhouse/docs-build .
# rebuild in #33610
# docker build -t clickhouse/docs-builder .
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
@ -9,8 +10,6 @@ ENV LANG=C.UTF-8
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
python3-setuptools \
virtualenv \
wget \
bash \
python \


@ -1,5 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/docs-check .
FROM clickhouse/docs-builder
ARG FROM_TAG=latest
FROM clickhouse/docs-builder:$FROM_TAG
COPY run.sh /


@ -2,6 +2,7 @@
set -euo pipefail
cd $REPO_PATH/docs/tools
rm -rf venv
mkdir venv
virtualenv -p $(which python3) venv
source venv/bin/activate


@ -1,5 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/docs-release .
FROM clickhouse/docs-builder
ARG FROM_TAG=latest
FROM clickhouse/docs-builder:$FROM_TAG
COPY run.sh /


@ -103,6 +103,10 @@
"name": "clickhouse/mysql-golang-client",
"dependent": []
},
"docker/test/integration/dotnet_client": {
"name": "clickhouse/dotnet-client",
"dependent": []
},
"docker/test/integration/mysql_java_client": {
"name": "clickhouse/mysql-java-client",
"dependent": []


@ -1,4 +1,5 @@
# docker build -t clickhouse/binary-builder .
# rebuild in #33610
# docker build -t clickhouse/binary-builder .
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror


@ -1,3 +1,4 @@
# rebuild in #33610
# docker build -t clickhouse/deb-builder .
FROM ubuntu:20.04
@ -28,12 +29,14 @@ RUN apt-get update \
software-properties-common \
--yes --no-install-recommends
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increases deb packaging speed and is compatible with old systems
RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \
&& chmod +x dpkg-deb \
&& cp dpkg-deb /usr/bin
RUN arch=${TARGETARCH:-amd64} \
&& curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch}
RUN apt-get update \
&& apt-get install \


@ -5,8 +5,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG version=21.13.1.*
ARG gosu_ver=1.10
ARG version=22.1.1.*
# set non-empty deb_location_url url to create a docker image
# from debs created by CI build, for example:
@ -30,6 +29,23 @@ ARG DEBIAN_FRONTEND=noninteractive
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
# To drop privileges, we need a 'su' command that simply changes the uid and gid.
# In fact, the 'su' command from Linux is not so simple, due to an inherent vulnerability in Linux:
# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking
# To mitigate this drawback of Linux, the 'su' command creates its own pseudo-terminal
# and forwards commands. Due to some ridiculous circumstances, it does not work in Docker (or it does)
# and for these reasons people are using alternatives to the 'su' command in Docker
# that don't mess with the terminal, don't care about closing the opened files, etc.,
# but are only safe for dropping privileges inside Docker.
# The question is which implementation of the 'su' command to use.
# It should be a simple script doing just about two syscalls.
# Some people tend to use the 'gosu' tool, which is written in Go.
# It is not used for several reasons:
# 1. Dependency on some foreign code in yet another programming language - does not sound alright.
# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners.
COPY su-exec.c /su-exec.c
RUN groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
&& apt-get update \
@ -68,8 +84,12 @@ RUN groupadd -r clickhouse --gid=101 \
clickhouse-client=$version \
clickhouse-server=$version ; \
fi \
&& wget --progress=bar:force:noscroll "https://github.com/tianon/gosu/releases/download/$gosu_ver/gosu-$(dpkg --print-architecture)" -O /bin/gosu \
&& chmod +x /bin/gosu \
&& apt-get install -y --no-install-recommends tcc libc-dev && \
tcc /su-exec.c -o /bin/su-exec && \
chown root:root /bin/su-exec && \
chmod 0755 /bin/su-exec && \
rm /su-exec.c && \
apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \
&& clickhouse-local -q 'SELECT * FROM system.build_options' \
&& rm -rf \
/var/lib/apt/lists/* \
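The image now compiles the vendored su-exec.c with tcc at build time and removes the compiler afterwards, replacing the gosu binary that used to be downloaded. Per the usage string in su-exec.c below (`user-spec command [args]`), privileges can be dropped roughly like this (a sketch; the server arguments are illustrative):

```
# su-exec changes uid/gid to the given user:group and then execs the command
su-exec clickhouse:clickhouse clickhouse-server --config-file=/etc/clickhouse-server/config.xml
```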
@ -100,4 +120,3 @@ VOLUME /var/lib/clickhouse
ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml
ENTRYPOINT ["/entrypoint.sh"]


@ -6,7 +6,7 @@
# Middle steps are performed by the bash script.
FROM ubuntu:18.04 as clickhouse-server-base
ARG gosu_ver=1.10
ARG gosu_ver=1.14
VOLUME /packages/

docker/server/su-exec.c (new file, 138 lines)

@ -0,0 +1,138 @@
/*
https://github.com/ncopa/su-exec
The file is copy-pasted verbatim to avoid supply chain attacks.
The MIT License (MIT)
Copyright (c) 2015 ncopa
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
/* set user and group id and exec */
#include <sys/types.h>
#include <err.h>
#include <errno.h>
#include <grp.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
static char *argv0;
static void usage(int exitcode)
{
printf("Usage: %s user-spec command [args]\n", argv0);
exit(exitcode);
}
int main(int argc, char *argv[])
{
char *user, *group, **cmdargv;
char *end;
uid_t uid = getuid();
gid_t gid = getgid();
argv0 = argv[0];
if (argc < 3)
usage(0);
user = argv[1];
group = strchr(user, ':');
if (group)
*group++ = '\0';
cmdargv = &argv[2];
struct passwd *pw = NULL;
if (user[0] != '\0') {
uid_t nuid = strtol(user, &end, 10);
if (*end == '\0')
uid = nuid;
else {
pw = getpwnam(user);
if (pw == NULL)
err(1, "getpwnam(%s)", user);
}
}
if (pw == NULL) {
pw = getpwuid(uid);
}
if (pw != NULL) {
uid = pw->pw_uid;
gid = pw->pw_gid;
}
setenv("HOME", pw != NULL ? pw->pw_dir : "/", 1);
if (group && group[0] != '\0') {
/* group was specified, ignore grouplist for setgroups later */
pw = NULL;
gid_t ngid = strtol(group, &end, 10);
if (*end == '\0')
gid = ngid;
else {
struct group *gr = getgrnam(group);
if (gr == NULL)
err(1, "getgrnam(%s)", group);
gid = gr->gr_gid;
}
}
if (pw == NULL) {
if (setgroups(1, &gid) < 0)
err(1, "setgroups(%i)", gid);
} else {
int ngroups = 0;
gid_t *glist = NULL;
while (1) {
int r = getgrouplist(pw->pw_name, gid, glist, &ngroups);
if (r >= 0) {
if (setgroups(ngroups, glist) < 0)
err(1, "setgroups");
break;
}
glist = realloc(glist, ngroups * sizeof(gid_t));
if (glist == NULL)
err(1, "malloc");
}
}
if (setgid(gid) < 0)
err(1, "setgid(%i)", gid);
if (setuid(uid) < 0)
err(1, "setuid(%i)", uid);
execvp(cmdargv[0], cmdargv);
err(1, "%s", cmdargv[0]);
return 1;
}


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG version=21.13.1.*
ARG version=22.1.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \


@ -1,5 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/test-base .
FROM clickhouse/test-util
ARG FROM_TAG=latest
FROM clickhouse/test-util:$FROM_TAG
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
@ -28,12 +30,14 @@ RUN apt-get update \
software-properties-common \
--yes --no-install-recommends
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increases deb packaging speed and is compatible with old systems
RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \
&& chmod +x dpkg-deb \
&& cp dpkg-deb /usr/bin
RUN arch=${TARGETARCH:-amd64} \
&& curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch}
RUN apt-get update \
&& apt-get install \

View File

@ -1,12 +1,14 @@
# rebuild in #33610
# docker build --network=host -t clickhouse/codebrowser .
# docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output clickhouse/codebrowser
FROM clickhouse/binary-builder
ARG FROM_TAG=latest
FROM clickhouse/binary-builder:$FROM_TAG
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev libmlir-13-dev
# repo versions doesn't work correctly with C++17
# also we push reports to s3, so we add index.html to subfolder urls

View File

@ -1,5 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/fasttest .
FROM clickhouse/test-util
ARG FROM_TAG=latest
FROM clickhouse/test-util:$FROM_TAG
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
@ -28,12 +30,14 @@ RUN apt-get update \
software-properties-common \
--yes --no-install-recommends
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increases deb packaging speed and is compatible with old systems
RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \
&& chmod +x dpkg-deb \
&& cp dpkg-deb /usr/bin
RUN arch=${TARGETARCH:-amd64} \
&& curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch}
RUN apt-get update \
&& apt-get install \

View File

@ -1,5 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/fuzzer .
FROM clickhouse/test-base
ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"

View File

@ -1,44 +1,57 @@
# rebuild in #33610
# docker build -t clickhouse/integration-test .
FROM clickhouse/test-base
ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG
SHELL ["/bin/bash", "-c"]
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get -y install \
tzdata \
python3 \
libicu-dev \
bsdutils \
curl \
default-jre \
g++ \
gdb \
unixodbc \
odbcinst \
iproute2 \
krb5-user \
libicu-dev \
libsqlite3-dev \
libsqliteodbc \
odbc-postgresql \
sqlite3 \
curl \
tar \
lz4 \
krb5-user \
iproute2 \
lsof \
g++ \
default-jre
lz4 \
odbc-postgresql \
odbcinst \
python3 \
rpm2cpio \
sqlite3 \
tar \
tzdata \
unixodbc \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
/tmp/*
RUN apt-get clean
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
# Install MySQL ODBC driver
RUN curl 'https://downloads.mysql.com/archives/get/p/10/file/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --location --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
# Install MySQL ODBC driver from RHEL rpm
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& cd /tmp \
&& curl -o mysql-odbc.rpm "https://cdn.mysql.com/Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.27-1.el8.${rarch}.rpm" \
&& rpm2archive mysql-odbc.rpm \
&& tar xf mysql-odbc.rpm.tgz -C / ./usr/lib64/ \
&& LINK_DIR=$(dpkg -L libodbc1 | grep '^/usr/lib/.*-linux-gnu/odbc$') \
&& ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR" \
&& ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR"/libmyodbc.so
# Unfortunately this is required for a single test that converts data from ZooKeeper to clickhouse-keeper.
# ZooKeeper is not started by default, but consumes some space in containers.
# 777 perms used to allow anybody to start/stop ZooKeeper
ENV ZOOKEEPER_VERSION='3.6.3'
RUN curl -O "https://mirrors.estointernet.in/apache/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
RUN curl -O "https://dlcdn.apache.org/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
RUN tar -zxvf apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz && mv apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper && chmod -R 777 /opt/zookeeper && rm apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz
RUN echo $'tickTime=2500 \n\
tickTime=2500 \n\

View File

@ -0,0 +1,2 @@
bin/
obj/

View File

@ -0,0 +1,10 @@
# docker build .
# docker run -it --rm --network=host 14f23e59669c dotnet run --host localhost --port 8123 --user default --database default
FROM mcr.microsoft.com/dotnet/sdk:3.1
WORKDIR /client
COPY *.cs *.csproj /client/
ARG VERSION=4.1.0
RUN dotnet add package ClickHouse.Client -v ${VERSION}

View File

@ -0,0 +1,90 @@
using System;
using System.Threading.Tasks;
using ClickHouse.Client.ADO;
using ClickHouse.Client.Utility;
namespace clickhouse.test
{
class Program
{
static async Task Main(string[] args)
{
try
{
using var connection = new ClickHouseConnection(GetConnectionString(args));
await connection.ExecuteStatementAsync("CREATE DATABASE IF NOT EXISTS test");
await connection.ExecuteStatementAsync("TRUNCATE TABLE IF EXISTS test.dotnet_test");
await connection.ExecuteStatementAsync("CREATE TABLE IF NOT EXISTS test.dotnet_test (`age` Int32, `name` String) Engine = Memory");
using var command = connection.CreateCommand();
command.AddParameter("name", "Linus Torvalds");
command.AddParameter("age", 51);
command.CommandText = "INSERT INTO test.dotnet_test VALUES({age:Int32}, {name:String})";
await command.ExecuteNonQueryAsync();
using var result1 = await connection.ExecuteReaderAsync("SELECT * FROM test.dotnet_test");
while (result1.Read())
{
var values = new object[result1.FieldCount];
result1.GetValues(values);
foreach (var row in values)
{
Console.WriteLine(row);
}
}
using var result2 = await connection.ExecuteReaderAsync(selectSql);
while (result2.Read())
{
var values = new object[result2.FieldCount];
result2.GetValues(values);
foreach (var row in values)
{
Console.WriteLine(row);
}
}
}
catch (Exception e)
{
Console.Error.WriteLine(e);
Environment.ExitCode = 1;
}
}
private static string GetConnectionString(string[] args)
{
var builder = new ClickHouseConnectionStringBuilder();
int i = 0;
while (i < args.Length)
{
switch (args[i])
{
case "--host":
builder.Host = args[++i];
break;
case "--port":
builder.Port = UInt16.Parse(args[++i]);
break;
case "--user":
builder.Username = args[++i];
break;
case "--password":
builder.Password = args[++i];
break;
case "--database":
builder.Database = args[++i];
break;
default:
i++;
break;
}
}
return builder.ToString();
}
private static string selectSql = @"SELECT NULL, toInt8(-8), toUInt8(8), toInt16(-16), toUInt16(16), toInt16(-32), toUInt16(32), toInt64(-64), toUInt64(64), toFloat32(32e6), toFloat32(-32e6), toFloat64(64e6), toFloat64(-64e6), 'TestString', toFixedString('ASD',3), toFixedString('ASD',5), toUUID('00000000-0000-0000-0000-000000000000'), toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0'), toIPv4('1.2.3.4'), toIPv4('255.255.255.255'), CAST('a', 'Enum(\'a\' = 1, \'b\' = 2)'), CAST('a', 'Enum8(\'a\' = -1, \'b\' = 127)'), CAST('a', 'Enum16(\'a\' = -32768, \'b\' = 32767)'), array(1, 2, 3), array('a', 'b', 'c'), array(1, 2, NULL), toInt32OrNull('123'), toInt32OrNull(NULL), CAST(NULL AS Nullable(DateTime)), CAST(NULL AS LowCardinality(Nullable(String))), toLowCardinality('lowcardinality'), tuple(1, 'a', 8), tuple(123, tuple(5, 'a', 7)), toDateOrNull('1999-11-12'), toDateTime('1988-08-28 11:22:33'), toDateTime64('2043-03-01 18:34:04.4444444', 9), toDecimal32(123.45, 3), toDecimal32(-123.45, 3), toDecimal64(1.2345, 7), toDecimal64(-1.2345, 7), toDecimal128(12.34, 9), toDecimal128(-12.34, 9), toIPv6('2001:0db8:85a3:0000:0000:8a2e:0370:7334')";
}
}

View File

@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="clickhouse.client" Version="4.1.0" />
<PackageReference Include="dapper" Version="2.0.30" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,47 @@
FROM ubuntu:20.04
MAINTAINER lgbo-ustc <lgbo.ustc@gmail.com>
RUN apt-get update
RUN apt-get install -y wget openjdk-8-jre
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz && \
tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz
RUN wget https://dlcdn.apache.org/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \
tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz
RUN apt install -y vim
RUN apt install -y openssh-server openssh-client
RUN apt install -y mysql-server
RUN mkdir -p /root/.ssh && \
ssh-keygen -t rsa -b 2048 -P '' -f /root/.ssh/id_rsa && \
cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys && \
cp /root/.ssh/id_rsa /etc/ssh/ssh_host_rsa_key && \
cp /root/.ssh/id_rsa.pub /etc/ssh/ssh_host_rsa_key.pub
RUN wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.27.tar.gz &&\
tar -xf mysql-connector-java-8.0.27.tar.gz && \
mv mysql-connector-java-8.0.27/mysql-connector-java-8.0.27.jar /apache-hive-2.3.9-bin/lib/ && \
rm -rf mysql-connector-java-8.0.27.tar.gz mysql-connector-java-8.0.27
RUN apt install -y iputils-ping net-tools
ENV JAVA_HOME=/usr
ENV HADOOP_HOME=/hadoop-3.1.0
ENV HDFS_NAMENODE_USER=root
ENV HDFS_DATANODE_USER=root HDFS_SECONDARYNAMENODE_USER=root YARN_RESOURCEMANAGER_USER=root YARN_NODEMANAGER_USER=root HDFS_DATANODE_SECURE_USER=hdfs
COPY hdfs-site.xml /hadoop-3.1.0/etc/hadoop
COPY mapred-site.xml /hadoop-3.1.0/etc/hadoop
COPY yarn-site.xml /hadoop-3.1.0/etc/hadoop
COPY hadoop-env.sh /hadoop-3.1.0/etc/hadoop/
#COPY core-site.xml /hadoop-3.1.0/etc/hadoop
COPY core-site.xml.template /hadoop-3.1.0/etc/hadoop
COPY hive-site.xml /apache-hive-2.3.9-bin/conf
COPY prepare_hive_data.sh /
COPY demo_data.txt /
ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH
COPY start.sh /

View File

@ -0,0 +1,14 @@
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://HOSTNAME:9000</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
</configuration>

View File

@ -0,0 +1,6 @@
abc,1,2021-11-16
abd,15,2021-11-05
aaa,22,2021-11-16
dda,0,2021-11-01
dfb,11,2021-11-05
jhn,89,2021-11-11

View File

@ -0,0 +1,422 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Set Hadoop-specific environment variables here.
##
## THIS FILE ACTS AS THE MASTER FILE FOR ALL HADOOP PROJECTS.
## SETTINGS HERE WILL BE READ BY ALL HADOOP COMMANDS. THEREFORE,
## ONE CAN USE THIS FILE TO SET YARN, HDFS, AND MAPREDUCE
## CONFIGURATION OPTIONS INSTEAD OF xxx-env.sh.
##
## Precedence rules:
##
## {yarn-env.sh|hdfs-env.sh} > hadoop-env.sh > hard-coded defaults
##
## {YARN_xyz|HDFS_xyz} > HADOOP_xyz > hard-coded defaults
##
# Many of the options here are built from the perspective that users
# may want to provide OVERWRITING values on the command line.
# For example:
#
JAVA_HOME=/usr/
#
# Therefore, the vast majority (BUT NOT ALL!) of these defaults
# are configured for substitution and not append. If append
# is preferable, modify this file accordingly.
###
# Generic settings for HADOOP
###
# Technically, the only required environment variable is JAVA_HOME.
# All others are optional. However, the defaults are probably not
# preferred. Many sites configure these options outside of Hadoop,
# such as in /etc/profile.d
# The java implementation to use. By default, this environment
# variable is REQUIRED on ALL platforms except OS X!
# export JAVA_HOME=
# Location of Hadoop. By default, Hadoop will attempt to determine
# this location based upon its execution path.
# export HADOOP_HOME=
# Location of Hadoop's configuration information. i.e., where this
# file is living. If this is not defined, Hadoop will attempt to
# locate it based upon its execution path.
#
# NOTE: It is recommend that this variable not be set here but in
# /etc/profile.d or equivalent. Some options (such as
# --config) may react strangely otherwise.
#
# export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
# The maximum amount of heap to use (Java -Xmx). If no unit
# is provided, it will be converted to MB. Daemons will
# prefer any Xmx setting in their respective _OPT variable.
# There is no default; the JVM will autoscale based upon machine
# memory size.
# export HADOOP_HEAPSIZE_MAX=
# The minimum amount of heap to use (Java -Xms). If no unit
# is provided, it will be converted to MB. Daemons will
# prefer any Xms setting in their respective _OPT variable.
# There is no default; the JVM will autoscale based upon machine
# memory size.
# export HADOOP_HEAPSIZE_MIN=
# Enable extra debugging of Hadoop's JAAS binding, used to set up
# Kerberos security.
# export HADOOP_JAAS_DEBUG=true
# Extra Java runtime options for all Hadoop commands. We don't support
# IPv6 yet/still, so by default the preference is set to IPv4.
# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true"
# For Kerberos debugging, an extended option set logs more information
# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true -Dsun.security.spnego.debug"
# Some parts of the shell code may do special things dependent upon
# the operating system. We have to set this here. See the next
# section as to why....
export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
# Under certain conditions, Java on OS X will throw SCDynamicStore errors
# in the system logs.
# See HADOOP-8719 for more information. If one needs Kerberos
# support on OS X, one will want to change/remove this extra bit.
case ${HADOOP_OS_TYPE} in
Darwin*)
export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.realm= "
export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.kdc= "
export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.conf= "
;;
esac
# Extra Java runtime options for some Hadoop commands
# and clients (i.e., hdfs dfs -blah). These get appended to HADOOP_OPTS for
# such commands. In most cases, # this should be left empty and
# let users supply it on the command line.
# export HADOOP_CLIENT_OPTS=""
#
# A note about classpaths.
#
# By default, Apache Hadoop overrides Java's CLASSPATH
# environment variable. It is configured such
# that it starts out blank with new entries added after passing
# a series of checks (file/dir exists, not already listed aka
# de-duplication). During de-duplication, wildcards and/or
# directories are *NOT* expanded to keep it simple. Therefore,
# if the computed classpath has two specific mentions of
# awesome-methods-1.0.jar, only the first one added will be seen.
# If two directories are in the classpath that both contain
# awesome-methods-1.0.jar, then Java will pick up both versions.
# An additional, custom CLASSPATH. Site-wide configs should be
# handled via the shellprofile functionality, utilizing the
# hadoop_add_classpath function for greater control and much
# harder for apps/end-users to accidentally override.
# Similarly, end users should utilize ${HOME}/.hadooprc .
# This variable should ideally only be used as a short-cut,
# interactive way for temporary additions on the command line.
# export HADOOP_CLASSPATH="/some/cool/path/on/your/machine"
# Should HADOOP_CLASSPATH be first in the official CLASSPATH?
# export HADOOP_USER_CLASSPATH_FIRST="yes"
# If HADOOP_USE_CLIENT_CLASSLOADER is set, the classpath along
# with the main jar are handled by a separate isolated
# client classloader when 'hadoop jar', 'yarn jar', or 'mapred job'
# is utilized. If it is set, HADOOP_CLASSPATH and
# HADOOP_USER_CLASSPATH_FIRST are ignored.
# export HADOOP_USE_CLIENT_CLASSLOADER=true
# HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES overrides the default definition of
# system classes for the client classloader when HADOOP_USE_CLIENT_CLASSLOADER
# is enabled. Names ending in '.' (period) are treated as package names, and
# names starting with a '-' are treated as negative matches. For example,
# export HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES="-org.apache.hadoop.UserClass,java.,javax.,org.apache.hadoop."
# Enable optional, bundled Hadoop features
# This is a comma delimited list. It may NOT be overridden via .hadooprc
# Entries may be added/removed as needed.
# export HADOOP_OPTIONAL_TOOLS="hadoop-openstack,hadoop-aliyun,hadoop-azure,hadoop-azure-datalake,hadoop-aws,hadoop-kafka"
###
# Options for remote shell connectivity
###
# There are some optional components of hadoop that allow for
# command and control of remote hosts. For example,
# start-dfs.sh will attempt to bring up all NNs, DNS, etc.
# Options to pass to SSH when one of the "log into a host and
# start/stop daemons" scripts is executed
# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"
# The built-in ssh handler will limit itself to 10 simultaneous connections.
# For pdsh users, this sets the fanout size ( -f )
# Change this to increase/decrease as necessary.
# export HADOOP_SSH_PARALLEL=10
# Filename which contains all of the hosts for any remote execution
# helper scripts # such as workers.sh, start-dfs.sh, etc.
# export HADOOP_WORKERS="${HADOOP_CONF_DIR}/workers"
###
# Options for all daemons
###
#
#
# Many options may also be specified as Java properties. It is
# very common, and in many cases, desirable, to hard-set these
# in daemon _OPTS variables. Where applicable, the appropriate
# Java property is also identified. Note that many are re-used
# or set differently in certain contexts (e.g., secure vs
# non-secure)
#
# Where (primarily) daemon log files are stored.
# ${HADOOP_HOME}/logs by default.
# Java property: hadoop.log.dir
# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
# A string representing this instance of hadoop. $USER by default.
# This is used in writing log and pid files, so keep that in mind!
# Java property: hadoop.id.str
# export HADOOP_IDENT_STRING=$USER
# How many seconds to pause after stopping a daemon
# export HADOOP_STOP_TIMEOUT=5
# Where pid files are stored. /tmp by default.
# export HADOOP_PID_DIR=/tmp
# Default log4j setting for interactive commands
# Java property: hadoop.root.logger
# export HADOOP_ROOT_LOGGER=INFO,console
# Default log4j setting for daemons spawned explicitly by
# --daemon option of hadoop, hdfs, mapred and yarn command.
# Java property: hadoop.root.logger
# export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA
# Default log level and output location for security-related messages.
# You will almost certainly want to change this on a per-daemon basis via
# the Java property (i.e., -Dhadoop.security.logger=foo). (Note that the
# defaults for the NN and 2NN override this by default.)
# Java property: hadoop.security.logger
# export HADOOP_SECURITY_LOGGER=INFO,NullAppender
# Default process priority level
# Note that sub-processes will also run at this level!
# export HADOOP_NICENESS=0
# Default name for the service level authorization file
# Java property: hadoop.policy.file
# export HADOOP_POLICYFILE="hadoop-policy.xml"
#
# NOTE: this is not used by default! <-----
# You can define variables right here and then re-use them later on.
# For example, it is common to use the same garbage collection settings
# for all the daemons. So one could define:
#
# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
#
# .. and then use it as per the b option under the namenode.
###
# Secure/privileged execution
###
#
# Out of the box, Hadoop uses jsvc from Apache Commons to launch daemons
# on privileged ports. This functionality can be replaced by providing
# custom functions. See hadoop-functions.sh for more information.
#
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
# export JSVC_HOME=/usr/bin
#
# This directory contains pids for secure and privileged processes.
#export HADOOP_SECURE_PID_DIR=${HADOOP_PID_DIR}
#
# This directory contains the logs for secure and privileged processes.
# Java property: hadoop.log.dir
# export HADOOP_SECURE_LOG=${HADOOP_LOG_DIR}
#
# When running a secure daemon, the default value of HADOOP_IDENT_STRING
# ends up being a bit bogus. Therefore, by default, the code will
# replace HADOOP_IDENT_STRING with HADOOP_xx_SECURE_USER. If one wants
# to keep HADOOP_IDENT_STRING untouched, then uncomment this line.
# export HADOOP_SECURE_IDENT_PRESERVE="true"
###
# NameNode specific parameters
###
# Default log level and output location for file system related change
# messages. For non-namenode daemons, the Java property must be set in
# the appropriate _OPTS if one wants something other than INFO,NullAppender
# Java property: hdfs.audit.logger
# export HDFS_AUDIT_LOGGER=INFO,NullAppender
# Specify the JVM options to be used when starting the NameNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# a) Set JMX options
# export HDFS_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
#
# b) Set garbage collection logs
# export HDFS_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
#
# c) ... or set them directly
# export HDFS_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
# this is the default:
# export HDFS_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS"
###
# SecondaryNameNode specific parameters
###
# Specify the JVM options to be used when starting the SecondaryNameNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# This is the default:
# export HDFS_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS"
###
# DataNode specific parameters
###
# Specify the JVM options to be used when starting the DataNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# This is the default:
# export HDFS_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS"
# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
# This will replace the hadoop.id.str Java property in secure mode.
# export HDFS_DATANODE_SECURE_USER=hdfs
# Supplemental options for secure datanodes
# By default, Hadoop uses jsvc which needs to know to launch a
# server jvm.
# export HDFS_DATANODE_SECURE_EXTRA_OPTS="-jvm server"
###
# NFS3 Gateway specific parameters
###
# Specify the JVM options to be used when starting the NFS3 Gateway.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_NFS3_OPTS=""
# Specify the JVM options to be used when starting the Hadoop portmapper.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_PORTMAP_OPTS="-Xmx512m"
# Supplemental options for privileged gateways
# By default, Hadoop uses jsvc which needs to know to launch a
# server jvm.
# export HDFS_NFS3_SECURE_EXTRA_OPTS="-jvm server"
# On privileged gateways, user to run the gateway as after dropping privileges
# This will replace the hadoop.id.str Java property in secure mode.
# export HDFS_NFS3_SECURE_USER=nfsserver
###
# ZKFailoverController specific parameters
###
# Specify the JVM options to be used when starting the ZKFailoverController.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_ZKFC_OPTS=""
###
# QuorumJournalNode specific parameters
###
# Specify the JVM options to be used when starting the QuorumJournalNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_JOURNALNODE_OPTS=""
###
# HDFS Balancer specific parameters
###
# Specify the JVM options to be used when starting the HDFS Balancer.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_BALANCER_OPTS=""
###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_MOVER_OPTS=""
###
# Router-based HDFS Federation specific parameters
# Specify the JVM options to be used when starting the RBF Routers.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_DFSROUTER_OPTS=""
###
###
# Advanced Users Only!
###
#
# When building Hadoop, one can add the class paths to the commands
# via this special env var:
# export HADOOP_ENABLE_BUILD_PATHS="true"
#
# To prevent accidents, shell commands can be (superficially) locked
# to only allow certain users to execute certain subcommands.
# It uses the format of (command)_(subcommand)_USER.
#
# For example, to limit who can execute the namenode command,
# export HDFS_NAMENODE_USER=hdfs

View File

@ -0,0 +1,6 @@
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>

View File

@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost/hcatalog?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>test</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>test</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
</configuration>

View File

@ -0,0 +1,6 @@
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>

View File

@ -0,0 +1,6 @@
#!/bin/bash
hive -e "create database test"
hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; "
hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text "
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text"

View File

@ -0,0 +1,12 @@
service ssh start
sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml
hadoop namenode -format
start-all.sh
service mysql start
mysql -u root -e "CREATE USER \"test\"@\"localhost\" IDENTIFIED BY \"test\""
mysql -u root -e "GRANT ALL ON * . * TO 'test'@'localhost'"
schematool -initSchema -dbType mysql
#nohup hiveserver2 &
nohup hive --service metastore &
bash /prepare_hive_data.sh
while true; do sleep 1000; done

View File

@ -0,0 +1,32 @@
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>/hadoop-3.1.0/etc/hadoop,/hadoop-3.1.0/share/hadoop/common/*,/hadoop-3.1.0/share/hadoop/common/lib/*,/hadoop-3.1.0/share/hadoop/hdfs/*, /hadoop-3.1.0/share/hadoop/hdfs/lib/*, /hadoop-3.1.0/share/hadoop/mapreduce/*, /hadoop-3.1.0/share/hadoop/mapreduce/lib/*, /hadoop-3.1.0/share/hadoop/yarn/*, /hadoop-3.1.0/share/hadoop/yarn/lib/*</value>
</property>
<property>
<description>
Number of seconds after an application finishes before the nodemanager's
DeletionService will delete the application's localized file directory
and log directory.
To diagnose Yarn application problems, set this property's value large
enough (for example, to 600 = 10 minutes) to permit examination of these
directories. After changing the property's value, you must restart the
nodemanager in order for it to have an effect.
The roots of Yarn applications' work directories is configurable with
the yarn.nodemanager.local-dirs property (see below), and the roots
of the Yarn applications' log directories is configurable with the
yarn.nodemanager.log-dirs property (see also below).
</description>
<name>yarn.nodemanager.delete.debug-delay-sec</name>
<value>600</value>
</property>
</configuration>

View File

@ -45,7 +45,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV DOCKER_CHANNEL stable
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
RUN add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}"
RUN add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}"
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
@ -58,7 +58,9 @@ RUN apt-get update \
RUN dockerd --version; docker --version
RUN python3 -m pip install \
ARG TARGETARCH
# FIXME: psycopg2-binary is not available for aarch64, we skip it for now
RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \
PyMySQL \
aerospike==4.0.0 \
avro==1.10.2 \
@ -88,7 +90,7 @@ RUN python3 -m pip install \
urllib3 \
requests-kerberos \
pyhdfs \
azure-storage-blob
azure-storage-blob )
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/
@ -102,8 +104,6 @@ RUN set -x \
&& echo 'dockremap:165536:65536' >> /etc/subuid \
&& echo 'dockremap:165536:65536' >> /etc/subgid
RUN echo '127.0.0.1 localhost test.com' >> /etc/hosts
EXPOSE 2375
ENTRYPOINT ["dockerd-entrypoint.sh"]
CMD ["sh", "-c", "pytest $PYTEST_OPTS"]

View File

@ -0,0 +1,6 @@
version: '2.3'
services:
dotnet1:
image: clickhouse/dotnet-client:${DOCKER_DOTNET_CLIENT_TAG:-latest}
# to keep container running
command: sleep infinity

View File

@ -0,0 +1,7 @@
version: '2.3'
services:
hdfs1:
image: lgboustc/hive_test:v1.0
hostname: hivetest
restart: always
entrypoint: bash /start.sh

View File

@ -14,4 +14,4 @@ services:
image: mongo:5.0
restart: always
ports:
- "27018:27017"
- ${MONGO_NO_CRED_EXTERNAL_PORT}:${MONGO_NO_CRED_INTERNAL_PORT}

View File

@ -39,6 +39,7 @@ export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge
export CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH=/clickhouse-library-bridge
export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest}
export DOCKER_DOTNET_CLIENT_TAG=${DOCKER_DOTNET_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest}
export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest}

View File

@ -1,5 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/keeper-jepsen-test .
FROM clickhouse/test-base
ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG
ENV DEBIAN_FRONTEND=noninteractive
ENV CLOJURE_VERSION=1.10.3.814

View File

@ -1,8 +1,14 @@
# rebuild in #33610
# docker build -t clickhouse/pvs-test .
FROM clickhouse/binary-builder
ARG FROM_TAG=latest
FROM clickhouse/binary-builder:$FROM_TAG
RUN apt-get update --yes \
# PVS studio doesn't support aarch64/arm64, so there is a check for it everywhere
# We'll produce an empty image for arm64
ARG TARGETARCH
RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \
&& apt-get install \
bash \
wget \
@ -15,7 +21,7 @@ RUN apt-get update --yes \
libprotoc-dev \
libgrpc++-dev \
libc-ares-dev \
--yes --no-install-recommends
--yes --no-install-recommends )
#RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add -
#RUN sudo wget -nv -O /etc/apt/sources.list.d/viva64.list http://files.viva64.com/etc/viva64.list
@ -27,7 +33,7 @@ RUN apt-get update --yes \
ENV PKG_VERSION="pvs-studio-latest"
RUN set -x \
RUN test x$TARGETARCH = xarm64 || ( set -x \
&& export PUBKEY_HASHSUM="ad369a2e9d8b8c30f5a9f2eb131121739b79c78e03fef0f016ea51871a5f78cd4e6257b270dca0ac3be3d1f19d885516" \
&& wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \
&& echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \
@ -35,7 +41,7 @@ RUN set -x \
&& wget -nv "https://files.viva64.com/${PKG_VERSION}.deb" \
&& { debsig-verify ${PKG_VERSION}.deb \
|| echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \
&& dpkg -i "${PKG_VERSION}.deb"
&& dpkg -i "${PKG_VERSION}.deb" )
ENV CCACHE_DIR=/test_output/ccache

View File

@ -1,5 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/split-build-smoke-test .
FROM clickhouse/binary-builder
ARG FROM_TAG=latest
FROM clickhouse/binary-builder:$FROM_TAG
COPY run.sh /run.sh
COPY process_split_build_smoke_test_result.py /

View File

@ -1,5 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/stateful-test .
FROM clickhouse/stateless-test
ARG FROM_TAG=latest
FROM clickhouse/stateless-test:$FROM_TAG
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \

View File

@ -1,11 +1,10 @@
# rebuild in #33610
# docker build -t clickhouse/stateless-test .
FROM clickhouse/test-base
ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz"
RUN echo "deb [trusted=yes] http://repo.mysql.com/apt/ubuntu/ bionic mysql-5.7" >> /etc/apt/sources.list \
&& apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 8C718D3B5072E1F5
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \
@ -30,7 +29,7 @@ RUN apt-get update -y \
tree \
unixodbc \
wget \
mysql-client=5.7* \
mysql-client=8.0* \
postgresql-client \
sqlite3
@ -49,10 +48,13 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV NUM_TRIES=1
ENV MAX_RUN_TIME=0
ARG TARGETARCH
# Download Minio-related binaries
RUN wget 'https://dl.min.io/server/minio/release/linux-amd64/minio' \
RUN arch=${TARGETARCH:-amd64} \
&& wget "https://dl.min.io/server/minio/release/linux-${arch}/minio" \
&& chmod +x ./minio \
&& wget 'https://dl.min.io/client/mc/release/linux-amd64/mc' \
&& wget "https://dl.min.io/client/mc/release/linux-${arch}/mc" \
&& chmod +x ./mc
ENV MINIO_ROOT_USER="clickhouse"

View File

@ -1,5 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/stateless-pytest .
FROM clickhouse/test-base
ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \

View File

@ -1,5 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/stress-test .
FROM clickhouse/stateful-test
ARG FROM_TAG=latest
FROM clickhouse/stateful-test:$FROM_TAG
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \

View File

@ -146,6 +146,7 @@ handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
gcore
backtrace full
info locals
info registers
@ -263,3 +264,10 @@ done
# Write check result into check_status.tsv
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# Core dumps (see gcore)
# Default filename is 'core.PROCESS_ID'
for core in core.*; do
pigz "$core"
mv "$core.gz" /output/
done

View File

@ -1,19 +1,41 @@
# docker build -t clickhouse/style-test .
FROM ubuntu:20.04
ARG ACT_VERSION=0.2.25
ARG ACTIONLINT_VERSION=1.6.8
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
shellcheck \
libxml2-utils \
curl \
git \
python3-pip \
libxml2-utils \
pylint \
python3-pip \
shellcheck \
yamllint \
&& pip3 install codespell PyGithub boto3 unidiff dohq-artifactory
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
# Get act and actionlint from releases
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) act_arch=x86_64 ;; \
arm64) act_arch=$arch ;; \
esac \
&& curl -o /tmp/act.tgz -L \
"https://github.com/nektos/act/releases/download/v${ACT_VERSION}/act_Linux_${act_arch}.tar.gz" \
&& tar xf /tmp/act.tgz -C /usr/bin act \
&& rm /tmp/act.tgz \
&& curl -o /tmp/actionlint.tgz -L \
"https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VERSION}/actionlint_${ACTIONLINT_VERSION}_linux_${arch}.tar.gz" \
&& tar xf /tmp/actionlint.tgz -C /usr/bin actionlint \
&& rm /tmp/actionlint.tgz
COPY run.sh /
COPY process_style_check_result.py /
CMD ["/bin/bash", "/run.sh"]

View File

@ -11,40 +11,7 @@ def process_result(result_folder):
description = ""
test_results = []
style_log_path = '{}/style_output.txt'.format(result_folder)
if not os.path.exists(style_log_path):
logging.info("No style check log on path %s", style_log_path)
return "exception", "No style check log", []
elif os.stat(style_log_path).st_size != 0:
description += "Style check failed. "
test_results.append(("Style check", "FAIL"))
status = "failure" # Disabled for now
else:
test_results.append(("Style check", "OK"))
typos_log_path = '{}/typos_output.txt'.format(result_folder)
if not os.path.exists(style_log_path):
logging.info("No typos check log on path %s", style_log_path)
return "exception", "No typos check log", []
elif os.stat(typos_log_path).st_size != 0:
description += "Typos check failed. "
test_results.append(("Typos check", "FAIL"))
status = "failure"
else:
test_results.append(("Typos check", "OK"))
whitespaces_log_path = '{}/whitespaces_output.txt'.format(result_folder)
if not os.path.exists(style_log_path):
logging.info("No whitespaces check log on path %s", style_log_path)
return "exception", "No whitespaces check log", []
elif os.stat(whitespaces_log_path).st_size != 0:
description += "Whitespaces check failed. "
test_results.append(("Whitespaces check", "FAIL"))
status = "failure"
else:
test_results.append(("Whitespaces check", "OK"))
duplicate_log_path = '{}/duplicate_output.txt'.format(result_folder)
duplicate_log_path = "{}/duplicate_output.txt".format(result_folder)
if not os.path.exists(duplicate_log_path):
logging.info("No header duplicates check log on path %s", duplicate_log_path)
return "exception", "No header duplicates check log", []
@ -55,7 +22,7 @@ def process_result(result_folder):
else:
test_results.append(("Header duplicates check", "OK"))
shellcheck_log_path = '{}/shellcheck_output.txt'.format(result_folder)
shellcheck_log_path = "{}/shellcheck_output.txt".format(result_folder)
if not os.path.exists(shellcheck_log_path):
logging.info("No shellcheck log on path %s", shellcheck_log_path)
return "exception", "No shellcheck log", []
@ -66,6 +33,50 @@ def process_result(result_folder):
else:
test_results.append(("Shellcheck", "OK"))
style_log_path = "{}/style_output.txt".format(result_folder)
if not os.path.exists(style_log_path):
logging.info("No style check log on path %s", style_log_path)
return "exception", "No style check log", []
elif os.stat(style_log_path).st_size != 0:
description += "Style check failed. "
test_results.append(("Style check", "FAIL"))
status = "failure"
else:
test_results.append(("Style check", "OK"))
typos_log_path = "{}/typos_output.txt".format(result_folder)
if not os.path.exists(typos_log_path):
logging.info("No typos check log on path %s", typos_log_path)
return "exception", "No typos check log", []
elif os.stat(typos_log_path).st_size != 0:
description += "Typos check failed. "
test_results.append(("Typos check", "FAIL"))
status = "failure"
else:
test_results.append(("Typos check", "OK"))
whitespaces_log_path = "{}/whitespaces_output.txt".format(result_folder)
if not os.path.exists(whitespaces_log_path):
logging.info("No whitespaces check log on path %s", whitespaces_log_path)
return "exception", "No whitespaces check log", []
elif os.stat(whitespaces_log_path).st_size != 0:
description += "Whitespaces check failed. "
test_results.append(("Whitespaces check", "FAIL"))
status = "failure"
else:
test_results.append(("Whitespaces check", "OK"))
workflows_log_path = "{}/workflows_output.txt".format(result_folder)
if not os.path.exists(workflows_log_path):
logging.info("No workflows check log on path %s", style_log_path)
return "exception", "No workflows check log", []
elif os.stat(whitespaces_log_path).st_size != 0:
description += "Workflows check failed. "
test_results.append(("Workflows check", "FAIL"))
status = "failure"
else:
test_results.append(("Workflows check", "OK"))
if not description:
description += "Style check success"
@ -73,20 +84,22 @@ def process_result(result_folder):
def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of style check")
parser.add_argument("--in-results-dir", default='/test_output/')
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of style check"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args()
state, description, test_results = process_result(args.in_results_dir)

View File

@ -3,9 +3,10 @@
# yaml check is not the best one
cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
./check-style -n |& tee /test_output/style_output.txt
./check-typos |& tee /test_output/typos_output.txt
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
./check-workflows |& tee /test_output/workflows_output.txt
./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt
/process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv

View File

@ -1,5 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/unit-test .
FROM clickhouse/stateless-test
ARG FROM_TAG=latest
FROM clickhouse/stateless-test:$FROM_TAG
RUN apt-get install gdb

View File

@ -1,3 +1,4 @@
# rebuild in #33610
# docker build -t clickhouse/test-util .
FROM ubuntu:20.04

View File

@ -6,8 +6,8 @@ Minimal ClickHouse build example:
```bash
cmake .. \
-DCMAKE_C_COMPILER=$(which clang-11) \
-DCMAKE_CXX_COMPILER=$(which clang++-11) \
-DCMAKE_C_COMPILER=$(which clang-13) \
-DCMAKE_CXX_COMPILER=$(which clang++-13) \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_CLICKHOUSE_ALL=OFF \
-DENABLE_CLICKHOUSE_SERVER=ON \

View File

@ -1,3 +1,3 @@
wget 'https://builds.clickhouse.com/master/freebsd/clickhouse'
fetch 'https://builds.clickhouse.com/master/freebsd/clickhouse'
chmod a+x ./clickhouse
sudo ./clickhouse install
su -m root -c './clickhouse install'

View File

@ -175,7 +175,7 @@ When we are going to read something from a part in `MergeTree`, we look at `prim
When you `INSERT` a bunch of data into `MergeTree`, that bunch is sorted by primary key order and forms a new part. There are background threads that periodically select some parts and merge them into a single sorted part to keep the number of parts relatively low. That's why it is called `MergeTree`. Of course, merging leads to “write amplification”. All parts are immutable: they are only created and deleted, but not modified. When SELECT is executed, it holds a snapshot of the table (a set of parts). After merging, we also keep old parts for some time to make a recovery after failure easier, so if we see that some merged part is probably broken, we can replace it with its source parts.
`MergeTree` is not an LSM tree because it does not contain “memtable” and “log”: inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently (about once per second is ok, but a thousand times a second is not). We did it this way for simplicity's sake, and because we are already inserting data in batches in our applications.
`MergeTree` is not an LSM tree because it does not contain a “memtable” and a “log”: inserted data is written directly to the filesystem. This behavior makes MergeTree much more suitable for inserting data in batches, so frequently inserting small amounts of rows is not ideal for MergeTree. For example, a couple of rows per second is OK, but doing it a thousand times a second is not optimal. However, there is an async insert mode for small inserts to overcome this limitation. We did it this way for simplicity's sake, and because we are already inserting data in batches in our applications.
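For illustration, here is a minimal sketch of the batch-versus-frequent-inserts trade-off described above. The table name, columns, and values are hypothetical, and the `async_insert`/`wait_for_async_insert` settings are assumed to be available in your server version:
``` sql
-- ORDER BY defines the sorting key, which is used as the primary key by default.
CREATE TABLE example_events
(
    `ts` DateTime,
    `user_id` UInt64,
    `value` Float64
)
ENGINE = MergeTree
ORDER BY (user_id, ts);

-- Preferred: insert many rows per statement so each INSERT forms a reasonably sized part.
INSERT INTO example_events VALUES ('2022-01-01 00:00:00', 1, 0.5), ('2022-01-01 00:00:01', 2, 1.5);

-- For frequent small inserts, async insert mode buffers rows on the server before forming a part.
INSERT INTO example_events SETTINGS async_insert = 1, wait_for_async_insert = 1 VALUES ('2022-01-01 00:00:02', 3, 2.5);
```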
There are MergeTree engines that are doing additional work during background merges. Examples are `CollapsingMergeTree` and `AggregatingMergeTree`. This could be treated as special support for updates. Keep in mind that these are not real updates because users usually have no control over the time when background merges are executed, and data in a `MergeTree` table is almost always stored in more than one part, not in completely merged form.

View File

@ -5,7 +5,7 @@ toc_title: Build on Mac OS X
# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
!!! info "You don't have to build ClickHouse yourself!"
!!! info "You don't have to build ClickHouse yourself"
You can install pre-built ClickHouse as described in [Quick Start](https://clickhouse.com/#quick-start).
Follow `macOS (Intel)` or `macOS (Apple silicon)` installation instructions.

View File

@ -158,6 +158,8 @@ While inside the `build` directory, configure your build by running CMake. Befor
export CC=clang CXX=clang++
cmake ..
If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-13 CXX=clang++-13`. The clang version will be in the script output.
The `CC` variable specifies the compiler for C (short for C Compiler), and the `CXX` variable specifies which C++ compiler to use for the build.
For a faster build, you can resort to the `debug` build type - a build with no optimizations. For that supply the following parameter `-D CMAKE_BUILD_TYPE=Debug`:

View File

@ -7,7 +7,7 @@ toc_title: MaterializedPostgreSQL
Creates a ClickHouse database with tables from a PostgreSQL database. First, a database with the `MaterializedPostgreSQL` engine creates a snapshot of the PostgreSQL database and loads the required tables. The required tables can include any subset of tables from any subset of schemas of the specified database. Along with the snapshot, the database engine acquires the LSN, and once the initial dump of tables is performed, it starts pulling updates from the WAL. After the database is created, tables newly added to the PostgreSQL database are not automatically added to replication. They have to be added manually with the `ATTACH TABLE db.table` query.
Replication is implemented with PostgreSQL Logical Replication Protocol, which does not allow to replicate DDL, but allows to know whether replication breaking changes happened (column type changes, adding/removing columns). Such changes are detected and according tables stop receiving updates. Such tables can be automatically reloaded in the background in case required setting is turned on. Safest way for now is to use `ATTACH`/ `DETACH` queries to reload table completely. If DDL does not break replication (for example, renaming a column) table will still receive updates (insertion is done by position).
Replication is implemented with the PostgreSQL Logical Replication Protocol, which does not allow replicating DDL, but makes it possible to know whether replication-breaking changes happened (column type changes, adding/removing columns). Such changes are detected, and the corresponding tables stop receiving updates. Such tables can be automatically reloaded in the background if the required setting is turned on (can be used starting from 22.1). The safest way for now is to use `ATTACH`/`DETACH` queries to reload a table completely. If DDL does not break replication (for example, renaming a column), the table will still receive updates (insertion is done by position).
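As a rough sketch of the `ATTACH`/`DETACH` reload approach mentioned above (the database and table names are hypothetical):
``` sql
-- Remove the table from replication.
DETACH TABLE postgres_db.table1;

-- Add it back: the engine re-creates it from a fresh snapshot and continues pulling updates.
ATTACH TABLE postgres_db.table1;
```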
## Creating a Database {#creating-a-database}
@ -26,7 +26,7 @@ ENGINE = MaterializedPostgreSQL('host:port', 'database', 'user', 'password') [SE
## Example of Use {#example-of-use}
``` sql
CREATE DATABASE postgresql;
CREATE DATABASE postgres_db
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password');
SHOW TABLES FROM postgres_db;
@ -46,7 +46,7 @@ After `MaterializedPostgreSQL` database is created, it does not automatically de
ATTACH TABLE postgres_database.new_table;
```
Warning: before version 21.13 adding table to replication left unremoved temprorary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in clickhouse version before 21.13, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. Issue is fixed in 21.13.
Warning: before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse versions before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. The issue is fixed in 22.1.
## Dynamically removing tables from replication {#dynamically-removing-table-from-replication}
@ -77,7 +77,7 @@ Tables are accessed via schema name and table name at the same time:
``` sql
CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'schema1.table1,schema2.table2,schema1.table3';
SETTINGS materialized_postgresql_tables_list = 'schema1.table1,schema2.table2,schema1.table3',
materialized_postgresql_tables_list_with_schema = 1;
SELECT * FROM database1.`schema1.table1`;
@ -156,6 +156,8 @@ Default value: empty list. (Default schema is used)
4. materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update}
Do not use this setting before version 22.1.
Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them.
Possible values:

View File

@ -0,0 +1,408 @@
---
toc_priority: 4
toc_title: Hive
---
# Hive {#hive}
The Hive engine allows you to perform `SELECT` queries on HDFS Hive tables. Currently it supports the following input formats:
- Text: only supports simple scalar column types except `binary`
- ORC: supports simple scalar column types except `char`; among complex types, only `array` is supported
- Parquet: supports all simple scalar column types; among complex types, only `array` is supported
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [ALIAS expr1],
name2 [type2] [ALIAS expr2],
...
) ENGINE = Hive('thrift://host:port', 'database', 'table');
PARTITION BY expr
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the original Hive table structure:
- Column names should be the same as in the original Hive table, but you can use just some of these columns, in any order; you can also use alias columns calculated from other columns (see the sketch after this list).
- Column types should be the same as those in the original Hive table.
- The partition-by expression should be consistent with the original Hive table, and the columns used in it should be present in the table structure.
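As a sketch, a ClickHouse-side table might read only a subset of the Hive columns and add one alias column (the table, column names, and Metastore address here are hypothetical):
``` sql
CREATE TABLE hive_events_subset
(
    `user_id` Int64,
    `bytes` Int64,
    -- alias column computed from another column; it does not exist in the Hive table
    `megabytes` Float64 ALIAS bytes / 1048576,
    `day` String
)
ENGINE = Hive('thrift://hive-metastore:9083', 'default', 'events')
PARTITION BY day
```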
**Engine Parameters**
- `thrift://host:port` — Hive Metastore address.
- `database` — Remote database name.
- `table` — Remote table name.
## Usage Example {#usage-example}
### How to Use Local Cache for HDFS Filesystem
We strongly advise you to enable a local cache for remote filesystems. Benchmarks show that it is almost 2x faster with the cache.
Before using the cache, add it to `config.xml`:
``` xml
<local_cache_for_remote_fs>
<enable>true</enable>
<root_dir>local_cache</root_dir>
<limit_size>559096952</limit_size>
<bytes_read_before_flush>1048576</bytes_read_before_flush>
</local_cache_for_remote_fs>
```
- enable: If true, ClickHouse will maintain a local cache for the remote filesystem (HDFS) after startup.
- root_dir: Required. The root directory where local cache files for the remote filesystem are stored.
- limit_size: Required. The maximum size (in bytes) of the local cache files.
- bytes_read_before_flush: Controls how many bytes are read when downloading a file from the remote filesystem before they are flushed to the local filesystem. The default value is 1MB.
When ClickHouse is started with the local cache for remote filesystems enabled, users can still choose not to use the cache with `settings use_local_cache_for_remote_fs = 0` in their query; `use_local_cache_for_remote_fs` is `false` by default.
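For example, a single query can bypass the cache explicitly (a sketch; the table name is hypothetical):
``` sql
-- force reading directly from HDFS for this query only
SELECT count() FROM hive_table SETTINGS use_local_cache_for_remote_fs = 0;
```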
### Query Hive Table with ORC Input Format
#### Create Table in Hive
``` text
hive > CREATE TABLE `test`.`test_orc`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_orc'
OK
Time taken: 0.51 seconds
hive > insert into test.test_orc partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_orc;
OK
1 2 3 4 5 6.11 7.22 8 2021-11-05 12:38:16.314 2021-11-05 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.295 seconds, Fetched: 1 row(s)
```
#### Create Table in ClickHouse
Table in ClickHouse, retrieving data from the Hive table created above:
``` sql
CREATE TABLE test.test_orc
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_bool` Bool,
`f_binary` String,
`f_array_int` Array(Int32),
`f_array_string` Array(String),
`f_array_float` Array(Float32),
`f_array_array_int` Array(Array(Int32)),
`f_array_array_string` Array(Array(String)),
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_orc settings input_format_orc_allow_missing_columns = 1\G
```
``` text
SELECT *
FROM test.test_orc
SETTINGS input_format_orc_allow_missing_columns = 1
Query id: c3eaffdc-78ab-43cd-96a4-4acc5b480658
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-04 04:00:44
f_date: 2021-12-03
f_string: hello world
f_varchar: hello world
f_bool: true
f_binary: hello world
f_array_int: [1,2,3]
f_array_string: ['hello world','hello world']
f_array_float: [1.1,1.2]
f_array_array_int: [[1,2],[3,4]]
f_array_array_string: [['a','b'],['c','d']]
f_array_array_float: [[1.11,2.22],[3.33,4.44]]
day: 2021-09-18
1 rows in set. Elapsed: 0.078 sec.
```
### Query Hive Table with Parquet Input Format
#### Create Table in Hive
``` text
hive >
CREATE TABLE `test`.`test_parquet`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_char` char(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_parquet'
OK
Time taken: 0.51 seconds
hive > insert into test.test_parquet partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_parquet;
OK
1 2 3 4 5 6.11 7.22 8 2021-12-14 17:54:56.743 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.766 seconds, Fetched: 1 row(s)
```
#### Create Table in ClickHouse
Table in ClickHouse, retrieving data from the Hive table created above:
``` sql
CREATE TABLE test.test_parquet
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_char` String,
`f_bool` Bool,
`f_binary` String,
`f_array_int` Array(Int32),
`f_array_string` Array(String),
`f_array_float` Array(Float32),
`f_array_array_int` Array(Array(Int32)),
`f_array_array_string` Array(Array(String)),
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_parquet')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_parquet settings input_format_parquet_allow_missing_columns = 1\G
```
``` text
SELECT *
FROM test_parquet
SETTINGS input_format_parquet_allow_missing_columns = 1
Query id: 4e35cf02-c7b2-430d-9b81-16f438e5fca9
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-14 17:54:56
f_date: 2021-12-14
f_string: hello world
f_varchar: hello world
f_char: hello world
f_bool: true
f_binary: hello world
f_array_int: [1,2,3]
f_array_string: ['hello world','hello world']
f_array_float: [1.1,1.2]
f_array_array_int: [[1,2],[3,4]]
f_array_array_string: [['a','b'],['c','d']]
f_array_array_float: [[1.11,2.22],[3.33,4.44]]
day: 2021-09-18
1 rows in set. Elapsed: 0.357 sec.
```
### Query Hive Table with Text Input Format
#### Create Table in Hive
``` text
hive >
CREATE TABLE `test`.`test_text`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_char` char(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_text'
Time taken: 0.1 seconds, Fetched: 34 row(s)
hive > insert into test.test_text partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_text;
OK
1 2 3 4 5 6.11 7.22 8 2021-12-14 18:11:17.239 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.624 seconds, Fetched: 1 row(s)
```
#### Create Table in ClickHouse
Table in ClickHouse, retrieving data from the Hive table created above:
``` sql
CREATE TABLE test.test_text
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_char` String,
`f_bool` Bool,
`day` String
)
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_text')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_text settings input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'\G
```
``` text
SELECT *
FROM test.test_text
SETTINGS input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'
Query id: 55b79d35-56de-45b9-8be6-57282fbf1f44
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-14 18:11:17
f_date: 2021-12-14
f_string: hello world
f_varchar: hello world
f_char: hello world
f_bool: true
day: 2021-09-18
```

View File

@ -20,3 +20,4 @@ List of supported integrations:
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
- [SQLite](../../../engines/table-engines/integrations/sqlite.md)
- [Hive](../../../engines/table-engines/integrations/hive.md)

View File

@ -66,4 +66,14 @@ SELECT COUNT() FROM mongo_table;
└─────────┘
```
You can also adjust the connection timeout:
``` sql
CREATE TABLE mongo_table
(
key UInt64,
data String
) ENGINE = MongoDB('mongo2:27017', 'test', 'simple_table', 'testuser', 'clickhouse', 'connectTimeoutMS=100000');
```
[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/mongodb/) <!--hide-->

View File

@ -339,7 +339,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
For the `Map` data type, the client can specify whether the index should be created for keys or values using the [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function.
The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem).
The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions.md#hasany), [hasAll](../../../sql-reference/functions/array-functions.md#hasall).
Example of index creation for `Map` data type
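A minimal sketch of such a definition, with hypothetical table and index names:
``` sql
CREATE TABLE map_index_example
(
    `id` UInt64,
    `attrs` Map(String, String),
    -- bloom_filter index built over the map keys
    INDEX attrs_key_idx mapKeys(attrs) TYPE bloom_filter GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id;
```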

View File

@ -9,6 +9,8 @@ The HTTP interface lets you use ClickHouse on any platform from any programming
By default, `clickhouse-server` listens for HTTP on port 8123 (this can be changed in the config).
Sometimes, the `curl` command is not available on the user's operating system. On Ubuntu or Debian, run `sudo apt install curl`. Please refer to this [documentation](https://curl.se/download.html) to install it before running the examples.
If you make a `GET /` request without parameters, it returns the 200 response code and the string defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response), whose default value is “Ok.” (with a line feed at the end).
``` bash
@ -186,7 +188,7 @@ $ echo "SELECT 1" | gzip -c | \
```
``` bash
# Receiving compressed data from the server
# Receiving compressed data archive from the server
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
-H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
$ zcat result.gz
@ -195,6 +197,15 @@ $ zcat result.gz
2
```
```bash
# Receiving compressed data from the server and using gunzip to decompress it
$ curl -sS "http://localhost:8123/?enable_http_compression=1" \
-H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 3' | gunzip -
0
1
2
```
## Default Database {#default-database}
You can use the database URL parameter or the X-ClickHouse-Database header to specify the default database.

View File

@ -36,7 +36,7 @@ mysql>
```
For compatibility with all MySQL clients, it is recommended to specify the user password with [double SHA1](../operations/settings/settings-users.md#password_double_sha1_hex) in the configuration file.
If the user password is specified using [SHA256](../operations/settings/settings-users.md#password_sha256_hex), some clients won't be able to authenticate (mysqljs and old versions of the command-line tool mysql).
If the user password is specified using [SHA256](../operations/settings/settings-users.md#password_sha256_hex), some clients won't be able to authenticate (mysqljs and old versions of the command-line tools MySQL and MariaDB).
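If users are managed with SQL statements rather than the configuration file, a double-SHA1 password can be assigned in a similar way (a sketch; the user name and password are hypothetical):
``` sql
-- store the password as a double SHA1 hash, compatible with the MySQL wire protocol
CREATE USER mysql_compat_user IDENTIFIED WITH double_sha1_password BY 'secret_password';
```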
Restrictions:

View File

@ -60,8 +60,10 @@ toc_title: Adopters
| <a href="https://www.exness.com/" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
| <a href="https://www.eventbunker.io/" class="favicon">EventBunker.io</a> | Serverless Data Processing | — | — | — | [Tweet, April 2021](https://twitter.com/Halil_D_/status/1379839133472985091) |
| <a href="https://fastnetmon.com/" class="favicon">FastNetMon</a> | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) |
| <a href="https://www.firebolt.io/" class="favicon">Firebolt</a> | Analytics | Main product | - | - | [YouTube Tech Talk](https://www.youtube.com/watch?v=9rW9uEJ15tU) |
| <a href="https://www.flipkart.com/" class="favicon">Flipkart</a> | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) |
| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
| <a href="https://futurragroup.com/" class="favicon">Futurra Group</a> | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) |
| <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| <a href="https://www.genotek.ru/" class="favicon">Genotek</a> | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) |
| <a href="https://gigapipe.com/" class="favicon">Gigapipe</a> | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) |
@ -70,6 +72,7 @@ toc_title: Adopters
| <a href="https://www.grouparoo.com" class="favicon">Grouparoo</a> | Data Warehouse Integrations | Main product | — | — | [Official Website, November 2021](https://www.grouparoo.com/integrations) |
| <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
| <a href="https://www.hydrolix.io/" class="favicon">Hydrolix</a> | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) |
| <a href="https://hystax.com" class="favicon">Hystax</a> | Cloud Operations | Observability Analytics | - | - | [Blog](https://hystax.com/clickhouse-for-real-time-cost-saving-analytics-how-to-stop-hammering-screws-and-use-an-electric-screwdriver/) |
| <a href="https://www.the-ica.com/" class="favicon">ICA</a> | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) |
| <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.com/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| <a href="https://infobaleen.com" class="favicon">Infobaleen</a> | AI markting tool | Analytics | — | — | [Official site](https://infobaleen.com) |
@ -81,14 +84,18 @@ toc_title: Adopters
| <a href="https://ippon.tech" class="favicon">Ippon Technologies</a> | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) |
| <a href="https://www.ivi.ru/" class="favicon">Ivi</a> | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) |
| <a href="https://jinshuju.net" class="favicon">Jinshuju 金数据</a> | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
| <a href="https://jitsu.com" class="favicon">Jitsu</a> | Cloud Software | Data Pipeline | — | — | [Documentation](https://jitsu.com/docs/destinations-configuration/clickhouse-destination), [Hacker News](https://news.ycombinator.com/item?id=29106082) |
| <a href="https://jitsu.com" class="favicon">Jitsu</a> | Cloud Software | Data Pipeline | — | — | [Documentation](https://jitsu.com/docs/destinations-configuration/clickhouse-destination), [Hacker News post](https://news.ycombinator.com/item?id=29106082) |
| <a href="https://juicefs.com/" class="favicon">JuiceFS</a> | Storage | Shopping Cart | - | - | [Blog](https://juicefs.com/blog/en/posts/shopee-clickhouse-with-juicefs/) |
| <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020](https://tv.kakao.com/channel/3693125/cliplink/414129353), [if(kakao)2021](https://if.kakao.com/session/24) |
| <a href="https://www.kodiakdata.com/" class="favicon">Kodiak Data</a> | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
| <a href="https://kontur.ru" class="favicon">Kontur</a> | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
| <a href="https://www.kuaishou.com/" class="favicon">Kuaishou</a> | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.com/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) |
| <a href="https://www.kgk-global.com/en/" class="favicon">KGK Global</a> | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) |
| <a href="https://www.lancom-systems.com/" class="favicon">LANCOM Systems</a> | Network Solutions | Traffic analysis | - | - | [ClickHouse Operator for Kubernetes](https://www.lancom-systems.com/), [Hacker News post] (https://news.ycombinator.com/item?id=29413660) |
| <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a> | Research | Traffic analysis | 5 servers | 55 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
| <a href="https://www.lever.co/" class="favicon">Lever</a> | Talent Management | Recruiting | - | - | [Hacker News post](https://news.ycombinator.com/item?id=29558544) |
| <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
| <a href="https://lookforsale.ru/" class="favicon">Lookforsale</a> | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) |
| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| <a href="https://maxilect.com/" class="favicon">MAXILECT</a> | Ad Tech, Blockchain, ML, AI | — | — | — | [Job advertisement, 2021](https://www.linkedin.com/feed/update/urn:li:activity:6780842017229430784/) |
| <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a> | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) |
@ -106,6 +113,7 @@ toc_title: Adopters
| <a href="https://ok.ru" class="favicon">Ok.ru</a> | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) |
| <a href="https://omnicomm.ru/" class="favicon">Omnicomm</a> | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) |
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitoring and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| <a href="https://opensee.io/" class="favicon">Opensee</a> | Financial Analytics | Main product | - | - | [Blog](https://opensee.io/news/from-moscow-to-wall-street-the-remarkable-journey-of-clickhouse/) |
| <a href="https://www.opentargets.org/" class="favicon">Open Targets</a> | Genome Research | Genome Search | — | — | [Tweet, October 2021](https://twitter.com/OpenTargets/status/1452570865342758913?s=20), [Blog](https://blog.opentargets.org/graphql/) |
| <a href="https://corp.ozon.com/" class="favicon">OZON</a> | E-commerce | — | — | — | [Official website](https://job.ozon.ru/vacancy/razrabotchik-clickhouse-ekspluatatsiya-40991870/) |
| <a href="https://panelbear.com/" class="favicon">Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) |
@ -118,6 +126,7 @@ toc_title: Adopters
| <a href="https://prana-system.com/en/" class="favicon">PRANA</a> | Industrial predictive analytics | Main product | — | — | [News (russian), Feb 2021](https://habr.com/en/news/t/541392/) |
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| <a href="https://rvision.pro/en/" class="favicon">R-Vision</a> | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) |
| <a href="https://www.rbinternational.com/" class="favicon">Raiffeisenbank</a> | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) |
| <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| <a href="https://replicahq.com" class="favicon">Replica</a> | Urban Planning | Analytics | — | — | [Job advertisement](https://boards.greenhouse.io/replica/jobs/5547732002?gh_jid=5547732002) |
@ -144,6 +153,7 @@ toc_title: Adopters
| <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
| <a href="https://www.staffcop.ru/" class="favicon">Staffcop</a> | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) |
| <a href="https://www.suning.com/" class="favicon">Suning</a> | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) |
| <a href="https://superwall.me/" class="favicon">Superwall</a> | Monetization Tooling | Main product | — | — | [Word of mouth, Jan 2022](https://github.com/ClickHouse/ClickHouse/pull/33573) |
| <a href="https://www.teralytics.net/" class="favicon">Teralytics</a> | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
@ -153,6 +163,7 @@ toc_title: Adopters
| <a href="https://www.tinybird.co/" class="favicon">Tinybird</a> | Real-time Data Products | Data processing | — | — | [Official website](https://www.tinybird.co/) |
| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | 300 servers in Europe/US | 1.8 PiB, 700 000 insert rps (as of 2021) | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
| <a href="https://www.uber.com" class="favicon">Uber</a> | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/uber.pdf) |
| <a href="https://usetech.com/" class="favicon">UseTech</a> | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) |
| <a href="https://hello.utmstat.com/" class="favicon">UTMSTAT</a> | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) |
| <a href="https://vercel.com/" class="favicon">Vercel</a> | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 |
| <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
@ -169,6 +180,7 @@ toc_title: Adopters
| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/datalens.pdf) |
| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
| <a href="https://www.yellowfinbi.com" class="favicon"><COMPANYNAME></a> | Analytics | Main product | - | - | [Integration](https://www.yellowfinbi.com/campaign/yellowfin-9-whats-new#el-30219e0e) |
| <a href="https://www.yotascale.com/" class="favicon">Yotascale</a> | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) |
| <a href="https://www.your-analytics.org/" class="favicon">Your Analytics</a> | Product Analytics | Main Product | — | - | [Tweet, November 2021](https://twitter.com/mikenikles/status/1459737241165565953) |
| <a href="https://zagravagames.com/en/" class="favicon">Zagrava Trading</a> | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) |
@ -178,9 +190,5 @@ toc_title: Adopters
| <a href="https://promo.croc.ru/digitalworker" class="favicon">Цифровой Рабочий</a> | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) |
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://futurragroup.com/" class="favicon">Futurra Group</a> | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) |
| <a href="https://usetech.com/" class="favicon">UseTech</a> | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) |
| <a href="https://lookforsale.ru/" class="favicon">Lookforsale</a> | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) |
| <a href="https://rvision.pro/en/" class="favicon">R-Vision</a> | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->

View File

@ -3,14 +3,14 @@ toc_priority: 66
toc_title: ClickHouse Keeper
---
# [pre-production] ClickHouse Keeper
# [pre-production] ClickHouse Keeper {#clickHouse-keeper}
ClickHouse server uses the [ZooKeeper](https://zookeeper.apache.org/) coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) query execution. ClickHouse Keeper is an alternative coordination system compatible with ZooKeeper.
!!! warning "Warning"
This feature is currently in the pre-production stage. We test it in our CI and on small internal installations.
## Implementation details
## Implementation details {#implementation-details}
ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java and has quite a simple and powerful data model. ZooKeeper's coordination algorithm, ZAB (ZooKeeper Atomic Broadcast), doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper, ClickHouse Keeper is written in C++ and uses the [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows linearizability for reads and writes and has several open-source implementations in different languages.
@ -21,7 +21,7 @@ ClickHouse Keeper supports Access Control List (ACL) the same way as [ZooKeeper]
!!! info "Note"
External integrations are not supported.
## Configuration
## Configuration {#configuration}
ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server, but in both cases the configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is `<keeper_server>`. Keeper configuration has the following parameters:
@ -36,7 +36,8 @@ Other common parameters are inherited from the ClickHouse server config (`listen
Internal coordination settings are located in `<keeper_server>.<coordination_settings>` section:
- `operation_timeout_ms` — Timeout for a single client operation (ms) (default: 10000).
- `session_timeout_ms` — Timeout for client session (ms) (default: 30000).
- `min_session_timeout_ms` — Min timeout for client session (ms) (default: 10000).
- `session_timeout_ms` — Max timeout for client session (ms) (default: 100000).
- `dead_session_check_period_ms` — How often ClickHouse Keeper checks dead sessions and removes them (ms) (default: 500).
- `heart_beat_interval_ms` — How often a ClickHouse Keeper leader will send heartbeats to followers (ms) (default: 500).
- `election_timeout_lower_bound_ms` — If the follower didn't receive heartbeats from the leader in this interval, then it can initiate leader election (default: 1000).
@ -102,7 +103,7 @@ Examples of configuration for quorum with three nodes can be found in [integrati
</keeper_server>
```
## How to run
## How to run {#how-to-run}
ClickHouse Keeper is bundled into the ClickHouse server package: just add the `<keeper_server>` configuration and start the ClickHouse server as usual. If you want to run a standalone ClickHouse Keeper, you can start it in a similar way with:
@ -110,13 +111,14 @@ ClickHouse Keeper is bundled into the ClickHouse server package, just add config
clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
```
## Four Letter Word Commands
## Four Letter Word Commands {#four-letter-word-commands}
ClickHouse Keeper also provides 4lw commands which are almost the same as in ZooKeeper. Each command is composed of four letters, such as `mntr`, `stat`, etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on the server and connections respectively.
The 4lw commands have a white list configuration `four_letter_word_white_list` which has the default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro".
You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port.
```
echo mntr | nc localhost 9181
```
@ -296,7 +298,7 @@ Sessions with Ephemerals (1):
/clickhouse/task_queue/ddl
```
## [experimental] Migration from ZooKeeper
## [experimental] Migration from ZooKeeper {#migration-from-zookeeper}
Seamless migration from ZooKeeper to ClickHouse Keeper is impossible: you have to stop your ZooKeeper cluster, convert the data, and start ClickHouse Keeper. The `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to a ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:

Some files were not shown because too many files have changed in this diff Show More