Merge branch 'master' into better-settings

commit 04630c3e5a
Author: Alexey Milovidov
Date: 2023-11-11 09:26:59 +01:00

294 changed files with 5428 additions and 4847 deletions

.github/actions/clean/action.yml (new file, +11 lines)

@@ -0,0 +1,11 @@
name: Clean runner
description: Clean the runner's temp path when the job finishes
runs:
using: "composite"
steps:
- name: Clean
shell: bash
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "${{runner.temp}}"
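
For context, a job can invoke this composite action as an ordinary step once the repository is checked out; the job below is a hypothetical sketch, not part of this commit:

jobs:
  example-job:
    runs-on: [self-hosted, builder]
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
      # ... the job's real work ...
      - name: Clean up the runner
        if: always()
        uses: ./.github/actions/clean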

.github/actions/common_setup/action.yml (new file, +33 lines)

@@ -0,0 +1,33 @@
name: Common setup
description: Setup necessary environments
inputs:
job_type:
description: the name to use in the TEMP_PATH and REPO_COPY
default: common
type: string
nested_job:
description: a safety fuse against unintended use inside reusable (callable) jobs
default: true
type: boolean
runs:
using: "composite"
steps:
- name: Setup and check ENV
shell: bash
run: |
echo "Setup the common ENV variables"
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/${{inputs.job_type}}
REPO_COPY=${{runner.temp}}/${{inputs.job_type}}/git-repo-copy
EOF
if [ -z "${{env.GITHUB_JOB_OVERRIDDEN}}" ] && [ "true" == "${{inputs.nested_job}}" ]; then
echo "The GITHUB_JOB_OVERRIDDEN ENV is unset, and must be set for the nested jobs"
exit 1
fi
- name: Setup $TEMP_PATH
shell: bash
run: |
# remove any leftovers
sudo rm -fr "$TEMP_PATH"
mkdir -p "$REPO_COPY"
cp -a "$GITHUB_WORKSPACE"/. "$REPO_COPY"/
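
A nested job inside a reusable workflow would consume this action roughly as follows (a sketch: GITHUB_JOB_OVERRIDDEN is assumed to be exported at the job level by the calling workflow, and the job_type value is illustrative):

build:
  runs-on: [self-hosted, builder]
  env:
    GITHUB_JOB_OVERRIDDEN: ${{github.job}}
  steps:
    - name: Check out repository code
      uses: ClickHouse/checkout@v1
    - name: Common setup
      uses: ./.github/actions/common_setup
      with:
        job_type: build_check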

.github/workflows/backport_branches.yml

@@ -1,3 +1,4 @@
# yamllint disable rule:comments-indentation
name: BackportPR
env:
@@ -169,320 +170,43 @@ jobs:
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_release
checkout_depth: 0
BuilderDebAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_aarch64
checkout_depth: 0
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_asan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_asan
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_tsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_tsan
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_debug
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_debug
BuilderBinDarwin:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin
checkout_depth: 0
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin_aarch64
checkout_depth: 0
############################################################################################
##################################### Docker images #######################################
############################################################################################
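
The target of these conversions, reusable_build.yml, is not itself shown in this diff, but its interface can be inferred from the call sites above: a workflow_call trigger taking a required build_name and an optional checkout_depth. A minimal sketch of that interface, assumed from the inputs passed here (the descriptions and the default are illustrative):

on:
  workflow_call:
    inputs:
      build_name:
        description: name of the build to run, e.g. package_release
        required: true
        type: string
      checkout_depth:
        description: fetch-depth passed to the checkout (0 for full history)
        required: false
        type: number
        default: 1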

.github/workflows/master.yml

@@ -1,3 +1,4 @@
# yamllint disable rule:comments-indentation
name: MasterCI
env:
@@ -184,789 +185,109 @@ jobs:
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
checkout_depth: 0
build_name: package_release
BuilderDebAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
checkout_depth: 0
build_name: package_aarch64
BuilderBinRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
checkout_depth: 0
build_name: binary_release
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_asan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_asan
BuilderDebUBsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_ubsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_ubsan
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_tsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_tsan
BuilderDebMsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_msan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_msan
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_debug
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_debug
##########################################################################################
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderBinClangTidy:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_tidy
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_tidy
BuilderBinDarwin:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin
checkout_depth: 0
BuilderBinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64
checkout_depth: 0
BuilderBinFreeBSD:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_freebsd
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_freebsd
checkout_depth: 0
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin_aarch64
checkout_depth: 0
BuilderBinPPC64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_ppc64le
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_ppc64le
checkout_depth: 0
BuilderBinAmd64Compat:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64_compat
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_amd64_compat
checkout_depth: 0
BuilderBinAarch64V80Compat:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_aarch64_v80compat
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64_v80compat
checkout_depth: 0
BuilderBinRISCV64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_riscv64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_riscv64
checkout_depth: 0
BuilderBinS390X:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_s390x
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_s390x
checkout_depth: 0
############################################################################################
##################################### Docker images #######################################
############################################################################################

.github/workflows/pull_request.yml

@@ -1,3 +1,4 @@
# yamllint disable rule:comments-indentation
name: PullRequestCI
env:
@@ -246,771 +247,100 @@ jobs:
#################################### ORDINARY BUILDS ####################################
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # for performance artifact
filter: tree:0
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
BuilderBinRelease:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_release
checkout_depth: 0
BuilderDebAarch64:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # for performance artifact
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_aarch64
checkout_depth: 0
BuilderBinRelease:
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_release
BuilderDebAsan:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_asan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_asan
BuilderDebUBsan:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_ubsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_ubsan
BuilderDebTsan:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_tsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_tsan
BuilderDebMsan:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_msan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_msan
BuilderDebDebug:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_debug
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_debug
##########################################################################################
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderBinClangTidy:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_tidy
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_tidy
BuilderBinDarwin:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin
BuilderBinAarch64:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64
BuilderBinFreeBSD:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_freebsd
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_freebsd
BuilderBinDarwinAarch64:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin_aarch64
BuilderBinPPC64:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_ppc64le
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_ppc64le
BuilderBinAmd64Compat:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64_compat
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_amd64_compat
BuilderBinAarch64V80Compat:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_aarch64_v80compat
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64_v80compat
BuilderBinRISCV64:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_riscv64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_riscv64
BuilderBinS390X:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_s390x
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_s390x
############################################################################################
##################################### Docker images #######################################
############################################################################################


@ -1,3 +1,4 @@
# yamllint disable rule:comments-indentation
name: ReleaseBranchCI
env:
@ -140,401 +141,53 @@ jobs:
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_release
checkout_depth: 0
BuilderDebAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_aarch64
checkout_depth: 0
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_asan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_asan
BuilderDebUBsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_ubsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_ubsan
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_tsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_tsan
BuilderDebMsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_msan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_msan
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_debug
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_debug
BuilderBinDarwin:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin
checkout_depth: 0
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin_aarch64
checkout_depth: 0
############################################################################################
##################################### Docker images #######################################
############################################################################################

.github/workflows/reusable_build.yml vendored Normal file

@ -0,0 +1,74 @@
### For the pure soul who wishes to move it to another place
# https://github.com/orgs/community/discussions/9050
name: Build ClickHouse
'on':
workflow_call:
inputs:
build_name:
description: the value of build type from tests/ci/ci_config.py
required: true
type: string
checkout_depth:
description: the value of the git shallow checkout
required: false
type: number
default: 1
runner_type:
description: the label of runner to use
default: builder
type: string
additional_envs:
description: additional ENV variables to setup the job
type: string
jobs:
Build:
name: Build-${{inputs.build_name}}
runs-on: [self-hosted, '${{inputs.runner_type}}']
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: ${{inputs.checkout_depth}}
filter: tree:0
- name: Set build envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
IMAGES_PATH=${{runner.temp}}/images_path
GITHUB_JOB_OVERRIDDEN=Build-${{inputs.build_name}}
${{inputs.additional_envs}}
EOF
python3 "$GITHUB_WORKSPACE"/tests/ci/ci_config.py --build-name "${{inputs.build_name}}" >> "$GITHUB_ENV"
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
# This step is done in GITHUB_WORKSPACE,
# because it's broken in REPO_COPY for some reason
if: ${{ env.BUILD_SPARSE_CHECKOUT == 'true' }}
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Common setup
uses: ./.github/actions/common_setup
with:
job_type: build_check
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Build
run: |
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Clean
uses: ./.github/actions/clean

.gitmodules vendored

@ -1,3 +1,6 @@
# Please do not use 'branch = ...' tags with submodule entries. Such tags make updating submodules a
# little bit more convenient but they do *not* specify the tracked submodule branch. Thus, they are
# more confusing than useful.
[submodule "contrib/zstd"]
path = contrib/zstd
url = https://github.com/facebook/zstd


@ -1,6 +1,17 @@
[<img alt="ClickHouse — open source distributed column-oriented DBMS" width="400px" src="https://clickhouse.com/images/ch_gh_logo_rounded.png" />](https://clickhouse.com?utm_source=github)
<div align=center>
ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.
[![Website](https://img.shields.io/website?up_message=AVAILABLE&down_message=DOWN&url=https%3A%2F%2Fclickhouse.com&style=for-the-badge)](https://clickhouse.com)
[![Apache 2.0 License](https://img.shields.io/badge/license-Apache%202.0-blueviolet?style=for-the-badge)](https://www.apache.org/licenses/LICENSE-2.0)
<picture align=center>
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/4ef9c104-2d3f-4646-b186-507358d2fe28">
<source media="(prefers-color-scheme: light)" srcset="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/b001dc7b-5a45-4dcd-9275-e03beb7f9177">
<img alt="The ClickHouse company logo." src="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/b001dc7b-5a45-4dcd-9275-e03beb7f9177">
</picture>
<h4>ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.</h4>
</div>
## How To Install (Linux, macOS, FreeBSD)
```
@ -22,8 +33,7 @@ curl https://clickhouse.com/ | sh
## Upcoming Events
* [**ClickHouse Meetup in Beijing**](https://www.meetup.com/clickhouse-beijing-user-group/events/296334856/) - Nov 4
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 8
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 14
* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/296334976/) - Nov 15
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11
@ -33,7 +43,7 @@ Also, keep an eye out for upcoming meetups around the world. Somewhere else you
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
* **Recording available**: [**v23.6 Release Webinar**](https://www.youtube.com/watch?v=cuf_hYn7dqU) All the features of 23.6, one convenient video! Watch it now!
* **Recording available**: [**v23.10 Release Webinar**](https://www.youtube.com/watch?v=PGQS6uPb970) All the features of 23.10, one convenient video! Watch it now!
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)


@ -119,17 +119,16 @@
#include <base/types.h>
namespace DB
{
void abortOnFailedAssertion(const String & description);
[[noreturn]] void abortOnFailedAssertion(const String & description);
}
#define chassert(x) static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x)
#define chassert(x) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
#define UNREACHABLE() abort()
// clang-format off
#else
/// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while
/// "sizeof(!(x))" will not.
#define NIL_EXPRESSION(x) (void)sizeof(!(x))
#define chassert(x) NIL_EXPRESSION(x)
#define chassert(x) (void)sizeof(!(x))
#define UNREACHABLE() __builtin_unreachable()
#endif
#endif
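
The `do { ... } while (0)` wrapper turns `chassert(x)` into a single statement that demands a terminating semicolon and cannot be embedded in a larger expression, while the release form relies on `sizeof` never evaluating its operand. A minimal standalone sketch of both properties (using a plain `std::abort` and a made-up `expensive_check` instead of the real ClickHouse helpers):

```cpp
#include <cstdlib>

#ifndef NDEBUG
    /// Statement form: must be followed by ';' and cannot sit inside an expression.
    #define chassert(x) do { static_cast<bool>(x) ? void(0) : std::abort(); } while (0)
#else
    /// sizeof does not evaluate its operand, so the argument costs nothing in release.
    #define chassert(x) (void)sizeof(!(x))
#endif

bool expensive_check() { return true; }   /// illustrative only

int main()
{
    chassert(1 + 1 == 2);          /// checked in debug, free in release
    chassert(expensive_check());   /// in release the call is never made
    // bool ok = true;
    // ok ? chassert(1) : chassert(2);   /// compiled with the old expression
    //                                   /// form; a syntax error with do/while
    return 0;
}
```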

contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit eb1572129c71beb2156dcdaadc3fb136954aed96
Subproject commit b7ea89b817a18dc0eafc1f909d568869f02d2d04

contrib/grpc vendored

@ -1 +1 @@
Subproject commit bef8212d1e01f99e406c282ceab3d42da08e09ce
Subproject commit 267af8c3a1ea4a5a4d9e5a070ad2d1ac7c701923


@ -6,12 +6,12 @@ FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/test/*' >> $FILES_TO_CHECKOUT
echo '/test/build/*' >> $FILES_TO_CHECKOUT
echo '/test/core/tsi/alts/fake_handshaker/*' >> $FILES_TO_CHECKOUT
echo '!/tools/*' >> $FILES_TO_CHECKOUT
echo '/tools/codegen/*' >> $FILES_TO_CHECKOUT
echo '!/examples/*' >> $FILES_TO_CHECKOUT
echo '!/doc/*' >> $FILES_TO_CHECKOUT
# FIXME why do we need csharp?
#echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT
echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT
echo '!/src/python/*' >> $FILES_TO_CHECKOUT
echo '!/src/objective-c/*' >> $FILES_TO_CHECKOUT
echo '!/src/php/*' >> $FILES_TO_CHECKOUT


@ -1,11 +1,12 @@
#!/bin/sh
set -e
WORKDIR=$(dirname "$0")
WORKDIR=$(readlink -f "${WORKDIR}")
SCRIPT_PATH=$(realpath "$0")
SCRIPT_DIR=$(dirname "${SCRIPT_PATH}")
GIT_DIR=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel)
cd $GIT_DIR
"$WORKDIR/sparse-checkout/setup-sparse-checkout.sh"
contrib/sparse-checkout/setup-sparse-checkout.sh
git submodule init
git submodule sync
git submodule update --depth=1
git config --file .gitmodules --get-regexp .*path | sed 's/[^ ]* //' | xargs -I _ --max-procs 64 git submodule update --depth=1 --single-branch _


@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.10.2.13"
ARG VERSION="23.10.3.5"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.


@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.10.2.13"
ARG VERSION="23.10.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.


@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.10.2.13"
ARG VERSION="23.10.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image


@ -53,31 +53,28 @@ function configure()
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
function randomize_config_boolean_value {
function randomize_keeper_config_boolean_value {
value=$(($RANDOM % 2))
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
sudo cat /etc/clickhouse-server/config.d/$2.xml \
| sed "s|<$1>[01]</$1>|<$1>$value</$1>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
> /etc/clickhouse-server/config.d/$2.xml.tmp
sudo mv /etc/clickhouse-server/config.d/$2.xml.tmp /etc/clickhouse-server/config.d/$2.xml
}
if [[ -n "$RANDOMIZE_KEEPER_FEATURE_FLAGS" ]] && [[ "$RANDOMIZE_KEEPER_FEATURE_FLAGS" -eq 1 ]]; then
# Randomize all Keeper feature flags
randomize_config_boolean_value filtered_list
randomize_config_boolean_value multi_read
randomize_config_boolean_value check_not_exists
randomize_config_boolean_value create_if_not_exists
randomize_config_boolean_value filtered_list keeper_port
randomize_config_boolean_value multi_read keeper_port
randomize_config_boolean_value check_not_exists keeper_port
randomize_config_boolean_value create_if_not_exists keeper_port
fi
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
#Randomize merge tree setting allow_experimental_block_number_column
value=$(($RANDOM % 2))
sudo cat /etc/clickhouse-server/config.d/merge_tree_settings.xml \
| sed "s|<allow_experimental_block_number_column>[01]</allow_experimental_block_number_column>|<allow_experimental_block_number_column>$value</allow_experimental_block_number_column>|" \
> /etc/clickhouse-server/config.d/merge_tree_settings.xml.tmp
sudo mv /etc/clickhouse-server/config.d/merge_tree_settings.xml.tmp /etc/clickhouse-server/config.d/merge_tree_settings.xml
randomize_config_boolean_value use_compression zookeeper
randomize_config_boolean_value allow_experimental_block_number_column merge_tree_settings
# for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment


@ -0,0 +1,16 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.10.3.5-stable (b2ba7637a41) FIXME as compared to v23.10.2.13-stable (65d8522bb1d)
#### Improvement
* Backported in [#56513](https://github.com/ClickHouse/ClickHouse/issues/56513): Allow backup of materialized view with dropped inner table instead of failing the backup. [#56387](https://github.com/ClickHouse/ClickHouse/pull/56387) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### NO CL CATEGORY
* Backported in [#56605](https://github.com/ClickHouse/ClickHouse/issues/56605):. [#56598](https://github.com/ClickHouse/ClickHouse/pull/56598) ([Maksim Kita](https://github.com/kitaisreal)).


@ -1,5 +1,4 @@
---
slug: /en/getting-started/example-datasets/wikistat
sidebar_label: WikiStat
---


@ -2156,7 +2156,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_local_file_min_bytes_for_seek](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_local_file_min_bytes_for_seek) - min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format. Default value - `8192`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`.
## ParquetMetadata {data-format-parquet-metadata}


@ -1,5 +1,4 @@
---
slug: /en/operations/optimizing-performance/profile-guided-optimization
sidebar_position: 54
sidebar_label: Profile Guided Optimization (PGO)
---


@ -2427,6 +2427,8 @@ This section contains the following parameters:
* hostname_levenshtein_distance - just like nearest_hostname, but it compares hostname in a levenshtein distance manner.
* first_or_random - selects the first ZooKeeper node, if it's not available then randomly selects one of remaining ZooKeeper nodes.
* round_robin - selects the first ZooKeeper node, if reconnection happens selects the next.
- `use_compression` — If set to true, enables compression in Keeper protocol.
**Example configuration**


@ -0,0 +1,35 @@
---
slug: /en/operations/system-tables/symbols
---
# symbols
Contains information for introspection of the `clickhouse` binary. It requires the introspection privilege to access.
This table is only useful for C++ experts and ClickHouse engineers.
Columns:
- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol name in the binary. It is mangled. You can apply `demangle(symbol)` to obtain a readable name.
- `address_begin` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Start address of the symbol in the binary.
- `address_end` ([UInt64](../../sql-reference/data-types/int-uint.md)) — End address of the symbol in the binary.
- `name` ([String](../../sql-reference/data-types/string.md)) — An alias for `symbol`.
**Example**
``` sql
SELECT address_begin, address_end - address_begin AS size, demangle(symbol) FROM system.symbols ORDER BY size DESC LIMIT 10
```
``` text
┌─address_begin─┬─────size─┬─demangle(symbol)──────────────────────────────────────────────────────────────────┐
│ 25000976 │ 29466000 │ icudt70_dat │
│ 400605288 │ 2097272 │ arena_emap_global │
│ 18760592 │ 1048576 │ CLD2::kQuadChrome1015_2 │
│ 9807152 │ 884808 │ TopLevelDomainLookupHash::isValid(char const*, unsigned long)::wordlist │
│ 57442432 │ 850608 │ llvm::X86Insts │
│ 55682944 │ 681360 │ (anonymous namespace)::X86DAGToDAGISel::SelectCode(llvm::SDNode*)::MatcherTable │
│ 55130368 │ 502840 │ (anonymous namespace)::X86InstructionSelector::getMatchTable() const::MatchTable0 │
│ 402930616 │ 404032 │ qpl::ml::dispatcher::hw_dispatcher::get_instance()::instance │
│ 274131872 │ 356795 │ DB::SettingsTraits::Accessor::instance()::$_0::operator()() const │
│ 58293040 │ 249424 │ llvm::X86InstrNameData │
└───────────────┴──────────┴───────────────────────────────────────────────────────────────────────────────────┘
```


@ -12,7 +12,6 @@ A client application to interact with clickhouse-keeper by its native protocol.
- `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
- `-p N`, `--port=N` — Server port. Default value: 9181
- `-c FILE_PATH`, `--config-file=FILE_PATH` — Set path of config file to get the connection string. Default value: `config.xml`.
- `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.


@ -2766,9 +2766,11 @@ Result:
## fromUnixTimestamp
Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type.
This function converts a Unix timestamp to a calendar date and a time of a day.
fromUnixTimestamp uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
It can be called in two ways:
When given a single argument of type [Integer](../../sql-reference/data-types/int-uint.md), it returns a value of type [DateTime](../../sql-reference/data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime).
Alias: `FROM_UNIXTIME`.
@ -2786,14 +2788,16 @@ Result:
└──────────────────────────────┘
```
When there are two or three arguments, the first an [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second a constant format string and the third an optional constant time zone string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type.
When given two or three arguments where the first argument is a value of type [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../../sql-reference/data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used.
For example:
**Example:**
```sql
SELECT fromUnixTimestamp(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
```
Result:
```text
┌─DateTime────────────┐
│ 2009-02-11 14:42:23 │
@ -2806,19 +2810,20 @@ SELECT fromUnixTimestamp(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
## fromUnixTimestampInJodaSyntax
Similar to fromUnixTimestamp, except that it formats time in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
Same as [fromUnixTimestamp](#fromUnixTimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style.
**Example:**
``` sql
SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC');
SELECT fromUnixTimestampInJodaSyntax(1234334543, 'yyyy-MM-dd HH:mm:ss', 'UTC') AS DateTime;
```
Result:
```
┌─fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC')────┐
│ 2022-11-30 10:41:12
└────────────────────────────────────────────────────────────────────────────
┌─DateTime────────────┐
│ 2009-02-11 06:42:23
└─────────────────────┘
```
## toModifiedJulianDay


@ -1840,9 +1840,9 @@ Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and
**Syntax**
``` sql
fromUnixTimestamp64Milli(value [, timezone])
fromUnixTimestamp64Micro(value [, timezone])
fromUnixTimestamp64Nano(value [, timezone])
fromUnixTimestamp64Milli(value[, timezone])
fromUnixTimestamp64Micro(value[, timezone])
fromUnixTimestamp64Nano(value[, timezone])
```
**Arguments**


@ -104,15 +104,14 @@
</url_scheme_mappers>
<!-- Add headers to response in options request. OPTIONS method is used in CORS preflight requests. -->
<!-- It is off by default. Next headers are obligate for CORS.-->
<!-- http_options_response>
<http_options_response>
<header>
<name>Access-Control-Allow-Origin</name>
<value>*</value>
</header>
<header>
<name>Access-Control-Allow-Headers</name>
<value>origin, x-requested-with</value>
<value>origin, x-requested-with, x-clickhouse-format, x-clickhouse-user, x-clickhouse-key, Authorization</value>
</header>
<header>
<name>Access-Control-Allow-Methods</name>
@ -122,7 +121,7 @@
<name>Access-Control-Max-Age</name>
<value>86400</value>
</header>
</http_options_response -->
</http_options_response>
<!-- It is the name that will be shown in the clickhouse-client.
By default, anything with "production" will be highlighted in red in query prompt.


@ -466,7 +466,7 @@ public:
std::vector<DataSet *> data_vec;
data_vec.resize(places.size());
for (unsigned long i = 0; i < data_vec.size(); i++)
for (size_t i = 0; i < data_vec.size(); ++i)
data_vec[i] = &this->data(places[i]).set;
DataSet::parallelizeMergePrepare(data_vec, thread_pool);


@ -143,7 +143,6 @@ namespace
void registerAggregateFunctionUniqCombined(AggregateFunctionFactory & factory)
{
using namespace std::placeholders;
factory.registerFunction("uniqCombined",
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{


@ -1,4 +1,4 @@
#include <Analyzer/Passes/AnyFunctionPass.h>
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
@ -14,8 +14,80 @@ namespace DB
namespace
{
class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>
class MoveFunctionsOutOfAnyVisitor : public InDepthQueryTreeVisitorWithContext<MoveFunctionsOutOfAnyVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<MoveFunctionsOutOfAnyVisitor>;
using Base::Base;
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_move_functions_out_of_any)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return;
/// check function is any
const auto & function_name = function_node->getFunctionName();
if (function_name != "any" && function_name != "anyLast")
return;
auto & arguments = function_node->getArguments().getNodes();
if (arguments.size() != 1)
return;
auto * inside_function_node = arguments[0]->as<FunctionNode>();
/// check argument is a function
if (!inside_function_node)
return;
/// check arguments can not contain arrayJoin or lambda
if (!canRewrite(inside_function_node))
return;
auto & inside_function_node_arguments = inside_function_node->getArguments().getNodes();
/// case any(f())
if (inside_function_node_arguments.empty())
return;
auto it = node_to_rewritten_node.find(node.get());
if (it != node_to_rewritten_node.end())
{
node = it->second;
return;
}
/// checking done, rewrite function
bool changed_argument = false;
for (auto & inside_argument : inside_function_node_arguments)
{
if (inside_argument->as<ConstantNode>()) /// skip constant node
break;
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, {inside_argument->getResultType()}, {}, properties);
auto any_function = std::make_shared<FunctionNode>(function_name);
any_function->resolveAsAggregateFunction(std::move(aggregate_function));
auto & any_function_arguments = any_function->getArguments().getNodes();
any_function_arguments.push_back(std::move(inside_argument));
inside_argument = std::move(any_function);
changed_argument = true;
}
if (changed_argument)
{
node_to_rewritten_node.emplace(node.get(), arguments[0]);
node = arguments[0];
}
}
private:
bool canRewrite(const FunctionNode * function_node)
{
@ -45,90 +117,17 @@ private:
return true;
}
public:
using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>;
using Base::Base;
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_move_functions_out_of_any)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return;
/// check function is any
const auto & function_name = function_node->getFunctionName();
if (!(function_name == "any" || function_name == "anyLast"))
return;
auto & arguments = function_node->getArguments().getNodes();
if (arguments.size() != 1)
return;
auto * inside_function_node = arguments[0]->as<FunctionNode>();
/// check argument is a function
if (!inside_function_node)
return;
/// check arguments can not contain arrayJoin or lambda
if (!canRewrite(inside_function_node))
return;
auto & inside_arguments = inside_function_node->getArguments().getNodes();
/// case any(f())
if (inside_arguments.empty())
return;
if (rewritten.contains(node.get()))
{
node = rewritten.at(node.get());
return;
}
/// checking done, rewrite function
bool pushed = false;
for (auto & inside_argument : inside_arguments)
{
if (inside_argument->as<ConstantNode>()) /// skip constant node
break;
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, {inside_argument->getResultType()}, {}, properties);
auto any_function = std::make_shared<FunctionNode>(function_name);
any_function->resolveAsAggregateFunction(std::move(aggregate_function));
auto & any_function_arguments = any_function->getArguments().getNodes();
any_function_arguments.push_back(std::move(inside_argument));
inside_argument = std::move(any_function);
pushed = true;
}
if (pushed)
{
rewritten.insert({node.get(), arguments[0]});
node = arguments[0];
}
}
private:
/// After query analysis alias will be rewritten to QueryTreeNode
/// whose memory address is same with the original one.
/// So we can reuse the rewritten one.
std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr > rewritten;
/// After query analysis, alias identifier will be resolved to node whose memory address is same with the original one.
/// So we can reuse the rewritten function.
std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr> node_to_rewritten_node;
};
}
void AnyFunctionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void MoveFunctionsOutOfAnyPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
AnyFunctionVisitor visitor(context);
MoveFunctionsOutOfAnyVisitor visitor(context);
visitor.visit(query_tree_node);
}
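
The pointer-keyed map works because, after analysis, every alias of an expression resolves to the very same node object, so a rewrite computed once can be reused for all aliases. A reduced sketch of this lookup-or-rewrite pattern (toy `Node` type, not the real query tree classes):

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

struct Node { std::string text; };
using NodePtr = std::shared_ptr<Node>;

/// Keyed on the raw pointer: aliases resolved to the same object hit the cache.
std::unordered_map<Node *, NodePtr> node_to_rewritten_node;

NodePtr rewrite(const NodePtr & node)
{
    if (auto it = node_to_rewritten_node.find(node.get()); it != node_to_rewritten_node.end())
        return it->second;   /// reuse the result produced through another alias

    auto rewritten = std::make_shared<Node>(Node{"rewritten(" + node->text + ")"});
    node_to_rewritten_node.emplace(node.get(), rewritten);
    return rewritten;
}

int main()
{
    auto expr = std::make_shared<Node>(Node{"any(f(x))"});
    auto alias = expr;   /// an alias resolves to the same object
    std::cout << (rewrite(expr) == rewrite(alias)) << '\n';   /// prints 1
}
```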


@ -7,13 +7,13 @@ namespace DB
/** Rewrite 'any' and 'anyLast' functions pushing them inside original function.
*
* Example: any(f(x, y, g(z)))
* Result: f(any(x), any(y), g(any(z)))
* Example: SELECT any(f(x, y, g(z)));
* Result: SELECT f(any(x), any(y), g(any(z)));
*/
class AnyFunctionPass final : public IQueryTreePass
class MoveFunctionsOutOfAnyPass final : public IQueryTreePass
{
public:
String getName() override { return "AnyFunction"; }
String getName() override { return "MoveFunctionsOutOfAnyPass"; }
String getDescription() override
{


@ -43,7 +43,7 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Analyzer/Passes/ConvertQueryToCNFPass.h>
#include <Analyzer/Passes/AnyFunctionPass.h>
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>
@ -164,9 +164,7 @@ private:
*
* TODO: Support setting optimize_substitute_columns.
* TODO: Support GROUP BY injective function elimination.
* TODO: Support setting optimize_move_functions_out_of_any.
* TODO: Support setting optimize_aggregators_of_group_by_keys.
* TODO: Support setting optimize_duplicate_order_by_and_distinct.
* TODO: Support setting optimize_monotonous_functions_in_order_by.
* TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
*/
@ -284,7 +282,7 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<CrossToInnerJoinPass>());
manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());
manager.addPass(std::make_unique<AnyFunctionPass>());
manager.addPass(std::make_unique<MoveFunctionsOutOfAnyPass>());
manager.addPass(std::make_unique<OptimizeDateOrDateTimeConverterWithPreimagePass>());
}


@ -20,6 +20,12 @@
namespace fs = std::filesystem;
namespace ProfileEvents
{
extern const Event BackupEntriesCollectorMicroseconds;
extern const Event BackupEntriesCollectorForTablesDataMicroseconds;
extern const Event BackupEntriesCollectorRunPostTasksMicroseconds;
}
namespace DB
{
@ -82,7 +88,8 @@ BackupEntriesCollector::BackupEntriesCollector(
const BackupSettings & backup_settings_,
std::shared_ptr<IBackupCoordination> backup_coordination_,
const ReadSettings & read_settings_,
const ContextPtr & context_)
const ContextPtr & context_,
ThreadPool & threadpool_)
: backup_query_elements(backup_query_elements_)
, backup_settings(backup_settings_)
, backup_coordination(backup_coordination_)
@ -101,6 +108,7 @@ BackupEntriesCollector::BackupEntriesCollector(
context->getSettingsRef().backup_restore_keeper_max_retries,
context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms,
context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms)
, threadpool(threadpool_)
{
}
@ -108,6 +116,8 @@ BackupEntriesCollector::~BackupEntriesCollector() = default;
BackupEntries BackupEntriesCollector::run()
{
auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::BackupEntriesCollectorMicroseconds);
/// run() can be called only once.
if (!current_stage.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already making backup entries");
@ -133,11 +143,19 @@ BackupEntries BackupEntriesCollector::run()
/// Make backup entries for the data of the found tables.
setStage(Stage::EXTRACTING_DATA_FROM_TABLES);
makeBackupEntriesForTablesData();
{
auto timer2 = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::BackupEntriesCollectorForTablesDataMicroseconds);
makeBackupEntriesForTablesData();
}
/// Run all the tasks added with addPostCollectingTask().
setStage(Stage::RUNNING_POST_TASKS);
runPostTasks();
{
auto timer2 = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::BackupEntriesCollectorRunPostTasksMicroseconds);
runPostTasks();
}
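
The `timer`/`timer2` objects are scope guards: the time spent in the enclosing block is added to the named profile event when the guard is destroyed. A self-contained sketch of the same idea using std::chrono (the real `ProfileEvents` timer API is ClickHouse-internal; `ScopedTimer` here is a stand-in):

```cpp
#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdio>

/// Adds the elapsed microseconds of its scope to a counter on destruction.
class ScopedTimer
{
public:
    explicit ScopedTimer(std::atomic<uint64_t> & counter_) : counter(counter_) {}
    ~ScopedTimer()
    {
        auto elapsed = std::chrono::steady_clock::now() - start;
        counter += std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
    }
private:
    std::atomic<uint64_t> & counter;
    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
};

std::atomic<uint64_t> total_microseconds{0};

int main()
{
    {
        ScopedTimer timer(total_microseconds);   /// measures exactly this block
    }
    std::printf("%llu us\n", static_cast<unsigned long long>(total_microseconds.load()));
}
```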
/// No more backup entries or tasks are allowed after this point.
@ -738,8 +756,20 @@ void BackupEntriesCollector::makeBackupEntriesForTablesData()
if (backup_settings.structure_only)
return;
std::vector<std::future<void>> futures;
for (const auto & table_name : table_infos | boost::adaptors::map_keys)
makeBackupEntriesForTableData(table_name);
{
futures.push_back(scheduleFromThreadPool<void>([&]()
{
makeBackupEntriesForTableData(table_name);
}, threadpool, "BackupCollect"));
}
/// Wait for all tasks.
for (auto & future : futures)
future.wait();
/// Make sure there is no exception.
for (auto & future : futures)
future.get();
}
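
The two loops are intentional: `wait()` first lets every scheduled task run to completion, so no worker is still touching shared state, and only then does `get()` rethrow the first stored exception. The same wait-all-then-get-all pattern, sketched with std::async in place of ClickHouse's `scheduleFromThreadPool`/`ThreadPool`:

```cpp
#include <future>
#include <iostream>
#include <stdexcept>
#include <vector>

int main()
{
    std::vector<std::future<void>> futures;
    for (int i = 0; i < 4; ++i)
        futures.push_back(std::async(std::launch::async, [i]
        {
            if (i == 2)
                throw std::runtime_error("task 2 failed");   /// stored in the future
        }));

    /// Wait for all tasks, even if some of them failed.
    for (auto & future : futures)
        future.wait();

    /// Now surface the first exception, with no task still running.
    try
    {
        for (auto & future : futures)
            future.get();
    }
    catch (const std::exception & e)
    {
        std::cout << "caught: " << e.what() << '\n';
    }
}
```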
void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableName & table_name)
@ -775,20 +805,28 @@ void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableN
}
}
void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntryPtr backup_entry)
void BackupEntriesCollector::addBackupEntryUnlocked(const String & file_name, BackupEntryPtr backup_entry)
{
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");
backup_entries.emplace_back(file_name, backup_entry);
}
void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntryPtr backup_entry)
{
std::lock_guard lock(mutex);
addBackupEntryUnlocked(file_name, backup_entry);
}
void BackupEntriesCollector::addBackupEntry(const std::pair<String, BackupEntryPtr> & backup_entry)
{
addBackupEntry(backup_entry.first, backup_entry.second);
std::lock_guard lock(mutex);
addBackupEntryUnlocked(backup_entry.first, backup_entry.second);
}
void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entries_)
{
std::lock_guard lock(mutex);
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
insertAtEnd(backup_entries, backup_entries_);
@ -796,6 +834,7 @@ void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entri
void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_)
{
std::lock_guard lock(mutex);
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
insertAtEnd(backup_entries, std::move(backup_entries_));
@ -803,6 +842,7 @@ void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_)
void BackupEntriesCollector::addPostTask(std::function<void()> task)
{
std::lock_guard lock(mutex);
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of post tasks is not allowed");
post_tasks.push(std::move(task));
@ -824,6 +864,7 @@ void BackupEntriesCollector::runPostTasks()
size_t BackupEntriesCollector::getAccessCounter(AccessEntityType type)
{
std::lock_guard lock(mutex);
access_counters.resize(static_cast<size_t>(AccessEntityType::MAX));
return access_counters[static_cast<size_t>(type)]++;
}


@ -31,7 +31,8 @@ public:
const BackupSettings & backup_settings_,
std::shared_ptr<IBackupCoordination> backup_coordination_,
const ReadSettings & read_settings_,
const ContextPtr & context_);
const ContextPtr & context_,
ThreadPool & threadpool_);
~BackupEntriesCollector();
/// Collects backup entries and returns the result.
@ -90,6 +91,8 @@ private:
void makeBackupEntriesForTablesData();
void makeBackupEntriesForTableData(const QualifiedTableName & table_name);
void addBackupEntryUnlocked(const String & file_name, BackupEntryPtr backup_entry);
void runPostTasks();
Strings setStage(const String & new_stage, const String & message = "");
@ -170,6 +173,9 @@ private:
BackupEntries backup_entries;
std::queue<std::function<void()>> post_tasks;
std::vector<size_t> access_counters;
ThreadPool & threadpool;
std::mutex mutex;
};
}


@ -1,6 +1,7 @@
#pragma once
#include <Backups/BackupStatus.h>
#include <Common/ProfileEvents.h>
namespace DB
{
@ -47,6 +48,9 @@ struct BackupOperationInfo
std::exception_ptr exception;
String error_message;
/// Profile events collected during the backup.
std::shared_ptr<ProfileEvents::Counters::Snapshot> profile_counters = nullptr;
std::chrono::system_clock::time_point start_time;
std::chrono::system_clock::time_point end_time;
};


@ -550,7 +550,9 @@ void BackupsWorker::doBackup(
/// Prepare backup entries.
BackupEntries backup_entries;
{
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, backup_create_params.read_settings, context};
BackupEntriesCollector backup_entries_collector(
backup_query->elements, backup_settings, backup_coordination,
backup_create_params.read_settings, context, getThreadPool(ThreadPoolId::BACKUP_MAKE_FILES_LIST));
backup_entries = backup_entries_collector.run();
}
@ -1056,6 +1058,7 @@ void BackupsWorker::setStatus(const String & id, BackupStatus status, bool throw
auto old_status = info.status;
info.status = status;
info.profile_counters = std::make_shared<ProfileEvents::Counters::Snapshot>(CurrentThread::getProfileEvents().getPartiallyAtomicSnapshot());
if (isFinalStatus(status))
info.end_time = std::chrono::system_clock::now();


@ -89,6 +89,17 @@ add_headers_and_sources(clickhouse_common_io IO/Resource)
add_headers_and_sources(clickhouse_common_io IO/S3)
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)
add_headers_and_sources(clickhouse_compression Compression)
add_headers_and_sources(clickhouse_compression Parsers)
add_headers_and_sources(clickhouse_compression Core)
#Included these specific files to avoid linking grpc
add_glob(clickhouse_compression_headers Server/ServerType.h)
add_glob(clickhouse_compression_sources Server/ServerType.cpp)
add_headers_and_sources(clickhouse_compression Common/SSH)
add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhouse_compression_sources})
add_headers_and_sources(dbms Disks/IO)
add_headers_and_sources(dbms Disks/ObjectStorages)
if (TARGET ch_contrib::sqlite)
@ -270,6 +281,7 @@ target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR
if (TARGET ch_contrib::llvm)
dbms_target_link_libraries (PUBLIC ch_contrib::llvm)
target_link_libraries (clickhouse_compression PUBLIC ch_contrib::llvm)
endif ()
if (TARGET ch_contrib::gwp_asan)
@ -293,6 +305,18 @@ target_link_libraries (clickhouse_common_io
common
ch_contrib::double_conversion
ch_contrib::dragonbox_to_chars
ch_contrib::libdivide
)
target_link_libraries (clickhouse_compression
PUBLIC
string_utils
pcg_random
clickhouse_parsers
PRIVATE
ch_contrib::lz4
ch_contrib::roaring
)
# Use X86 AVX2/AVX512 instructions to accelerate filter operations
@ -336,6 +360,7 @@ if (TARGET ch_contrib::crc32-vpmsum)
if (TARGET ch_contrib::ssh)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::ssh)
target_link_libraries(clickhouse_compression PUBLIC ch_contrib::ssh)
endif()
dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables)
@ -359,10 +384,12 @@ endif()
if (TARGET ch_contrib::krb5)
dbms_target_link_libraries(PRIVATE ch_contrib::krb5)
target_link_libraries (clickhouse_compression PRIVATE ch_contrib::krb5)
endif()
if (TARGET ch_contrib::nuraft)
dbms_target_link_libraries(PUBLIC ch_contrib::nuraft)
target_link_libraries (clickhouse_compression PUBLIC ch_contrib::nuraft)
endif()
dbms_target_link_libraries (
@ -432,6 +459,7 @@ endif ()
if (TARGET ch_contrib::ldap)
dbms_target_link_libraries (PRIVATE ch_contrib::ldap ch_contrib::lber)
target_link_libraries (clickhouse_compression PRIVATE ch_contrib::ldap ch_contrib::lber)
endif ()
dbms_target_link_libraries (PUBLIC ch_contrib::sparsehash)

View File

@ -131,7 +131,7 @@ void LocalConnection::sendQuery(
try
{
state->io = executeQuery(state->query, query_context, false, state->stage).second;
state->io = executeQuery(state->query, query_context, QueryFlags{}, state->stage).second;
if (state->io.pipeline.pushing())
{

View File

@ -293,7 +293,6 @@ ReplxxLineReader::ReplxxLineReader(
, word_break_characters(word_break_characters_)
, editor(getEditor())
{
using namespace std::placeholders;
using Replxx = replxx::Replxx;
if (!history_file_path.empty())

View File

@ -586,7 +586,6 @@
M(704, CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS) \
M(705, TABLE_NOT_EMPTY) \
M(706, LIBSSH_ERROR) \
M(707, GCP_ERROR) \
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \
M(1001, STD_EXCEPTION) \

View File

@ -41,12 +41,6 @@ namespace ErrorCodes
void abortOnFailedAssertion(const String & description)
{
LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", description);
/// This is to suppress -Wmissing-noreturn
volatile bool always_false = false;
if (always_false)
return;
abort();
}

View File

@ -19,7 +19,7 @@ namespace Poco { class Logger; }
namespace DB
{
void abortOnFailedAssertion(const String & description);
[[noreturn]] void abortOnFailedAssertion(const String & description);
/// This flag can be set for testing purposes - to check that no exceptions are thrown.
extern bool terminate_on_any_exception;

View File

@ -546,6 +546,10 @@ The server successfully detected this situation and will download merged part fr
M(IOUringCQEsCompleted, "Total number of successfully completed io_uring CQEs") \
M(IOUringCQEsFailed, "Total number of completed io_uring CQEs with failures") \
\
M(BackupEntriesCollectorMicroseconds, "Time spent making backup entries") \
M(BackupEntriesCollectorForTablesDataMicroseconds, "Time spent making backup entries for tables data") \
M(BackupEntriesCollectorRunPostTasksMicroseconds, "Time spent running post tasks after making backup entries") \
\
M(ReadTaskRequestsReceived, "The number of callbacks requested from the remote server back to the initiator server to choose the read task (for s3Cluster table function and similar). Measured on the initiator server side.") \
M(MergeTreeReadTaskRequestsReceived, "The number of callbacks requested from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the initiator server side.") \
\

View File

@ -10,6 +10,7 @@ target_compile_definitions (clickhouse_common_zookeeper PRIVATE -DZOOKEEPER_LOG)
target_link_libraries (clickhouse_common_zookeeper
PUBLIC
clickhouse_common_io
clickhouse_compression
common
PRIVATE
string_utils
@ -20,6 +21,7 @@ add_library(clickhouse_common_zookeeper_no_log ${clickhouse_common_zookeeper_hea
target_link_libraries (clickhouse_common_zookeeper_no_log
PUBLIC
clickhouse_common_io
clickhouse_compression
common
PRIVATE
string_utils

View File

@ -214,6 +214,10 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio
.max_sec = config.getUInt(config_name + "." + key + ".max"),
};
}
else if (key == "use_compression")
{
use_compression = config.getBool(config_name + "." + key);
}
else
throw KeeperException(Coordination::Error::ZBADARGUMENTS, "Unknown key {} in config file", key);
}
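The parsing above reads the flag from the server's ZooKeeper/Keeper configuration section. A minimal sketch of reading such a flag directly with Poco, assuming a loaded AbstractConfiguration and the illustrative section name "zookeeper" (the default of false matches ZooKeeperArgs below):

#include <Poco/Util/AbstractConfiguration.h>

bool readUseCompression(const Poco::Util::AbstractConfiguration & config)
{
    /// Same key as parsed above; false when the key is absent.
    return config.getBool("zookeeper.use_compression", false);
}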

View File

@ -44,6 +44,7 @@ struct ZooKeeperArgs
double recv_sleep_probability = 0.0;
UInt64 send_sleep_ms = 0;
UInt64 recv_sleep_ms = 0;
bool use_compression = false;
SessionLifetimeConfiguration fallback_session_lifetime = {};
DB::GetPriorityForLoadBalancing get_priority_load_balancing;

View File

@ -27,7 +27,6 @@ void ZooKeeperResponse::write(WriteBuffer & out) const
if (error == Error::ZOK)
writeImpl(buf);
Coordination::write(buf.str(), out);
out.next();
}
std::string ZooKeeperRequest::toString() const
@ -49,7 +48,6 @@ void ZooKeeperRequest::write(WriteBuffer & out) const
Coordination::write(getOpNum(), buf);
writeImpl(buf);
Coordination::write(buf.str(), out);
out.next();
}
void ZooKeeperSyncRequest::writeImpl(WriteBuffer & out) const

View File

@ -46,6 +46,7 @@ enum class OpNum : int32_t
OpNum getOpNum(int32_t raw_op_num);
static constexpr int32_t ZOOKEEPER_PROTOCOL_VERSION = 0;
static constexpr int32_t ZOOKEEPER_PROTOCOL_VERSION_WITH_COMPRESSION = 10;
static constexpr int32_t KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT = 42;
static constexpr int32_t CLIENT_HANDSHAKE_LENGTH = 44;
static constexpr int32_t CLIENT_HANDSHAKE_LENGTH_WITH_READONLY = 45;

View File

@ -16,6 +16,9 @@
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Common/logger_useful.h>
#include <Common/setThreadName.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <Compression/CompressionFactory.h>
#include "Coordination/KeeperConstants.h"
#include "config.h"
@ -274,13 +277,34 @@ using namespace DB;
template <typename T>
void ZooKeeper::write(const T & x)
{
Coordination::write(x, *out);
Coordination::write(x, getWriteBuffer());
}
template <typename T>
void ZooKeeper::read(T & x)
{
Coordination::read(x, *in);
Coordination::read(x, getReadBuffer());
}
WriteBuffer & ZooKeeper::getWriteBuffer()
{
if (compressed_out)
return *compressed_out;
return *out;
}
void ZooKeeper::flushWriteBuffer()
{
if (compressed_out)
compressed_out->next();
out->next();
}
ReadBuffer & ZooKeeper::getReadBuffer()
{
if (compressed_in)
return *compressed_in;
return *in;
}
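A minimal, self-contained sketch of this routing pattern with hypothetical stand-in types (not the real ReadBuffer/CompressedReadBuffer classes): the optional compressed wrapper is engaged only after a successful compression handshake, and all I/O goes through the getter.

#include <optional>
#include <sstream>

struct ConnectionSketch
{
    std::ostringstream out;                            /// raw socket buffer stand-in
    std::optional<std::ostringstream> compressed_out;  /// engaged only when compression is on

    std::ostream & getWriteBuffer()
    {
        /// Route writes through the compressed wrapper when present.
        return compressed_out ? static_cast<std::ostream &>(*compressed_out) : out;
    }
};

Note that flushWriteBuffer above flushes both layers in order: first the compressor (so buffered data reaches the socket buffer), then the socket buffer itself.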
static void removeRootPath(String & path, const String & chroot)
@ -345,7 +369,23 @@ ZooKeeper::ZooKeeper(
if (args.enable_fault_injections_during_startup)
setupFaultDistributions();
connect(nodes, args.connection_timeout_ms * 1000);
try
{
use_compression = args.use_compression;
connect(nodes, args.connection_timeout_ms * 1000);
}
catch (...)
{
/// If we get an exception and compression is enabled, it's possible that the Keeper server
/// does not support compression, so retry without compression.
if (use_compression)
{
use_compression = false;
connect(nodes, args.connection_timeout_ms * 1000);
}
else
throw;
}
if (!args.auth_scheme.empty())
sendAuth(args.auth_scheme, args.identity);
@ -424,6 +464,8 @@ void ZooKeeper::connect(
in.emplace(socket);
out.emplace(socket);
compressed_in.reset();
compressed_out.reset();
try
{
@ -444,7 +486,14 @@ void ZooKeeper::connect(
e.addMessage("while receiving handshake from ZooKeeper");
throw;
}
connected = true;
if (use_compression)
{
compressed_in.emplace(*in);
compressed_out.emplace(*out, CompressionCodecFactory::instance().get("LZ4", {}));
}
original_index = static_cast<Int8>(node.original_index);
if (i != 0)
@ -511,16 +560,17 @@ void ZooKeeper::sendHandshake()
std::array<char, passwd_len> passwd {};
write(handshake_length);
write(ZOOKEEPER_PROTOCOL_VERSION);
if (use_compression)
write(ZOOKEEPER_PROTOCOL_VERSION_WITH_COMPRESSION);
else
write(ZOOKEEPER_PROTOCOL_VERSION);
write(last_zxid_seen);
write(timeout);
write(previous_session_id);
write(passwd);
out->next();
flushWriteBuffer();
}
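For reference, a hedged sketch of the handshake frame the client sends, with sizes matching the constants above (passwd_len assumed to be 16, which makes the fields sum to CLIENT_HANDSHAKE_LENGTH = 44):

struct ClientHandshakeSketch
{
    int32_t handshake_length;    /// 44, or 45 with the read-only flag
    int32_t protocol_version;    /// 0, or 10 when compression is requested
    int64_t last_zxid_seen;
    int32_t timeout_ms;
    int64_t previous_session_id;
    char    passwd[16];          /// assumed length; zero-filled above
};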
void ZooKeeper::receiveHandshake()
{
int32_t handshake_length;
@ -533,18 +583,22 @@ void ZooKeeper::receiveHandshake()
throw Exception(Error::ZMARSHALLINGERROR, "Unexpected handshake length received: {}", handshake_length);
read(protocol_version_read);
if (protocol_version_read != ZOOKEEPER_PROTOCOL_VERSION)
/// Special way to tell a client that server is not ready to serve it.
/// It's better for faster failover than just connection drop.
/// Implemented in clickhouse-keeper.
if (protocol_version_read == KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT)
throw Exception::fromMessage(Error::ZCONNECTIONLOSS,
"Keeper server rejected the connection during the handshake. "
"Possibly it's overloaded, doesn't see leader or stale");
if (use_compression)
{
/// Special way to tell a client that server is not ready to serve it.
/// It's better for faster failover than just connection drop.
/// Implemented in clickhouse-keeper.
if (protocol_version_read == KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT)
throw Exception::fromMessage(Error::ZCONNECTIONLOSS,
"Keeper server rejected the connection during the handshake. "
"Possibly it's overloaded, doesn't see leader or stale");
else
throw Exception(Error::ZMARSHALLINGERROR, "Unexpected protocol version: {}", protocol_version_read);
if (protocol_version_read != ZOOKEEPER_PROTOCOL_VERSION_WITH_COMPRESSION)
throw Exception(Error::ZMARSHALLINGERROR, "Unexpected protocol version with compression: {}", protocol_version_read);
}
else if (protocol_version_read != ZOOKEEPER_PROTOCOL_VERSION)
throw Exception(Error::ZMARSHALLINGERROR, "Unexpected protocol version: {}", protocol_version_read);
read(timeout);
if (timeout != args.session_timeout_ms)
@ -562,7 +616,8 @@ void ZooKeeper::sendAuth(const String & scheme, const String & data)
request.scheme = scheme;
request.data = data;
request.xid = AUTH_XID;
request.write(*out);
request.write(getWriteBuffer());
flushWriteBuffer();
int32_t length;
XID read_xid;
@ -578,10 +633,14 @@ void ZooKeeper::sendAuth(const String & scheme, const String & data)
if (read_xid != AUTH_XID)
throw Exception(Error::ZMARSHALLINGERROR, "Unexpected event received in reply to auth request: {}", read_xid);
int32_t actual_length = static_cast<int32_t>(in->count() - count_before_event);
if (length != actual_length)
if (!use_compression)
{
int32_t actual_length = static_cast<int32_t>(in->count() - count_before_event);
if (length != actual_length)
throw Exception(Error::ZMARSHALLINGERROR, "Response length doesn't match. Expected: {}, actual: {}", length, actual_length);
}
if (err != Error::ZOK)
throw Exception(Error::ZMARSHALLINGERROR, "Error received in reply to auth request. Code: {}. Message: {}",
static_cast<int32_t>(err), err);
@ -637,7 +696,8 @@ void ZooKeeper::sendThread()
info.request->addRootPath(args.chroot);
info.request->probably_sent = true;
info.request->write(*out);
info.request->write(getWriteBuffer());
flushWriteBuffer();
logOperationIfNeeded(info.request);
@ -653,7 +713,8 @@ void ZooKeeper::sendThread()
ZooKeeperHeartbeatRequest request;
request.xid = PING_XID;
request.write(*out);
request.write(getWriteBuffer());
flushWriteBuffer();
}
ProfileEvents::increment(ProfileEvents::ZooKeeperBytesSent, out->count() - prev_bytes_sent);
@ -825,7 +886,7 @@ void ZooKeeper::receiveEvent()
}
else
{
response->readImpl(*in);
response->readImpl(getReadBuffer());
response->removeRootPath(args.chroot);
}
/// Instead of setting the watch in sendEvent, set it in receiveEvent because need to check the response.
@ -858,9 +919,14 @@ void ZooKeeper::receiveEvent()
}
}
int32_t actual_length = static_cast<int32_t>(in->count() - count_before_event);
if (length != actual_length)
throw Exception(Error::ZMARSHALLINGERROR, "Response length doesn't match. Expected: {}, actual: {}", length, actual_length);
if (!use_compression)
{
int32_t actual_length = static_cast<int32_t>(in->count() - count_before_event);
if (length != actual_length)
throw Exception(Error::ZMARSHALLINGERROR, "Response length doesn't match. Expected: {}, actual: {}",
length, actual_length);
}
logOperationIfNeeded(request_info.request, response, /* finalize= */ false, elapsed_ms);
}
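Both sendAuth and receiveEvent now skip the response-length check when compression is enabled. The diff does not state why; a plausible reading, as a standalone note:

/// Assumed rationale (not stated in the diff): in->count() advances by the
/// compressed bytes read from the socket, while `length` in the frame header
/// describes the uncompressed payload, so the two counters are not comparable
/// once compression is on.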

View File

@ -15,6 +15,8 @@
#include <IO/WriteBuffer.h>
#include <IO/ReadBufferFromPocoSocket.h>
#include <IO/WriteBufferFromPocoSocket.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <Poco/Net/StreamSocket.h>
#include <Poco/Net/SocketAddress.h>
@ -239,8 +241,13 @@ private:
Poco::Net::StreamSocket socket;
/// To avoid excessive getpeername(2) calls.
Poco::Net::SocketAddress socket_address;
std::optional<ReadBufferFromPocoSocket> in;
std::optional<WriteBufferFromPocoSocket> out;
std::optional<CompressedReadBuffer> compressed_in;
std::optional<CompressedWriteBuffer> compressed_out;
bool use_compression = false;
int64_t session_id = 0;
@ -328,6 +335,10 @@ private:
template <typename T>
void read(T &);
WriteBuffer & getWriteBuffer();
void flushWriteBuffer();
ReadBuffer & getReadBuffer();
void logOperationIfNeeded(const ZooKeeperRequestPtr & request, const ZooKeeperResponsePtr & response = nullptr, bool finalize = false, UInt64 elapsed_ms = 0);
void initFeatureFlags();

View File

@ -2,7 +2,7 @@ clickhouse_add_executable(zkutil_test_commands zkutil_test_commands.cpp)
target_link_libraries(zkutil_test_commands PRIVATE clickhouse_common_zookeeper_no_log)
clickhouse_add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp)
target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper_no_log string_utils)
target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper_no_log clickhouse_compression string_utils)
clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp)
target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log)

View File

@ -36,7 +36,7 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco
}
const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl,ydld";
const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl";
KeeperConfigurationAndSettings::KeeperConfigurationAndSettings()
: server_id(NOT_EXIST)

View File

@ -32,6 +32,7 @@ struct Settings;
M(Milliseconds, shutdown_timeout, 5000, "How much time we will wait until RAFT shutdown", 0) \
M(Milliseconds, session_shutdown_timeout, 10000, "How much time we will wait until sessions are closed during shutdown", 0) \
M(Milliseconds, startup_timeout, 180000, "How much time we will wait until RAFT starts.", 0) \
M(Milliseconds, sleep_before_leader_change_ms, 8000, "How much time we will wait before removing the leader (so that the leader can commit accepted but not-yet-committed commands and they won't be lost -- leader removal is not synchronized with committing)", 0) \
M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \
M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \

View File

@ -172,9 +172,6 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat
FourLetterCommandPtr feature_flags_command = std::make_shared<FeatureFlagsCommand>(keeper_dispatcher);
factory.registerCommand(feature_flags_command);
FourLetterCommandPtr yield_leadership_command = std::make_shared<YieldLeadershipCommand>(keeper_dispatcher);
factory.registerCommand(yield_leadership_command);
factory.initializeAllowList(keeper_dispatcher);
factory.setInitialize(true);
}
@ -582,10 +579,4 @@ String FeatureFlagsCommand::run()
return ret.str();
}
String YieldLeadershipCommand::run()
{
keeper_dispatcher.yieldLeadership();
return "Sent yield leadership request to leader.";
}
}

View File

@ -415,17 +415,4 @@ struct FeatureFlagsCommand : public IFourLetterCommand
~FeatureFlagsCommand() override = default;
};
/// Yield leadership and become follower.
struct YieldLeadershipCommand : public IFourLetterCommand
{
explicit YieldLeadershipCommand(KeeperDispatcher & keeper_dispatcher_)
: IFourLetterCommand(keeper_dispatcher_)
{
}
String name() override { return "ydld"; }
String run() override;
~YieldLeadershipCommand() override = default;
};
}

View File

@ -17,6 +17,5 @@ const String keeper_system_path = "/keeper";
const String keeper_api_version_path = keeper_system_path + "/api_version";
const String keeper_api_feature_flags_path = keeper_system_path + "/feature_flags";
const String keeper_config_path = keeper_system_path + "/config";
const String keeper_availability_zone_path = keeper_system_path + "/availability_zone";
}

View File

@ -32,17 +32,9 @@ KeeperContext::KeeperContext(bool standalone_keeper_)
system_nodes_with_data[keeper_api_version_path] = toString(static_cast<uint8_t>(KeeperApiVersion::WITH_MULTI_READ));
}
void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_, const std::string & environment_az)
void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_)
{
dispatcher = dispatcher_;
/// We only use the environment availability zone when configuration option is missing.
auto keeper_az = config.getString("keeper_server.availability_zone", environment_az);
if (!keeper_az.empty())
system_nodes_with_data[keeper_availability_zone_path] = keeper_az;
LOG_INFO(&Poco::Logger::get("KeeperContext"),
"Initialize the KeeperContext with availability zone: '{}', environment availability zone '{}'. ", keeper_az, environment_az);
digest_enabled = config.getBool("keeper_server.digest_enabled", false);
ignore_system_path_on_startup = config.getBool("keeper_server.ignore_system_path_on_startup", false);

View File

@ -3,6 +3,7 @@
#include <Disks/DiskSelector.h>
#include <IO/WriteBufferFromString.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <cstdint>
#include <memory>
@ -23,7 +24,7 @@ public:
SHUTDOWN
};
void initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_, const std::string & environment_az);
void initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_);
Phase getServerState() const;
void setServerState(Phase server_state_);

View File

@ -11,7 +11,6 @@
#include <Common/CurrentMetrics.h>
#include <Common/ProfileEvents.h>
#include <Common/logger_useful.h>
#include <IO/S3/Credentials.h>
#include <atomic>
#include <future>
@ -371,16 +370,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf
configuration_and_settings = KeeperConfigurationAndSettings::loadFromConfig(config, standalone_keeper);
keeper_context = std::make_shared<KeeperContext>(standalone_keeper);
String availability_zone;
try
{
availability_zone = DB::S3::getRunningAvailabilityZone();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
keeper_context->initialize(config, this, availability_zone);
keeper_context->initialize(config, this);
requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings->max_request_queue_size);
request_thread = ThreadFromGlobalPool([this] { requestThread(); });
@ -813,6 +803,8 @@ void KeeperDispatcher::clusterUpdateWithReconfigDisabledThread()
void KeeperDispatcher::clusterUpdateThread()
{
using enum KeeperServer::ConfigUpdateState;
bool last_command_was_leader_change = false;
auto & shutdown_called = keeper_context->shutdown_called;
while (!shutdown_called)
{
@ -820,13 +812,18 @@ void KeeperDispatcher::clusterUpdateThread()
if (!cluster_update_queue.pop(action))
return;
if (server->applyConfigUpdate(action))
if (const auto res = server->applyConfigUpdate(action, last_command_was_leader_change); res == Accepted)
LOG_DEBUG(log, "Processing config update {}: accepted", action);
else // TODO (myrrc) sleep a random amount? sleep less?
else
{
last_command_was_leader_change = res == WaitBeforeChangingLeader;
(void)cluster_update_queue.pushFront(action);
LOG_DEBUG(log, "Processing config update {}: declined, backoff", action);
std::this_thread::sleep_for(50ms);
std::this_thread::sleep_for(last_command_was_leader_change
? configuration_and_settings->coordination_settings->sleep_before_leader_change_ms
: 50ms);
}
}
}
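A hedged restatement of the backoff policy introduced here, with the default taken from the new CoordinationSettings entry:

#include <chrono>

using namespace std::chrono_literals;

std::chrono::milliseconds backoffFor(bool last_command_was_leader_change)
{
    /// Leader-change declines wait sleep_before_leader_change_ms (8000ms by
    /// default per CoordinationSettings) so the leader can commit in-flight
    /// commands; any other decline retries after the short fixed 50ms pause.
    return last_command_was_leader_change ? 8000ms : 50ms;
}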

View File

@ -237,12 +237,6 @@ public:
return server->requestLeader();
}
/// Yield leadership and become follower.
void yieldLeadership()
{
return server->yieldLeadership();
}
void recalculateStorageStats()
{
return server->recalculateStorageStats();

View File

@ -870,36 +870,50 @@ std::vector<int64_t> KeeperServer::getDeadSessions()
return state_machine->getDeadSessions();
}
bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction & action)
KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate(
const ClusterUpdateAction & action, bool last_command_was_leader_change)
{
using enum ConfigUpdateState;
std::lock_guard _{server_write_mutex};
if (const auto * add = std::get_if<AddRaftServer>(&action))
{
if (raft_instance->get_srv_config(add->id) != nullptr)
return true;
return Accepted;
auto resp = raft_instance->add_srv(static_cast<nuraft::srv_config>(*add));
resp->get();
return resp->get_accepted();
return resp->get_accepted() ? Accepted : Declined;
}
else if (const auto * remove = std::get_if<RemoveRaftServer>(&action))
{
// This corner case is the most problematic. The issue is as follows: if we agree on a number
// of commands but don't commit them on the leader, and then issue a leadership change via
// yield/request, the leader can pause writes before all commits, so those commands will be lost
// (leadership change is not synchronized with committing in NuRaft).
// However, waiting until commands are _committed_ rather than merely _agreed_ is hard
// given the current library design and adds many layers of complexity
// (see the history of https://github.com/ClickHouse/ClickHouse/pull/53481). So the compromise here
// is a timeout before issuing a leadership change, with the ability to adjust it if the user knows
// they have a particularly slow network.
if (remove->id == raft_instance->get_leader())
{
if (!last_command_was_leader_change)
return WaitBeforeChangingLeader;
if (isLeader())
raft_instance->yield_leadership();
else
raft_instance->request_leadership();
return false;
return Declined;
}
if (raft_instance->get_srv_config(remove->id) == nullptr)
return true;
return Accepted;
auto resp = raft_instance->remove_srv(remove->id);
resp->get();
return resp->get_accepted();
return resp->get_accepted() ? Accepted : Declined;
}
else if (const auto * update = std::get_if<UpdateRaftServerPriority>(&action))
{
@ -908,10 +922,10 @@ bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction & action)
"Attempt to apply {} but server is not present in Raft",
action);
else if (ptr->get_priority() == update->priority)
return true;
return Accepted;
raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true);
return true;
return Accepted;
}
UNREACHABLE();
}
@ -1087,12 +1101,6 @@ bool KeeperServer::requestLeader()
return isLeader() || raft_instance->request_leadership();
}
void KeeperServer::yieldLeadership()
{
if (isLeader())
raft_instance->yield_leadership();
}
void KeeperServer::recalculateStorageStats()
{
state_machine->recalculateStorageStats();

View File

@ -128,7 +128,10 @@ public:
int getServerID() const { return server_id; }
bool applyConfigUpdate(const ClusterUpdateAction& action);
enum class ConfigUpdateState { Accepted, Declined, WaitBeforeChangingLeader };
ConfigUpdateState applyConfigUpdate(
const ClusterUpdateAction& action,
bool last_command_was_leader_change = false);
// TODO (myrrc) these functions should be removed once "reconfig" is stabilized
void applyConfigUpdateWithReconfigDisabled(const ClusterUpdateAction& action);
@ -141,8 +144,6 @@ public:
bool requestLeader();
void yieldLeadership();
void recalculateStorageStats();
};

View File

@ -1081,8 +1081,7 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce
Coordination::ZooKeeperGetRequest & request = dynamic_cast<Coordination::ZooKeeperGetRequest &>(*zk_request);
if (request.path == Coordination::keeper_api_feature_flags_path
|| request.path == Coordination::keeper_config_path
|| request.path == Coordination::keeper_availability_zone_path)
|| request.path == Coordination::keeper_config_path)
return {};
if (!storage.uncommitted_state.getNode(request.path))

View File

@ -73,7 +73,7 @@ std::pair<String, StoragePtr> createTableFromAST(
auto table_function = factory.get(table_function_ast, context);
ColumnsDescription columns;
if (ast_create_query.columns_list && ast_create_query.columns_list->columns)
columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true);
columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true, false);
StoragePtr storage = table_function->execute(table_function_ast, context, ast_create_query.getTable(), std::move(columns));
storage->renameInMemory(ast_create_query);
return {ast_create_query.getTable(), storage};
@ -99,7 +99,7 @@ std::pair<String, StoragePtr> createTableFromAST(
}
else
{
columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true);
columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true, false);
constraints = InterpreterCreateQuery::getConstraintsDescription(ast_create_query.columns_list->constraints);
}
}

View File

@ -262,7 +262,11 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
shards.back().push_back(DatabaseReplicaInfo{std::move(hostname), std::move(shard), std::move(replica)});
}
UInt16 default_port = getContext()->getTCPPort();
UInt16 default_port;
if (cluster_auth_info.cluster_secure_connection)
default_port = getContext()->getTCPPortSecure().value_or(DBMS_DEFAULT_SECURE_PORT);
else
default_port = getContext()->getTCPPort();
bool treat_local_as_remote = false;
bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL;
@ -722,7 +726,7 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_
}
}
BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, bool internal)
BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags)
{
if (query_context->getCurrentTransaction() && query_context->getSettingsRef().throw_on_unsupported_query_inside_transaction)
@ -731,7 +735,7 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex
if (is_readonly)
throw Exception(ErrorCodes::NO_ZOOKEEPER, "Database is in readonly mode, because it cannot connect to ZooKeeper");
if (!internal && (query_context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY))
if (!flags.internal && (query_context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY))
throw Exception(ErrorCodes::INCORRECT_QUERY, "It's not initial query. ON CLUSTER is not allowed for Replicated database.");
checkQueryValid(query, query_context);
@ -742,6 +746,7 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex
entry.initiator = ddl_worker->getCommonHostID();
entry.setSettingsIfRequired(query_context);
entry.tracing_context = OpenTelemetry::CurrentContext();
entry.is_backup_restore = flags.distributed_backup_restore;
String node_path = ddl_worker->tryEnqueueAndExecuteEntry(entry, query_context);
Strings hosts_to_wait;
@ -919,14 +924,14 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
String query = fmt::format("CREATE DATABASE IF NOT EXISTS {} ENGINE=Ordinary", backQuoteIfNeed(to_db_name));
auto query_context = Context::createCopy(getContext());
query_context->setSetting("allow_deprecated_database_ordinary", 1);
executeQuery(query, query_context, true);
executeQuery(query, query_context, QueryFlags{ .internal = true });
/// But we want to avoid discarding UUID of ReplicatedMergeTree tables, because it will not work
/// if zookeeper_path contains {uuid} macro. Replicated database do not recreate replicated tables on recovery,
/// so it's ok to save UUID of replicated table.
query = fmt::format("CREATE DATABASE IF NOT EXISTS {} ENGINE=Atomic", backQuoteIfNeed(to_db_name_replicated));
query_context = Context::createCopy(getContext());
executeQuery(query, query_context, true);
executeQuery(query, query_context, QueryFlags{ .internal = true });
}
size_t moved_tables = 0;

View File

@ -46,7 +46,7 @@ public:
/// Try to execute a DDL query on the current host as the initial query. If the query succeeds,
/// it will be executed on all replicas.
BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, bool internal) override;
BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags) override;
bool canExecuteReplicatedMetadataAlter() const override;

View File

@ -372,6 +372,7 @@ void DatabaseWithOwnTablesBase::createTableRestoredFromBackup(const ASTPtr & cre
/// Creates a table by executing a "CREATE TABLE" query.
InterpreterCreateQuery interpreter{create_table_query, local_context};
interpreter.setInternal(true);
interpreter.setIsRestoreFromBackup(true);
interpreter.execute();
}

View File

@ -3,6 +3,7 @@
#include <Core/UUID.h>
#include <Databases/LoadingStrictnessLevel.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/IAST_fwd.h>
#include <Storages/IStorage_fwd.h>
#include <base/types.h>
@ -345,7 +346,7 @@ public:
virtual bool shouldReplicateQuery(const ContextPtr & /*query_context*/, const ASTPtr & /*query_ptr*/) const { return false; }
virtual BlockIO tryEnqueueReplicatedDDL(const ASTPtr & /*query*/, ContextPtr /*query_context*/, [[maybe_unused]] bool internal = false) /// NOLINT
virtual BlockIO tryEnqueueReplicatedDDL(const ASTPtr & /*query*/, ContextPtr /*query_context*/, [[maybe_unused]] QueryFlags flags = {}) /// NOLINT
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not have replicated DDL queue", getEngineName());
}

View File

@ -75,7 +75,7 @@ static BlockIO tryToExecuteQuery(const String & query_to_execute, ContextMutable
if (!database.empty())
query_context->setCurrentDatabase(database);
return executeQuery("/*" + comment + "*/ " + query_to_execute, query_context, true).second;
return executeQuery("/*" + comment + "*/ " + query_to_execute, query_context, QueryFlags{ .internal = true }).second;
}
catch (...)
{

View File

@ -168,7 +168,7 @@ QueryPipeline ClickHouseDictionarySource::createStreamForQuery(const String & qu
if (configuration.is_local)
{
pipeline = executeQuery(query, context_copy, true).second.pipeline;
pipeline = executeQuery(query, context_copy, QueryFlags{ .internal = true }).second.pipeline;
pipeline.convertStructureTo(empty_sample_block.getColumnsWithTypeAndName());
}
else
@ -190,7 +190,7 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re
if (configuration.is_local)
{
return readInvalidateQuery(executeQuery(request, context_copy, true).second.pipeline);
return readInvalidateQuery(executeQuery(request, context_copy, QueryFlags{ .internal = true }).second.pipeline);
}
else
{

View File

@ -900,8 +900,6 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory)
return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Complex>>(dict_id, dict_struct, std::move(source_ptr), configuration);
};
using namespace std::placeholders;
factory.registerLayout("hashed_array",
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr global_context, bool /*created_from_ddl*/)
{

View File

@ -1246,8 +1246,6 @@ void registerDictionaryHashed(DictionaryFactory & factory)
}
};
using namespace std::placeholders;
factory.registerLayout("hashed",
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr global_context, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), global_context, DictionaryKeyType::Simple, /* sparse = */ false); }, false);
factory.registerLayout("sparse_hashed",

View File

@ -227,9 +227,7 @@ private:
struct KeyAttribute final
{
RangeStorageTypeContainer<KeyAttributeContainerType> container;
RangeStorageTypeContainer<InvalidIntervalsContainerType> invalid_intervals_container;
};
void createAttributes();

View File

@ -9,6 +9,7 @@ extract_into_parent_list(clickhouse_functions_sources dbms_sources
FunctionHelpers.cpp
extractTimeZoneFromFunctionArguments.cpp
FunctionsLogical.cpp
CastOverloadResolver.cpp
)
extract_into_parent_list(clickhouse_functions_headers dbms_headers
IFunction.h
@ -16,6 +17,7 @@ extract_into_parent_list(clickhouse_functions_headers dbms_headers
FunctionHelpers.h
extractTimeZoneFromFunctionArguments.h
FunctionsLogical.h
CastOverloadResolver.h
)
add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources})

View File

@ -1,10 +1,156 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/CastOverloadResolver.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/** CastInternal does not preserve nullability of the data type,
* i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1).
*
* Cast preserves nullability according to setting `cast_keep_nullable`,
* i.e. Cast(toNullable(toInt8(1)) as Int32) will be Nullable(Int32(1)) if `cast_keep_nullable` == 1.
*/
template <CastType cast_type, bool internal, typename CastName, typename FunctionName>
class CastOverloadResolverImpl : public IFunctionOverloadResolver
{
public:
using MonotonicityForRange = FunctionCastBase::MonotonicityForRange;
static constexpr auto name = cast_type == CastType::accurate
? CastName::accurate_cast_name
: (cast_type == CastType::accurateOrNull ? CastName::accurate_cast_or_null_name : CastName::cast_name);
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
explicit CastOverloadResolverImpl(ContextPtr context_, std::optional<CastDiagnostic> diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_)
: context(context_)
, diagnostic(std::move(diagnostic_))
, keep_nullable(keep_nullable_)
, data_type_validation_settings(data_type_validation_settings_)
{
}
static FunctionOverloadResolverPtr create(ContextPtr context)
{
const auto & settings_ref = context->getSettingsRef();
if constexpr (internal)
return createImpl(context, {}, false /*keep_nullable*/);
return createImpl(context, {}, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref));
}
static FunctionOverloadResolverPtr createImpl(ContextPtr context, std::optional<CastDiagnostic> diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {})
{
assert(!internal || !keep_nullable);
return std::make_unique<CastOverloadResolverImpl>(context, std::move(diagnostic), keep_nullable, data_type_validation_settings);
}
static FunctionOverloadResolverPtr createImpl(std::optional<CastDiagnostic> diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {})
{
assert(!internal || !keep_nullable);
return std::make_unique<CastOverloadResolverImpl>(ContextPtr(), std::move(diagnostic), keep_nullable, data_type_validation_settings);
}
protected:
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
{
DataTypes data_types(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
data_types[i] = arguments[i].type;
auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get());
return std::make_unique<FunctionCast<FunctionName>>(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type);
}
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
const auto & column = arguments.back().column;
if (!column)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. "
"Instead there is non-constant column of type {}", getName(), arguments.back().type->getName());
const auto * type_col = checkAndGetColumnConst<ColumnString>(column.get());
if (!type_col)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. "
"Instead there is a column with the following structure: {}", getName(), column->dumpStructure());
DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue<String>());
validateDataType(type, data_type_validation_settings);
if constexpr (cast_type == CastType::accurateOrNull)
return makeNullable(type);
if constexpr (internal)
return type;
if (keep_nullable && arguments.front().type->isNullable() && type->canBeInsideNullable())
return makeNullable(type);
return type;
}
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
private:
ContextPtr context;
std::optional<CastDiagnostic> diagnostic;
bool keep_nullable;
DataTypeValidationSettings data_type_validation_settings;
};
struct CastOverloadName
{
static constexpr auto cast_name = "CAST";
static constexpr auto accurate_cast_name = "accurateCast";
static constexpr auto accurate_cast_or_null_name = "accurateCastOrNull";
};
struct CastInternalOverloadName
{
static constexpr auto cast_name = "_CAST";
static constexpr auto accurate_cast_name = "accurate_Cast";
static constexpr auto accurate_cast_or_null_name = "accurate_CastOrNull";
};
template <CastType cast_type>
using CastOverloadResolver = CastOverloadResolverImpl<cast_type, false, CastOverloadName, CastName>;
template <CastType cast_type>
using CastInternalOverloadResolver = CastOverloadResolverImpl<cast_type, true, CastInternalOverloadName, CastInternalName>;
FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional<CastDiagnostic> diagnostic)
{
switch (type)
{
case CastType::nonAccurate:
return CastInternalOverloadResolver<CastType::nonAccurate>::createImpl(diagnostic);
case CastType::accurate:
return CastInternalOverloadResolver<CastType::accurate>::createImpl(diagnostic);
case CastType::accurateOrNull:
return CastInternalOverloadResolver<CastType::accurateOrNull>::createImpl(diagnostic);
}
}
REGISTER_FUNCTION(CastOverloadResolvers)
{
factory.registerFunction<CastInternalOverloadResolver<CastType::nonAccurate>>({}, FunctionFactory::CaseInsensitive);
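A hedged usage sketch of the new free function: callers holding only a runtime CastType can obtain a resolver without including the heavy FunctionsConversion.h (names per this diff; the build call is assumed from IFunctionOverloadResolver):

/// Sketch only.
FunctionOverloadResolverPtr resolver
    = createInternalCastOverloadResolver(CastType::accurateOrNull, /*diagnostic*/ {});
/// resolver->build(arguments) would yield a FunctionCast whose return type is
/// wrapped in Nullable, per getReturnTypeImpl above.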

View File

@ -1,138 +1,29 @@
#pragma once
#include <Functions/FunctionsConversion.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
#include <memory>
#include <optional>
#include <Interpreters/Context_fwd.h>
namespace DB
{
namespace ErrorCodes
class IFunctionOverloadResolver;
using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;
enum class CastType
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/** CastInternal does not preserve nullability of the data type,
* i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1).
*
* Cast preserves nullability according to setting `cast_keep_nullable`,
* i.e. Cast(toNullable(toInt8(1)) as Int32) will be Nullable(Int32(1)) if `cast_keep_nullable` == 1.
*/
template <CastType cast_type, bool internal, typename CastName, typename FunctionName>
class CastOverloadResolverImpl : public IFunctionOverloadResolver
{
public:
using MonotonicityForRange = FunctionCastBase::MonotonicityForRange;
using Diagnostic = FunctionCastBase::Diagnostic;
static constexpr auto name = cast_type == CastType::accurate
? CastName::accurate_cast_name
: (cast_type == CastType::accurateOrNull ? CastName::accurate_cast_or_null_name : CastName::cast_name);
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
explicit CastOverloadResolverImpl(ContextPtr context_, std::optional<Diagnostic> diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_)
: context(context_)
, diagnostic(std::move(diagnostic_))
, keep_nullable(keep_nullable_)
, data_type_validation_settings(data_type_validation_settings_)
{
}
static FunctionOverloadResolverPtr create(ContextPtr context)
{
const auto & settings_ref = context->getSettingsRef();
if constexpr (internal)
return createImpl(context, {}, false /*keep_nullable*/);
return createImpl(context, {}, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref));
}
static FunctionOverloadResolverPtr createImpl(ContextPtr context, std::optional<Diagnostic> diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {})
{
assert(!internal || !keep_nullable);
return std::make_unique<CastOverloadResolverImpl>(context, std::move(diagnostic), keep_nullable, data_type_validation_settings);
}
static FunctionOverloadResolverPtr createImpl(std::optional<Diagnostic> diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {})
{
assert(!internal || !keep_nullable);
return std::make_unique<CastOverloadResolverImpl>(ContextPtr(), std::move(diagnostic), keep_nullable, data_type_validation_settings);
}
protected:
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
{
DataTypes data_types(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
data_types[i] = arguments[i].type;
auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get());
return std::make_unique<FunctionCast<FunctionName>>(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type);
}
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
const auto & column = arguments.back().column;
if (!column)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. "
"Instead there is non-constant column of type {}", getName(), arguments.back().type->getName());
const auto * type_col = checkAndGetColumnConst<ColumnString>(column.get());
if (!type_col)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. "
"Instead there is a column with the following structure: {}", getName(), column->dumpStructure());
DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue<String>());
validateDataType(type, data_type_validation_settings);
if constexpr (cast_type == CastType::accurateOrNull)
return makeNullable(type);
if constexpr (internal)
return type;
if (keep_nullable && arguments.front().type->isNullable() && type->canBeInsideNullable())
return makeNullable(type);
return type;
}
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
private:
ContextPtr context;
std::optional<Diagnostic> diagnostic;
bool keep_nullable;
DataTypeValidationSettings data_type_validation_settings;
nonAccurate,
accurate,
accurateOrNull
};
struct CastOverloadName
struct CastDiagnostic
{
static constexpr auto cast_name = "CAST";
static constexpr auto accurate_cast_name = "accurateCast";
static constexpr auto accurate_cast_or_null_name = "accurateCastOrNull";
std::string column_from;
std::string column_to;
};
struct CastInternalOverloadName
{
static constexpr auto cast_name = "_CAST";
static constexpr auto accurate_cast_name = "accurate_Cast";
static constexpr auto accurate_cast_or_null_name = "accurate_CastOrNull";
};
template <CastType cast_type>
using CastOverloadResolver = CastOverloadResolverImpl<cast_type, false, CastOverloadName, CastName>;
template <CastType cast_type>
using CastInternalOverloadResolver = CastOverloadResolverImpl<cast_type, true, CastInternalOverloadName, CastInternalName>;
FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional<CastDiagnostic> diagnostic);
}

View File

@ -0,0 +1,211 @@
#pragma once
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/Regexps.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/castColumn.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
/** Functions that split strings into an array of strings or vice versa.
*
* splitByChar(sep, s[, max_substrings])
* splitByString(sep, s[, max_substrings])
* splitByRegexp(regexp, s[, max_substrings])
*
* splitByWhitespace(s[, max_substrings]) - split the string by whitespace characters
* splitByNonAlpha(s[, max_substrings]) - split the string by whitespace and punctuation characters
*
* extractAll(s, regexp) - select from the string the subsequences corresponding to the regexp.
* - first subpattern, if regexp has subpattern;
* - zero subpattern (the match part, otherwise);
* - otherwise, an empty array
*
* alphaTokens(s[, max_substrings]) - select from the string subsequence `[a-zA-Z]+`.
*
* URL functions are located separately.
*/
/// A function that takes a string, and returns an array of substrings created by some generator.
template <typename Generator>
class FunctionTokens : public IFunction
{
private:
using Pos = const char *;
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = Generator::name;
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); }
explicit FunctionTokens<Generator>(ContextPtr context)
{
const Settings & settings = context->getSettingsRef();
max_substrings_includes_remaining_string = settings.splitby_max_substrings_includes_remaining_string;
}
String getName() const override { return name; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
bool isVariadic() const override { return Generator::isVariadic(); }
size_t getNumberOfArguments() const override { return Generator::getNumberOfArguments(); }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
Generator::checkArguments(*this, arguments);
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
{
Generator generator;
generator.init(arguments, max_substrings_includes_remaining_string);
const auto & array_argument = arguments[generator.strings_argument_position];
const ColumnString * col_str = checkAndGetColumn<ColumnString>(array_argument.column.get());
const ColumnConst * col_str_const = checkAndGetColumnConstStringOrFixedString(array_argument.column.get());
auto col_res = ColumnArray::create(ColumnString::create());
ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
ColumnString::Chars & res_strings_chars = res_strings.getChars();
ColumnString::Offsets & res_strings_offsets = res_strings.getOffsets();
ColumnArray::Offsets & res_offsets = col_res->getOffsets();
if (col_str)
{
const ColumnString::Chars & src_chars = col_str->getChars();
const ColumnString::Offsets & src_offsets = col_str->getOffsets();
res_offsets.reserve(src_offsets.size());
res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - chosen arbitrarily.
res_strings_chars.reserve(src_chars.size());
Pos token_begin = nullptr;
Pos token_end = nullptr;
size_t size = src_offsets.size();
ColumnString::Offset current_src_offset = 0;
ColumnArray::Offset current_dst_offset = 0;
ColumnString::Offset current_dst_strings_offset = 0;
for (size_t i = 0; i < size; ++i)
{
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]);
current_src_offset = src_offsets[i];
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1;
generator.set(pos, end);
size_t j = 0;
while (generator.get(token_begin, token_end))
{
size_t token_size = token_end - token_begin;
res_strings_chars.resize(res_strings_chars.size() + token_size + 1);
memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size);
res_strings_chars[current_dst_strings_offset + token_size] = 0;
current_dst_strings_offset += token_size + 1;
res_strings_offsets.push_back(current_dst_strings_offset);
++j;
}
current_dst_offset += j;
res_offsets.push_back(current_dst_offset);
}
return col_res;
}
else if (col_str_const)
{
String src = col_str_const->getValue<String>();
Array dst;
generator.set(src.data(), src.data() + src.size());
Pos token_begin = nullptr;
Pos token_end = nullptr;
while (generator.get(token_begin, token_end))
dst.push_back(String(token_begin, token_end - token_begin));
return result_type->createColumnConst(col_str_const->size(), dst);
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
array_argument.column->getName(), getName());
}
};
/// Helper functions for implementations
static inline std::optional<size_t> extractMaxSplits(
const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position)
{
if (max_substrings_argument_position >= arguments.size())
return std::nullopt;
if (const ColumnConst * column = checkAndGetColumn<ColumnConst>(arguments[max_substrings_argument_position].column.get()))
{
size_t res = column->getUInt(0);
if (res)
return res;
}
return std::nullopt;
}
static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings(
const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"separator", &isString<IDataType>, isColumnConst, "const String"},
{"s", &isString<IDataType>, nullptr, "String"}
};
FunctionArgumentDescriptors optional_args{
{"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
}
static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"s", &isString<IDataType>, nullptr, "String"},
};
FunctionArgumentDescriptors optional_args{
{"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
}
}
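From the implementations above, the generator contract FunctionTokens expects can be summarized with a minimal, hypothetical example (a generator that produces no tokens; the name and class are illustrative, not part of this diff):

struct EmptyGeneratorSketch
{
    static constexpr auto name = "emptyTokens";  /// hypothetical function name
    static bool isVariadic() { return true; }
    static size_t getNumberOfArguments() { return 0; }
    static constexpr auto strings_argument_position = 0uz;

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        checkArgumentsWithOptionalMaxSubstrings(func, arguments);
    }

    void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/)
    {
        /// A real generator would call extractMaxSplits(arguments, ...) here.
    }

    /// Called for each next string.
    void set(const char * /*pos*/, const char * /*end*/) {}

    /// Get the next token, if any, or return false.
    bool get(const char * & /*token_begin*/, const char * & /*token_end*/) { return false; }
};

/// FunctionTokens<EmptyGeneratorSketch> would then return an empty array per row.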

View File

@ -26,11 +26,13 @@ namespace ErrorCodes
class FunctionToUnixTimestamp64 : public IFunction
{
private:
size_t target_scale;
const size_t target_scale;
const char * name;
public:
FunctionToUnixTimestamp64(size_t target_scale_, const char * name_)
: target_scale(target_scale_), name(name_)
: target_scale(target_scale_)
, name(name_)
{
}
@ -42,8 +44,10 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (!isDateTime64(arguments[0].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The only argument for function {} must be DateTime64", name);
FunctionArgumentDescriptors args{
{"value", &isDateTime64<IDataType>, nullptr, "DateTime64"}
};
validateFunctionArgumentTypes(*this, arguments, args);
return std::make_shared<DataTypeInt64>();
}
@ -98,9 +102,10 @@ public:
class FunctionFromUnixTimestamp64 : public IFunction
{
private:
size_t target_scale;
const size_t target_scale;
const char * name;
const bool allow_nonconst_timezone_arguments;
public:
FunctionFromUnixTimestamp64(size_t target_scale_, const char * name_, ContextPtr context)
: target_scale(target_scale_)

View File

@ -53,6 +53,7 @@
#include <Functions/toFixedString.h>
#include <Functions/TransformDateTime64.h>
#include <Functions/FunctionsCodingIP.h>
#include <Functions/CastOverloadResolver.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnLowCardinality.h>
#include <Interpreters/Context.h>
@ -3127,14 +3128,8 @@ class ExecutableFunctionCast : public IExecutableFunction
public:
using WrapperType = std::function<ColumnPtr(ColumnsWithTypeAndName &, const DataTypePtr &, const ColumnNullable *, size_t)>;
struct Diagnostic
{
std::string column_from;
std::string column_to;
};
explicit ExecutableFunctionCast(
WrapperType && wrapper_function_, const char * name_, std::optional<Diagnostic> diagnostic_)
WrapperType && wrapper_function_, const char * name_, std::optional<CastDiagnostic> diagnostic_)
: wrapper_function(std::move(wrapper_function_)), name(name_), diagnostic(std::move(diagnostic_)) {}
String getName() const override { return name; }
@ -3170,24 +3165,16 @@ protected:
private:
WrapperType wrapper_function;
const char * name;
std::optional<Diagnostic> diagnostic;
std::optional<CastDiagnostic> diagnostic;
};
struct CastName { static constexpr auto name = "CAST"; };
struct CastInternalName { static constexpr auto name = "_CAST"; };
enum class CastType
{
nonAccurate,
accurate,
accurateOrNull
};
class FunctionCastBase : public IFunctionBase
{
public:
using MonotonicityForRange = std::function<Monotonicity(const IDataType &, const Field &, const Field &)>;
using Diagnostic = ExecutableFunctionCast::Diagnostic;
};
template <typename FunctionName>
@ -3201,7 +3188,7 @@ public:
, MonotonicityForRange && monotonicity_for_range_
, const DataTypes & argument_types_
, const DataTypePtr & return_type_
, std::optional<Diagnostic> diagnostic_
, std::optional<CastDiagnostic> diagnostic_
, CastType cast_type_)
: cast_name(cast_name_), monotonicity_for_range(std::move(monotonicity_for_range_))
, argument_types(argument_types_), return_type(return_type_), diagnostic(std::move(diagnostic_))
@ -3251,7 +3238,7 @@ private:
DataTypes argument_types;
DataTypePtr return_type;
std::optional<Diagnostic> diagnostic;
std::optional<CastDiagnostic> diagnostic;
CastType cast_type;
ContextPtr context;

View File

@ -1,73 +0,0 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
template <typename DataType>
std::optional<Int64> extractMaxSplitsImpl(const ColumnWithTypeAndName & argument)
{
const auto * col = checkAndGetColumnConst<ColumnVector<DataType>>(argument.column.get());
if (!col)
return std::nullopt;
auto value = col->template getValue<DataType>();
return static_cast<Int64>(value);
}
std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position)
{
if (max_substrings_argument_position >= arguments.size())
return std::nullopt;
std::optional<Int64> max_splits;
if (!((max_splits = extractMaxSplitsImpl<UInt8>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int8>(arguments[max_substrings_argument_position]))
|| (max_splits = extractMaxSplitsImpl<UInt16>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int16>(arguments[max_substrings_argument_position]))
|| (max_splits = extractMaxSplitsImpl<UInt32>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int32>(arguments[max_substrings_argument_position]))
|| (max_splits = extractMaxSplitsImpl<UInt64>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int64>(arguments[max_substrings_argument_position]))))
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {}, which is {}-th argument",
arguments[max_substrings_argument_position].column->getName(),
max_substrings_argument_position + 1);
if (*max_splits <= 0)
return std::nullopt;
return max_splits;
}
DataTypePtr FunctionArrayStringConcat::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const
{
FunctionArgumentDescriptors mandatory_args{
{"arr", &isArray<IDataType>, nullptr, "Array"},
};
FunctionArgumentDescriptors optional_args{
{"separator", &isString<IDataType>, isColumnConst, "const String"},
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeString>();
}
REGISTER_FUNCTION(StringArray)
{
factory.registerFunction<FunctionExtractAll>();
factory.registerFunction<FunctionSplitByAlpha>();
factory.registerAlias("splitByAlpha", FunctionSplitByAlpha::name);
factory.registerFunction<FunctionSplitByNonAlpha>();
factory.registerFunction<FunctionSplitByWhitespace>();
factory.registerFunction<FunctionSplitByChar>();
factory.registerFunction<FunctionSplitByString>();
factory.registerFunction<FunctionSplitByRegexp>();
factory.registerFunction<FunctionArrayStringConcat>();
}
}

View File

@ -1,990 +0,0 @@
#pragma once
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/IFunction.h>
#include <Functions/Regexps.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context_fwd.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
}
/** Functions that split strings into an array of strings or vice versa.
*
* splitByChar(sep, s[, max_substrings])
* splitByString(sep, s[, max_substrings])
* splitByRegexp(regexp, s[, max_substrings])
*
* splitByWhitespace(s[, max_substrings]) - split the string by whitespace characters
* splitByNonAlpha(s[, max_substrings]) - split the string by whitespace and punctuation characters
*
 * extractAll(s, regexp) - selects from the string the substrings matching the regexp:
 * - the first subpattern, if the regexp has a subpattern;
 * - otherwise, the zero subpattern (the whole match);
 * - or an empty array, if there is no match.
*
* arrayStringConcat(arr)
* arrayStringConcat(arr, delimiter)
* - join an array of strings into one string via a separator.
*
* alphaTokens(s[, max_substrings]) - select from the string subsequence `[a-zA-Z]+`.
*
* URL functions are located separately.
*/
using Pos = const char *;
std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position);
/// Substring generators. All of them have a common interface.
class SplitByAlphaImpl
{
private:
Pos pos;
Pos end;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "alphaTokens";
static String getName() { return name; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"s", &isString<IDataType>, nullptr, "String"},
};
FunctionArgumentDescriptors optional_args{
{"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
}
static constexpr auto strings_argument_position = 0uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 1);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
/// Skip garbage
while (pos < end && !isAlphaASCII(*pos))
++pos;
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = end;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
while (pos < end && isAlphaASCII(*pos))
++pos;
token_end = pos;
++splits;
return true;
}
};
class SplitByNonAlphaImpl
{
private:
Pos pos;
Pos end;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
/// Get the name of the function.
static constexpr auto name = "splitByNonAlpha";
static String getName() { return name; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
SplitByAlphaImpl::checkArguments(func, arguments);
}
static constexpr auto strings_argument_position = 0uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 1);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
/// Skip garbage
while (pos < end && (isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
++pos;
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = end;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
while (pos < end && !(isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
++pos;
token_end = pos;
splits++;
return true;
}
};
class SplitByWhitespaceImpl
{
private:
Pos pos;
Pos end;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "splitByWhitespace";
static String getName() { return name; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
return SplitByNonAlphaImpl::checkArguments(func, arguments);
}
static constexpr auto strings_argument_position = 0uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 1);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
/// Skip garbage
while (pos < end && isWhitespaceASCII(*pos))
++pos;
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = end;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
while (pos < end && !isWhitespaceASCII(*pos))
++pos;
token_end = pos;
splits++;
return true;
}
};
class SplitByCharImpl
{
private:
Pos pos;
Pos end;
char separator;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "splitByChar";
static String getName() { return name; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"separator", &isString<IDataType>, isColumnConst, "const String"},
{"s", &isString<IDataType>, nullptr, "String"}
};
FunctionArgumentDescriptors optional_args{
{"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
}
static constexpr auto strings_argument_position = 1uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[0].column->getName(), getName());
String sep_str = col->getValue<String>();
if (sep_str.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal separator for function {}. Must be exactly one byte.", getName());
separator = sep_str[0];
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 2);
}
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
bool get(Pos & token_begin, Pos & token_end)
{
if (!pos)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = nullptr;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
if (pos)
{
token_end = pos;
++pos;
++splits;
}
else
token_end = end;
return true;
}
};
class SplitByStringImpl
{
private:
Pos pos;
Pos end;
String separator;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "splitByString";
static String getName() { return name; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
SplitByCharImpl::checkArguments(func, arguments);
}
static constexpr auto strings_argument_position = 1uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[0].column->getName(), getName());
separator = col->getValue<String>();
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 2);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
if (separator.empty())
{
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = end;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
pos += 1;
token_end = pos;
++splits;
}
else
{
if (!pos)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = nullptr;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
pos = reinterpret_cast<Pos>(memmem(pos, end - pos, separator.data(), separator.size()));
if (pos)
{
token_end = pos;
pos += separator.size();
++splits;
}
else
token_end = end;
}
return true;
}
};
class SplitByRegexpImpl
{
private:
Regexps::RegexpPtr re;
OptimizedRegularExpression::MatchVec matches;
Pos pos;
Pos end;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "splitByRegexp";
static String getName() { return name; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
SplitByStringImpl::checkArguments(func, arguments);
}
static constexpr auto strings_argument_position = 1uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[0].column->getName(), getName());
if (!col->getValue<String>().empty())
re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 2);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
if (!re)
{
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = end;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
pos += 1;
token_end = pos;
++splits;
}
else
{
if (!pos || pos > end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = nullptr;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
if (!re->match(pos, end - pos, matches) || !matches[0].length)
{
token_end = end;
pos = end + 1;
}
else
{
token_end = pos + matches[0].offset;
pos = token_end + matches[0].length;
++splits;
}
}
return true;
}
};
class ExtractAllImpl
{
private:
Regexps::RegexpPtr re;
OptimizedRegularExpression::MatchVec matches;
size_t capture;
Pos pos;
Pos end;
public:
static constexpr auto name = "extractAll";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 2; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"haystack", &isString<IDataType>, nullptr, "String"},
{"pattern", &isString<IDataType>, isColumnConst, "const String"}
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
}
static constexpr auto strings_argument_position = 0uz;
void init(const ColumnsWithTypeAndName & arguments, bool /*max_substrings_includes_remaining_string*/)
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[1].column->getName(), getName());
re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
capture = re->getNumberOfSubpatterns() > 0 ? 1 : 0;
matches.resize(capture + 1);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
if (!pos || pos > end)
return false;
if (!re->match(pos, end - pos, matches) || !matches[0].length)
return false;
if (matches[capture].offset == std::string::npos)
{
/// Empty match.
token_begin = pos;
token_end = pos;
}
else
{
token_begin = pos + matches[capture].offset;
token_end = token_begin + matches[capture].length;
}
pos += matches[0].offset + matches[0].length;
return true;
}
};
/// A function that takes a string, and returns an array of substrings created by some generator.
template <typename Generator>
class FunctionTokens : public IFunction
{
private:
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = Generator::name;
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); }
explicit FunctionTokens<Generator>(ContextPtr context)
{
const Settings & settings = context->getSettingsRef();
max_substrings_includes_remaining_string = settings.splitby_max_substrings_includes_remaining_string;
}
String getName() const override { return name; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
bool isVariadic() const override { return Generator::isVariadic(); }
size_t getNumberOfArguments() const override { return Generator::getNumberOfArguments(); }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
Generator::checkArguments(*this, arguments);
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
{
Generator generator;
generator.init(arguments, max_substrings_includes_remaining_string);
const auto & array_argument = arguments[generator.strings_argument_position];
const ColumnString * col_str = checkAndGetColumn<ColumnString>(array_argument.column.get());
const ColumnConst * col_str_const = checkAndGetColumnConstStringOrFixedString(array_argument.column.get());
auto col_res = ColumnArray::create(ColumnString::create());
ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
ColumnString::Chars & res_strings_chars = res_strings.getChars();
ColumnString::Offsets & res_strings_offsets = res_strings.getOffsets();
ColumnArray::Offsets & res_offsets = col_res->getOffsets();
if (col_str)
{
const ColumnString::Chars & src_chars = col_str->getChars();
const ColumnString::Offsets & src_offsets = col_str->getOffsets();
res_offsets.reserve(src_offsets.size());
res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - at random.
res_strings_chars.reserve(src_chars.size());
Pos token_begin = nullptr;
Pos token_end = nullptr;
size_t size = src_offsets.size();
ColumnString::Offset current_src_offset = 0;
ColumnArray::Offset current_dst_offset = 0;
ColumnString::Offset current_dst_strings_offset = 0;
for (size_t i = 0; i < size; ++i)
{
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]);
current_src_offset = src_offsets[i];
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1;
generator.set(pos, end);
size_t j = 0;
while (generator.get(token_begin, token_end))
{
size_t token_size = token_end - token_begin;
res_strings_chars.resize(res_strings_chars.size() + token_size + 1);
memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size);
res_strings_chars[current_dst_strings_offset + token_size] = 0;
current_dst_strings_offset += token_size + 1;
res_strings_offsets.push_back(current_dst_strings_offset);
++j;
}
current_dst_offset += j;
res_offsets.push_back(current_dst_offset);
}
return col_res;
}
else if (col_str_const)
{
String src = col_str_const->getValue<String>();
Array dst;
generator.set(src.data(), src.data() + src.size());
Pos token_begin = nullptr;
Pos token_end = nullptr;
while (generator.get(token_begin, token_end))
dst.push_back(String(token_begin, token_end - token_begin));
return result_type->createColumnConst(col_str_const->size(), dst);
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {}, {} of arguments of function {}",
array_argument.column->getName(), array_argument.column->getName(), getName());
}
};
/// Joins an array of type serializable to string into one string via a separator.
class FunctionArrayStringConcat : public IFunction
{
private:
static void executeInternal(
const ColumnString::Chars & src_chars,
const ColumnString::Offsets & src_string_offsets,
const ColumnArray::Offsets & src_array_offsets,
const char * delimiter,
const size_t delimiter_size,
ColumnString::Chars & dst_chars,
ColumnString::Offsets & dst_string_offsets,
const char8_t * null_map)
{
size_t size = src_array_offsets.size();
if (!size)
return;
/// With a small margin - as if the separator goes after the last string of the array.
dst_chars.resize(
src_chars.size()
+ delimiter_size * src_string_offsets.size() /// Separators after each string...
+ src_array_offsets.size() /// Zero byte after each joined string
- src_string_offsets.size()); /// The former zero byte after each string of the array
/// There will be as many strings as there were arrays.
dst_string_offsets.resize(src_array_offsets.size());
ColumnArray::Offset current_src_array_offset = 0;
ColumnString::Offset current_dst_string_offset = 0;
/// Loop through the array of strings.
for (size_t i = 0; i < size; ++i)
{
bool first_non_null = true;
/// Loop through the rows within the array.
/// NOTE: You can do everything in one copy if the separator has a size of 1.
for (auto next_src_array_offset = src_array_offsets[i]; current_src_array_offset < next_src_array_offset; ++current_src_array_offset)
{
if (null_map && null_map[current_src_array_offset]) [[unlikely]]
continue;
if (!first_non_null)
{
memcpy(&dst_chars[current_dst_string_offset], delimiter, delimiter_size);
current_dst_string_offset += delimiter_size;
}
first_non_null = false;
const auto current_src_string_offset = current_src_array_offset ? src_string_offsets[current_src_array_offset - 1] : 0;
size_t bytes_to_copy = src_string_offsets[current_src_array_offset] - current_src_string_offset - 1;
memcpySmallAllowReadWriteOverflow15(
&dst_chars[current_dst_string_offset], &src_chars[current_src_string_offset], bytes_to_copy);
current_dst_string_offset += bytes_to_copy;
}
dst_chars[current_dst_string_offset] = 0;
++current_dst_string_offset;
dst_string_offsets[i] = current_dst_string_offset;
}
dst_chars.resize(dst_string_offsets.back());
}
static void executeInternal(
const ColumnString & col_string,
const ColumnArray & col_arr,
const String & delimiter,
ColumnString & col_res,
const char8_t * null_map = nullptr)
{
executeInternal(
col_string.getChars(),
col_string.getOffsets(),
col_arr.getOffsets(),
delimiter.data(),
delimiter.size(),
col_res.getChars(),
col_res.getOffsets(),
null_map);
}
static ColumnPtr serializeNestedColumn(const ColumnArray & col_arr, const DataTypePtr & nested_type)
{
if (isString(nested_type))
{
return col_arr.getDataPtr();
}
else if (const ColumnNullable * col_nullable = checkAndGetColumn<ColumnNullable>(col_arr.getData());
col_nullable && isString(col_nullable->getNestedColumn().getDataType()))
{
return col_nullable->getNestedColumnPtr();
}
else
{
ColumnsWithTypeAndName cols;
cols.emplace_back(col_arr.getDataPtr(), nested_type, "tmp");
return ConvertImplGenericToString<ColumnString>::execute(cols, std::make_shared<DataTypeString>(), col_arr.size());
}
}
public:
static constexpr auto name = "arrayStringConcat";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayStringConcat>(); }
String getName() const override
{
return name;
}
bool isVariadic() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override;
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
{
String delimiter;
if (arguments.size() == 2)
{
const ColumnConst * col_delim = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
if (!col_delim)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant string.", getName());
delimiter = col_delim->getValue<String>();
}
const auto & nested_type = assert_cast<const DataTypeArray &>(*arguments[0].type).getNestedType();
if (const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arguments[0].column.get());
col_const_arr && isString(nested_type))
{
Array src_arr = col_const_arr->getValue<Array>();
String dst_str;
bool first_non_null = true;
for (size_t i = 0, size = src_arr.size(); i < size; ++i)
{
if (src_arr[i].isNull())
continue;
if (!first_non_null)
dst_str += delimiter;
first_non_null = false;
dst_str += src_arr[i].get<const String &>();
}
return result_type->createColumnConst(col_const_arr->size(), dst_str);
}
ColumnPtr src_column = arguments[0].column->convertToFullColumnIfConst();
const ColumnArray & col_arr = assert_cast<const ColumnArray &>(*src_column.get());
ColumnPtr str_subcolumn = serializeNestedColumn(col_arr, nested_type);
const ColumnString & col_string = assert_cast<const ColumnString &>(*str_subcolumn.get());
auto col_res = ColumnString::create();
if (const ColumnNullable * col_nullable = checkAndGetColumn<ColumnNullable>(col_arr.getData()))
executeInternal(col_string, col_arr, delimiter, *col_res, col_nullable->getNullMapData().data());
else
executeInternal(col_string, col_arr, delimiter, *col_res);
return col_res;
}
};
using FunctionSplitByAlpha = FunctionTokens<SplitByAlphaImpl>;
using FunctionSplitByNonAlpha = FunctionTokens<SplitByNonAlphaImpl>;
using FunctionSplitByWhitespace = FunctionTokens<SplitByWhitespaceImpl>;
using FunctionSplitByChar = FunctionTokens<SplitByCharImpl>;
using FunctionSplitByString = FunctionTokens<SplitByStringImpl>;
using FunctionSplitByRegexp = FunctionTokens<SplitByRegexpImpl>;
using FunctionExtractAll = FunctionTokens<ExtractAllImpl>;
}


@ -1,9 +1,15 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h>
#include <Functions/FunctionTokens.h>
namespace DB
{
namespace
{
using Pos = const char *;
class URLPathHierarchyImpl
{
private:
@ -14,7 +20,6 @@ private:
public:
static constexpr auto name = "URLPathHierarchy";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
@ -95,9 +100,10 @@ public:
};
struct NameURLPathHierarchy { static constexpr auto name = "URLPathHierarchy"; };
using FunctionURLPathHierarchy = FunctionTokens<URLPathHierarchyImpl>;
}
REGISTER_FUNCTION(URLPathHierarchy)
{
factory.registerFunction<FunctionURLPathHierarchy>();


@ -1,9 +1,14 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h>
#include <Functions/FunctionTokens.h>
namespace DB
{
namespace
{
using Pos = const char *;
class URLHierarchyImpl
{
private:
@ -13,7 +18,6 @@ private:
public:
static constexpr auto name = "URLHierarchy";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
@ -97,9 +101,10 @@ public:
};
struct NameURLHierarchy { static constexpr auto name = "URLHierarchy"; };
using FunctionURLHierarchy = FunctionTokens<URLHierarchyImpl>;
}
REGISTER_FUNCTION(URLHierarchy)
{
factory.registerFunction<FunctionURLHierarchy>();


@ -1,9 +1,14 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h>
#include <Functions/FunctionTokens.h>
namespace DB
{
namespace
{
using Pos = const char *;
class ExtractURLParameterNamesImpl
{
private:
@ -13,7 +18,6 @@ private:
public:
static constexpr auto name = "extractURLParameterNames";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
@ -80,9 +84,10 @@ public:
}
};
struct NameExtractURLParameterNames { static constexpr auto name = "extractURLParameterNames"; };
using FunctionExtractURLParameterNames = FunctionTokens<ExtractURLParameterNamesImpl>;
}
REGISTER_FUNCTION(ExtractURLParameterNames)
{
factory.registerFunction<FunctionExtractURLParameterNames>();


@ -1,9 +1,15 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h>
#include <Functions/FunctionTokens.h>
namespace DB
{
namespace
{
using Pos = const char *;
class ExtractURLParametersImpl
{
private:
@ -13,7 +19,6 @@ private:
public:
static constexpr auto name = "extractURLParameters";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
@ -88,9 +93,10 @@ public:
}
};
struct NameExtractURLParameters { static constexpr auto name = "extractURLParameters"; };
using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
}
REGISTER_FUNCTION(ExtractURLParameters)
{
factory.registerFunction<FunctionExtractURLParameters>();


@ -0,0 +1,104 @@
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Common/StringUtils/StringUtils.h>
namespace DB
{
/** Functions that split strings into an array of strings or vice versa.
*
* alphaTokens(s[, max_substrings]) - select from the string subsequence `[a-zA-Z]+`.
*/
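/// Illustrative examples (not from the source; the outputs follow from the tokenizer logic below,
/// and assume the default splitby_max_substrings_includes_remaining_string = 0):
///   alphaTokens('abca1abc')     -> ['abca', 'abc']
///   alphaTokens('abca1abc', 1)  -> ['abca']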
namespace
{
using Pos = const char *;
class SplitByAlphaImpl
{
private:
Pos pos;
Pos end;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "alphaTokens";
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
checkArgumentsWithOptionalMaxSubstrings(func, arguments);
}
static constexpr auto strings_argument_position = 0uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 1);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
/// Skip garbage
while (pos < end && !isAlphaASCII(*pos))
++pos;
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = end;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
while (pos < end && isAlphaASCII(*pos))
++pos;
token_end = pos;
++splits;
return true;
}
};
using FunctionSplitByAlpha = FunctionTokens<SplitByAlphaImpl>;
}
REGISTER_FUNCTION(SplitByAlpha)
{
factory.registerFunction<FunctionSplitByAlpha>();
factory.registerAlias("splitByAlpha", FunctionSplitByAlpha::name);
}
}


@ -4,7 +4,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <base/range.h>
namespace DB
@ -46,10 +45,10 @@ private:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isString(arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName());
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of the first argument of function {}", arguments[0]->getName(), getName());
if (!isString(arguments[1]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[1]->getName(), getName());
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of the second argument of function {}", arguments[1]->getName(), getName());
return std::make_shared<DataTypeString>();
}


@ -0,0 +1,202 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/Regexps.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/castColumn.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
/** arrayStringConcat(arr)
* arrayStringConcat(arr, delimiter)
* - join an array of strings into one string via a separator.
*/
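/// Illustrative examples (not from the source; the behavior follows from executeInternal below,
/// which skips NULL elements via the null map):
///   arrayStringConcat(['a', 'b', 'c'])       -> 'abc'
///   arrayStringConcat(['a', 'b', 'c'], ';')  -> 'a;b;c'
///   arrayStringConcat(['a', NULL, 'c'], ';') -> 'a;c'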
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
namespace
{
/// Joins an array of type serializable to string into one string via a separator.
class FunctionArrayStringConcat : public IFunction
{
private:
static void executeInternal(
const ColumnString::Chars & src_chars,
const ColumnString::Offsets & src_string_offsets,
const ColumnArray::Offsets & src_array_offsets,
const char * delimiter,
const size_t delimiter_size,
ColumnString::Chars & dst_chars,
ColumnString::Offsets & dst_string_offsets,
const char8_t * null_map)
{
size_t size = src_array_offsets.size();
if (!size)
return;
/// With a small margin - as if the separator goes after the last string of the array.
dst_chars.resize(
src_chars.size()
+ delimiter_size * src_string_offsets.size() /// Separators after each string...
+ src_array_offsets.size() /// Zero byte after each joined string
- src_string_offsets.size()); /// The former zero byte after each string of the array
/// There will be as many strings as there were arrays.
dst_string_offsets.resize(src_array_offsets.size());
ColumnArray::Offset current_src_array_offset = 0;
ColumnString::Offset current_dst_string_offset = 0;
/// Loop through the array of strings.
for (size_t i = 0; i < size; ++i)
{
bool first_non_null = true;
/// Loop through the rows within the array.
/// NOTE: You can do everything in one copy if the separator has a size of 1.
for (auto next_src_array_offset = src_array_offsets[i]; current_src_array_offset < next_src_array_offset; ++current_src_array_offset)
{
if (null_map && null_map[current_src_array_offset]) [[unlikely]]
continue;
if (!first_non_null)
{
memcpy(&dst_chars[current_dst_string_offset], delimiter, delimiter_size);
current_dst_string_offset += delimiter_size;
}
first_non_null = false;
const auto current_src_string_offset = current_src_array_offset ? src_string_offsets[current_src_array_offset - 1] : 0;
size_t bytes_to_copy = src_string_offsets[current_src_array_offset] - current_src_string_offset - 1;
memcpySmallAllowReadWriteOverflow15(
&dst_chars[current_dst_string_offset], &src_chars[current_src_string_offset], bytes_to_copy);
current_dst_string_offset += bytes_to_copy;
}
dst_chars[current_dst_string_offset] = 0;
++current_dst_string_offset;
dst_string_offsets[i] = current_dst_string_offset;
}
dst_chars.resize(dst_string_offsets.back());
}
static void executeInternal(
const ColumnString & col_string,
const ColumnArray & col_arr,
const String & delimiter,
ColumnString & col_res,
const char8_t * null_map = nullptr)
{
executeInternal(
col_string.getChars(),
col_string.getOffsets(),
col_arr.getOffsets(),
delimiter.data(),
delimiter.size(),
col_res.getChars(),
col_res.getOffsets(),
null_map);
}
static ColumnPtr serializeNestedColumn(const ColumnArray & col_arr, const DataTypePtr & nested_type)
{
DataTypePtr type = nested_type;
ColumnPtr column = col_arr.getDataPtr();
if (type->isNullable())
{
type = removeNullable(type);
column = assert_cast<const ColumnNullable &>(*column).getNestedColumnPtr();
}
return castColumn({column, type, "tmp"}, std::make_shared<DataTypeString>());
}
public:
static constexpr auto name = "arrayStringConcat";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayStringConcat>(); }
String getName() const override
{
return name;
}
bool isVariadic() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors mandatory_args
{
{"arr", &isArray<IDataType>, nullptr, "Array"},
};
FunctionArgumentDescriptors optional_args
{
{"separator", &isString<IDataType>, isColumnConst, "const String"},
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
{
String delimiter;
if (arguments.size() == 2)
{
const ColumnConst * col_delim = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
if (!col_delim)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant string.", getName());
delimiter = col_delim->getValue<String>();
}
const auto & nested_type = assert_cast<const DataTypeArray &>(*arguments[0].type).getNestedType();
const ColumnArray & col_arr = assert_cast<const ColumnArray &>(*arguments[0].column);
ColumnPtr str_subcolumn = serializeNestedColumn(col_arr, nested_type);
const ColumnString & col_string = assert_cast<const ColumnString &>(*str_subcolumn.get());
auto col_res = ColumnString::create();
if (const ColumnNullable * col_nullable = checkAndGetColumn<ColumnNullable>(col_arr.getData()))
executeInternal(col_string, col_arr, delimiter, *col_res, col_nullable->getNullMapData().data());
else
executeInternal(col_string, col_arr, delimiter, *col_res);
return col_res;
}
};
}
REGISTER_FUNCTION(ArrayStringConcat)
{
factory.registerFunction<FunctionArrayStringConcat>();
}
}


@ -0,0 +1,122 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Functions/Regexps.h>
#include <Interpreters/Context.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
/** Functions that split strings into an array of strings or vice versa.
*
 * extractAll(s, regexp) - selects from the string the substrings matching the regexp:
 * - the first subpattern, if the regexp has a subpattern;
 * - otherwise, the zero subpattern (the whole match);
 * - or an empty array, if there is no match.
*/
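/// Illustrative examples (not from the source; the outputs follow from the capture logic below):
///   extractAll('hello, world', '[a-z]+')  -> ['hello', 'world']
///   extractAll('a=1; b=2', '=(\\d+)')     -> ['1', '2']  (the first subpattern is used when present)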
namespace
{
using Pos = const char *;
class ExtractAllImpl
{
private:
Regexps::RegexpPtr re;
OptimizedRegularExpression::MatchVec matches;
size_t capture;
Pos pos;
Pos end;
public:
static constexpr auto name = "extractAll";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 2; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"haystack", &isString<IDataType>, nullptr, "String"},
{"pattern", &isString<IDataType>, isColumnConst, "const String"}
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
}
static constexpr auto strings_argument_position = 0uz;
void init(const ColumnsWithTypeAndName & arguments, bool /*max_substrings_includes_remaining_string*/)
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[1].column->getName(), getName());
re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
capture = re->getNumberOfSubpatterns() > 0 ? 1 : 0;
matches.resize(capture + 1);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
if (!pos || pos > end)
return false;
if (!re->match(pos, end - pos, matches) || !matches[0].length)
return false;
if (matches[capture].offset == std::string::npos)
{
/// Empty match.
token_begin = pos;
token_end = pos;
}
else
{
token_begin = pos + matches[capture].offset;
token_end = token_begin + matches[capture].length;
}
pos += matches[0].offset + matches[0].length;
return true;
}
};
using FunctionExtractAll = FunctionTokens<ExtractAllImpl>;
}
REGISTER_FUNCTION(ExtractAll)
{
factory.registerFunction<FunctionExtractAll>();
}
}


@ -9,12 +9,14 @@
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/IFunction.h>
#include <Functions/castTypeToEither.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/numLiteralChars.h>
#include <Interpreters/Context.h>
#include <Interpreters/castColumn.h>
#include <IO/WriteHelpers.h>
#include <Common/Concepts.h>
@ -803,18 +805,7 @@ public:
{
if (arguments.size() == 1)
{
if (!castType(arguments[0].type.get(), [&](const auto & type)
{
using FromDataType = std::decay_t<decltype(type)>;
res = ConvertImpl<FromDataType, DataTypeDateTime, Name>::execute(arguments, result_type, input_rows_count);
return true;
}))
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of function {}, must be Integer, Date, Date32, DateTime "
"or DateTime64 when arguments size is 1.",
arguments[0].column->getName(), getName());
}
return castColumn(arguments[0], result_type);
}
else
{


@ -7,15 +7,16 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/IFunction.h>
#include <Functions/castTypeToEither.h>
#include <Functions/numLiteralChars.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <base/types.h>
#include <boost/algorithm/string/case_conv.hpp>
namespace DB
{
namespace ErrorCodes


@ -0,0 +1,122 @@
#include <Columns/ColumnConst.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
}
/** Functions that split strings into an array of strings or vice versa.
*
* splitByChar(sep, s[, max_substrings])
*/
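/// Illustrative examples (not from the source; assuming the default
/// splitby_max_substrings_includes_remaining_string = 0):
///   splitByChar(',', '1,2,3')     -> ['1', '2', '3']
///   splitByChar(',', '1,2,3', 2)  -> ['1', '2']
/// With the setting enabled, the second example would return ['1', '2,3'].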
namespace
{
using Pos = const char *;
class SplitByCharImpl
{
private:
Pos pos;
Pos end;
char separator;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "splitByChar";
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments);
}
static constexpr auto strings_argument_position = 1uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[0].column->getName(), name);
String sep_str = col->getValue<String>();
if (sep_str.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal separator for function {}. Must be exactly one byte.", name);
separator = sep_str[0];
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 2);
}
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
bool get(Pos & token_begin, Pos & token_end)
{
if (!pos)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = nullptr;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
if (pos)
{
token_end = pos;
++pos;
++splits;
}
else
token_end = end;
return true;
}
};
using FunctionSplitByChar = FunctionTokens<SplitByCharImpl>;
}
REGISTER_FUNCTION(SplitByChar)
{
factory.registerFunction<FunctionSplitByChar>();
}
}


@ -0,0 +1,113 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Functions/Regexps.h>
#include <Interpreters/Context.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
namespace DB
{
/** Functions that split strings into an array of strings or vice versa.
*
* splitByNonAlpha(s[, max_substrings]) - split the string by whitespace and punctuation characters
*/
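/// Illustrative example (not from the source): whitespace and punctuation both act as separators,
/// so splitByNonAlpha('  1!  a,  b.  ')  -> ['1', 'a', 'b']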
namespace
{
using Pos = const char *;
class SplitByNonAlphaImpl
{
private:
Pos pos;
Pos end;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
/// Get the name of the function.
static constexpr auto name = "splitByNonAlpha";
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
checkArgumentsWithOptionalMaxSubstrings(func, arguments);
}
static constexpr auto strings_argument_position = 0uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 1);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
/// Skip garbage
while (pos < end && (isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
++pos;
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = end;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
while (pos < end && !(isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
++pos;
token_end = pos;
splits++;
return true;
}
};
using FunctionSplitByNonAlpha = FunctionTokens<SplitByNonAlphaImpl>;
}
REGISTER_FUNCTION(SplitByNonAlpha)
{
factory.registerFunction<FunctionSplitByNonAlpha>();
}
}


@ -0,0 +1,156 @@
#include <Columns/ColumnConst.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Functions/Regexps.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
/** Functions that split strings into an array of strings or vice versa.
*
* splitByRegexp(regexp, s[, max_substrings])
*/
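/// Illustrative examples (not from the source; the outputs follow from the matching logic below):
///   splitByRegexp('\\d+', 'a12bc23de345f')  -> ['a', 'bc', 'de', 'f']
///   splitByRegexp('', 'abcd')               -> ['a', 'b', 'c', 'd']  (an empty pattern splits into single characters)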
namespace
{
using Pos = const char *;
class SplitByRegexpImpl
{
private:
Regexps::RegexpPtr re;
OptimizedRegularExpression::MatchVec matches;
Pos pos;
Pos end;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "splitByRegexp";
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments);
}
static constexpr auto strings_argument_position = 1uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[0].column->getName(), name);
if (!col->getValue<String>().empty())
re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 2);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
if (!re)
{
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = end;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
pos += 1;
token_end = pos;
++splits;
}
else
{
if (!pos || pos > end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = nullptr;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
if (!re->match(pos, end - pos, matches) || !matches[0].length)
{
token_end = end;
pos = end + 1;
}
else
{
token_end = pos + matches[0].offset;
pos = token_end + matches[0].length;
++splits;
}
}
return true;
}
};
using FunctionSplitByRegexp = FunctionTokens<SplitByRegexpImpl>;
}
REGISTER_FUNCTION(SplitByRegexp)
{
factory.registerFunction<FunctionSplitByRegexp>();
}
}


@ -0,0 +1,148 @@
#include <Columns/ColumnConst.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
/** Functions that split strings into an array of strings or vice versa.
*
* splitByString(sep, s[, max_substrings])
*/
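/// Illustrative examples (not from the source):
///   splitByString('->', '1->2->3')  -> ['1', '2', '3']
///   splitByString('', 'abc')        -> ['a', 'b', 'c']  (an empty separator splits into single characters)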
namespace
{
using Pos = const char *;
class SplitByStringImpl
{
private:
Pos pos;
Pos end;
String separator;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "splitByString";
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments);
}
static constexpr auto strings_argument_position = 1uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[0].column->getName(), name);
separator = col->getValue<String>();
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 2);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
if (separator.empty())
{
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = end;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
pos += 1;
token_end = pos;
++splits;
}
else
{
if (!pos)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = nullptr;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
pos = reinterpret_cast<Pos>(memmem(pos, end - pos, separator.data(), separator.size()));
if (pos)
{
token_end = pos;
pos += separator.size();
++splits;
}
else
token_end = end;
}
return true;
}
};
using FunctionSplitByString = FunctionTokens<SplitByStringImpl>;
}
REGISTER_FUNCTION(SplitByString)
{
factory.registerFunction<FunctionSplitByString>();
}
}


@ -0,0 +1,101 @@
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Common/StringUtils/StringUtils.h>
namespace DB
{
/** Functions that split strings into an array of strings or vice versa.
*
* splitByWhitespace(s[, max_substrings]) - split the string by whitespace characters
*/
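/// Illustrative example (not from the source): only whitespace acts as a separator,
/// so splitByWhitespace(' 1!  a,  b. ')  -> ['1!', 'a,', 'b.']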
namespace
{
using Pos = const char *;
class SplitByWhitespaceImpl
{
private:
Pos pos;
Pos end;
std::optional<size_t> max_splits;
size_t splits;
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "splitByWhitespace";
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
checkArgumentsWithOptionalMaxSubstrings(func, arguments);
}
static constexpr auto strings_argument_position = 0uz;
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 1);
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
/// Skip garbage
while (pos < end && isWhitespaceASCII(*pos))
++pos;
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
{
if (max_substrings_includes_remaining_string)
{
if (splits == *max_splits - 1)
{
token_end = end;
pos = end;
return true;
}
}
else
if (splits == *max_splits)
return false;
}
while (pos < end && !isWhitespaceASCII(*pos))
++pos;
token_end = pos;
splits++;
return true;
}
};
using FunctionSplitByWhitespace = FunctionTokens<SplitByWhitespaceImpl>;
}
REGISTER_FUNCTION(SplitByWhitespace)
{
factory.registerFunction<FunctionSplitByWhitespace>();
}
}


@ -1,6 +1,5 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/CastOverloadResolver.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeString.h>
@ -12,6 +11,14 @@ namespace
{
class FunctionToBool : public IFunction
{
private:
ContextPtr context;
static String getReturnTypeName(const DataTypePtr & argument)
{
return argument->isNullable() ? "Nullable(Bool)" : "Bool";
}
public:
static constexpr auto name = "toBool";
@ -32,8 +39,7 @@ namespace
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
auto bool_type = DataTypeFactory::instance().get("Bool");
return arguments[0]->isNullable() ? makeNullable(bool_type) : bool_type;
return DataTypeFactory::instance().get(getReturnTypeName(arguments[0]));
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override
@ -42,18 +48,17 @@ namespace
{
arguments[0],
{
DataTypeString().createColumnConst(arguments[0].column->size(), arguments[0].type->isNullable() ? "Nullable(Bool)" : "Bool"),
DataTypeString().createColumnConst(arguments[0].column->size(), getReturnTypeName(arguments[0].type)),
std::make_shared<DataTypeString>(),
""
}
};
FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver<CastType::nonAccurate>::createImpl();
FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {});
auto func_cast = func_builder_cast->build(cast_args);
return func_cast->execute(cast_args, result_type, arguments[0].column->size());
}
};
}
REGISTER_FUNCTION(ToBool)


@ -387,27 +387,44 @@ Model::CompleteMultipartUploadOutcome Client::CompleteMultipartUpload(const Comp
auto outcome = doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>(
request, [this](const Model::CompleteMultipartUploadRequest & req) { return CompleteMultipartUpload(req); });
if (!outcome.IsSuccess() || provider_type != ProviderType::GCS)
return outcome;
const auto & key = request.GetKey();
const auto & bucket = request.GetBucket();
/// For GCS we will try to compose the object at the end; otherwise we cannot do a native copy
/// of the object later (e.g. for backups).
/// We don't care if the compose fails: the upload was still successful, and only the
/// performance of copying the object will be affected.
S3::ComposeObjectRequest compose_req;
compose_req.SetBucket(bucket);
compose_req.SetKey(key);
compose_req.SetComponentNames({key});
compose_req.SetContentType("binary/octet-stream");
auto compose_outcome = ComposeObject(compose_req);
if (!outcome.IsSuccess()
&& outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_UPLOAD)
{
auto check_request = HeadObjectRequest()
.WithBucket(bucket)
.WithKey(key);
auto check_outcome = HeadObject(check_request);
if (compose_outcome.IsSuccess())
LOG_TRACE(log, "Composing object was successful");
else
LOG_INFO(log, "Failed to compose object. Message: {}, Key: {}, Bucket: {}", compose_outcome.GetError().GetMessage(), key, bucket);
/// If the key exists, then the MultipartUpload was completed during one of the retries;
/// rewrite the outcome with a success status.
if (check_outcome.IsSuccess())
outcome = Aws::S3::Model::CompleteMultipartUploadOutcome(Aws::S3::Model::CompleteMultipartUploadResult());
}
if (outcome.IsSuccess() && provider_type == ProviderType::GCS)
{
/// For GCS we will try to compose the object at the end; otherwise we cannot do a native copy
/// of the object later (e.g. for backups).
/// We don't care if the compose fails: the upload was still successful, and only the
/// performance of copying the object will be affected.
S3::ComposeObjectRequest compose_req;
compose_req.SetBucket(bucket);
compose_req.SetKey(key);
compose_req.SetComponentNames({key});
compose_req.SetContentType("binary/octet-stream");
auto compose_outcome = ComposeObject(compose_req);
if (compose_outcome.IsSuccess())
LOG_TRACE(log, "Composing object was successful");
else
LOG_INFO(
log,
"Failed to compose object. Message: {}, Key: {}, Bucket: {}",
compose_outcome.GetError().GetMessage(), key, bucket);
}
return outcome;
}
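The core idea of the hunk above is an idempotency check: a retried CompleteMultipartUpload can race with its own earlier success and come back with NO_SUCH_UPLOAD even though the object already exists. A self-contained sketch of that recovery logic with stubbed types (hypothetical names, not the AWS SDK):

#include <functional>
#include <iostream>
#include <string>

/// Stubbed outcome; the real code uses AWS SDK request/outcome types.
struct Outcome
{
    bool success = false;
    std::string error;
};

/// If completion failed with NoSuchUpload but a HEAD request still sees the
/// key, an earlier retry already completed the upload, so the failure is
/// rewritten as success.
Outcome completeWithIdempotencyCheck(
    const std::function<Outcome()> & complete_multipart_upload,
    const std::function<bool()> & head_object_exists)
{
    Outcome outcome = complete_multipart_upload();
    if (!outcome.success && outcome.error == "NoSuchUpload" && head_object_exists())
        outcome = Outcome{true, {}};
    return outcome;
}

int main()
{
    auto outcome = completeWithIdempotencyCheck(
        [] { return Outcome{false, "NoSuchUpload"}; }, /// a retry raced with itself
        [] { return true; });                          /// but the object is there
    std::cout << (outcome.success ? "treated as success" : "failed") << '\n';
}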


@@ -1,9 +1,4 @@
-#include <exception>
-#include <variant>
 #include <IO/S3/Credentials.h>
-#include <boost/algorithm/string/classification.hpp>
-#include <Poco/Exception.h>
#include "Common/Exception.h"
#if USE_AWS_S3
@@ -16,7 +11,6 @@
# include <aws/core/utils/UUID.h>
# include <aws/core/http/HttpClientFactory.h>
# include <IO/S3/PocoHTTPClientFactory.h>
# include <aws/core/utils/HashingUtils.h>
# include <aws/core/platform/FileSystem.h>
@@ -28,16 +22,6 @@
# include <fstream>
# include <base/EnumReflection.h>
-#include <boost/algorithm/string.hpp>
-#include <boost/algorithm/string/split.hpp>
-#include <Poco/URI.h>
-#include <Poco/Net/HTTPClientSession.h>
-#include <Poco/Net/HTTPRequest.h>
-#include <Poco/Net/HTTPResponse.h>
-#include <Poco/StreamCopier.h>
namespace DB
{
@@ -45,8 +29,6 @@ namespace DB
namespace ErrorCodes
{
extern const int AWS_ERROR;
-    extern const int GCP_ERROR;
-    extern const int UNSUPPORTED_METHOD;
}
namespace S3
@@ -169,6 +151,30 @@ Aws::String AWSEC2MetadataClient::getDefaultCredentialsSecurely() const
return GetResourceWithAWSWebServiceResult(credentials_request).GetPayload();
}
+Aws::String AWSEC2MetadataClient::getCurrentAvailabilityZone() const
+{
+    String user_agent_string = awsComputeUserAgentString();
+    auto [new_token, response_code] = getEC2MetadataToken(user_agent_string);
+    if (response_code != Aws::Http::HttpResponseCode::OK || new_token.empty())
+        throw DB::Exception(ErrorCodes::AWS_ERROR,
+            "Failed to make token request. HTTP response code: {}", response_code);
+
+    token = std::move(new_token);
+    const String url = endpoint + EC2_AVAILABILITY_ZONE_RESOURCE;
+    std::shared_ptr<Aws::Http::HttpRequest> profile_request(
+        Aws::Http::CreateHttpRequest(url, Aws::Http::HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod));
+    profile_request->SetHeaderValue(EC2_IMDS_TOKEN_HEADER, token);
+    profile_request->SetUserAgent(user_agent_string);
+
+    const auto result = GetResourceWithAWSWebServiceResult(profile_request);
+    if (result.GetResponseCode() != Aws::Http::HttpResponseCode::OK)
+        throw DB::Exception(ErrorCodes::AWS_ERROR,
+            "Failed to get availability zone. HTTP response code: {}", result.GetResponseCode());
+
+    return Aws::Utils::StringUtils::Trim(result.GetPayload().c_str());
+}
std::pair<Aws::String, Aws::Http::HttpResponseCode> AWSEC2MetadataClient::getEC2MetadataToken(const std::string & user_agent_string) const
{
std::lock_guard locker(token_mutex);
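The added getCurrentAvailabilityZone() follows the IMDSv2 pattern: obtain a short-lived session token first, then replay it as a header on the actual metadata query. A self-contained sketch of that two-step flow with a stubbed HTTP helper (all names hypothetical, not the AWS SDK):

#include <stdexcept>
#include <string>
#include <utility>

/// Stub for an HTTP round-trip; the real code goes through the AWS SDK.
static std::pair<int, std::string> httpRequest(
    const std::string & /*method*/, const std::string & /*url*/,
    const std::string & /*token_header*/)
{
    return {200, "us-east-1a"}; /// canned response for the sketch
}

/// Step 1: PUT /latest/api/token to get a session token.
/// Step 2: GET the metadata resource with the token attached.
std::string getAvailabilityZone(const std::string & endpoint)
{
    auto [token_code, imds_token] = httpRequest("PUT", endpoint + "/latest/api/token", "");
    if (token_code != 200 || imds_token.empty())
        throw std::runtime_error("Failed to make token request");

    auto [code, payload] = httpRequest(
        "GET", endpoint + "/latest/meta-data/placement/availability-zone", imds_token);
    if (code != 200)
        throw std::runtime_error("Failed to get availability zone");
    return payload;
}

int main()
{
    return getAvailabilityZone("http://169.254.169.254").empty();
}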
@@ -193,10 +199,10 @@ Aws::String AWSEC2MetadataClient::getCurrentRegion() const
return Aws::Region::AWS_GLOBAL;
}
-static Aws::String getAWSMetadataEndpoint()
+std::shared_ptr<AWSEC2MetadataClient> InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration)
 {
-    auto * logger = &Poco::Logger::get("AWSEC2InstanceProfileConfigLoader");
     Aws::String ec2_metadata_service_endpoint = Aws::Environment::GetEnv("AWS_EC2_METADATA_SERVICE_ENDPOINT");
+    auto * logger = &Poco::Logger::get("AWSEC2InstanceProfileConfigLoader");
if (ec2_metadata_service_endpoint.empty())
{
Aws::String ec2_metadata_service_endpoint_mode = Aws::Environment::GetEnv("AWS_EC2_METADATA_SERVICE_ENDPOINT_MODE");
@@ -227,95 +233,8 @@ static Aws::String getAWSMetadataEndpoint()
}
}
}
-    return ec2_metadata_service_endpoint;
-}
-
-std::shared_ptr<AWSEC2MetadataClient> InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration)
-{
-    auto endpoint = getAWSMetadataEndpoint();
-    return std::make_shared<AWSEC2MetadataClient>(client_configuration, endpoint.c_str());
-}
-String AWSEC2MetadataClient::getAvailabilityZoneOrException()
-{
-    Poco::URI uri(getAWSMetadataEndpoint() + EC2_AVAILABILITY_ZONE_RESOURCE);
-    Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort());
-    Poco::Net::HTTPResponse response;
-    Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, uri.getPath());
-    session.sendRequest(request);
-    std::istream & rs = session.receiveResponse(response);
-    if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
-        throw DB::Exception(ErrorCodes::AWS_ERROR, "Failed to get AWS availability zone. HTTP response code: {}", response.getStatus());
-    String response_data;
-    Poco::StreamCopier::copyToString(rs, response_data);
-    return response_data;
-}
-String getGCPAvailabilityZoneOrException()
-{
-    Poco::URI uri(String(GCP_METADATA_SERVICE_ENDPOINT) + "/computeMetadata/v1/instance/zone");
-    Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort());
-    Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, uri.getPath());
-    Poco::Net::HTTPResponse response;
-    request.set("Metadata-Flavor", "Google");
-    session.sendRequest(request);
-    std::istream & rs = session.receiveResponse(response);
-    if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
-        throw DB::Exception(ErrorCodes::GCP_ERROR, "Failed to get GCP availability zone. HTTP response code: {}", response.getStatus());
-    String response_data;
-    Poco::StreamCopier::copyToString(rs, response_data);
-    Strings zone_info;
-    boost::split(zone_info, response_data, boost::is_any_of("/"));
-    /// We expect GCP to return a string such as "projects/123456789/zones/us-central1a".
-    if (zone_info.size() != 4)
-        throw DB::Exception(ErrorCodes::GCP_ERROR, "Invalid format of GCP zone information, expected projects/<project-number>/zones/<zone-value>, got {}", response_data);
-    return zone_info[3];
-}
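The (now removed) GCP path had to extract the bare zone from a full resource path. A standalone sketch of that split-and-validate step, using std::getline instead of boost::split:

#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

/// Splits "projects/<project-number>/zones/<zone>" and returns <zone>;
/// any other shape is rejected, mirroring the size() != 4 check above.
std::string parseGCPZone(const std::string & response)
{
    std::vector<std::string> parts;
    std::istringstream stream(response);
    std::string item;
    while (std::getline(stream, item, '/'))
        parts.push_back(item);
    if (parts.size() != 4 || parts[0] != "projects" || parts[2] != "zones")
        throw std::runtime_error("Invalid format of GCP zone information: " + response);
    return parts[3];
}

int main()
{
    std::cout << parseGCPZone("projects/123456789/zones/us-central1-a") << '\n';
}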
-String getRunningAvailabilityZoneImpl()
-{
-    LOG_INFO(&Poco::Logger::get("Application"), "Trying to detect the availability zone.");
-    try
-    {
-        auto aws_az = AWSEC2MetadataClient::getAvailabilityZoneOrException();
-        return aws_az;
-    }
-    catch (const DB::Exception & aws_ex)
-    {
-        try
-        {
-            auto gcp_zone = getGCPAvailabilityZoneOrException();
-            return gcp_zone;
-        }
-        catch (const DB::Exception & gcp_ex)
-        {
-            throw DB::Exception(ErrorCodes::UNSUPPORTED_METHOD,
-                "Failed to find the availability zone, tried AWS and GCP. AWS Error: {}\nGCP Error: {}", aws_ex.displayText(), gcp_ex.displayText());
-        }
-    }
-}
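The removed detection logic is a classic provider-fallback chain: try AWS first, fall back to GCP, and only fail once every provider has failed, preserving each error message. A generic sketch of that chain (hypothetical names, not the ClickHouse API):

#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

/// Tries each probe in order and collects every failure message, so the
/// final error explains why all providers were rejected.
std::string detectZone(
    const std::vector<std::pair<std::string, std::function<std::string()>>> & probes)
{
    std::string errors;
    for (const auto & [name, probe] : probes)
    {
        try
        {
            return probe();
        }
        catch (const std::exception & e)
        {
            errors += name + ": " + e.what() + "\n";
        }
    }
    throw std::runtime_error("Failed to find the availability zone, tried:\n" + errors);
}

int main()
{
    auto zone = detectZone({
        {"AWS", []() -> std::string { throw std::runtime_error("no IMDS reachable"); }},
        {"GCP", [] { return std::string("us-central1-a"); }}});
    std::cout << zone << '\n';
}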
-std::variant<String, std::exception_ptr> getRunningAvailabilityZoneImplOrException()
-{
-    try
-    {
-        return getRunningAvailabilityZoneImpl();
-    }
-    catch (...)
-    {
-        return std::current_exception();
-    }
-}
-String getRunningAvailabilityZone()
-{
-    static auto az_or_exception = getRunningAvailabilityZoneImplOrException();
-    if (const auto * az = std::get_if<String>(&az_or_exception))
-        return *az;
-    else
-        std::rethrow_exception(std::get<std::exception_ptr>(az_or_exception));
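getRunningAvailabilityZone (also removed here) memoized its first result in a function-local static, caching the thrown exception as well as the value, so every later call replays the same outcome. A minimal sketch of that value-or-exception caching:

#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <variant>

static std::string computeZone()
{
    throw std::runtime_error("no metadata service reachable"); /// pretend detection failed
}

std::string getZoneCached()
{
    /// Runs computeZone() exactly once; both success and failure are frozen.
    static std::variant<std::string, std::exception_ptr> cached = []() -> std::variant<std::string, std::exception_ptr>
    {
        try
        {
            return computeZone();
        }
        catch (...)
        {
            return std::current_exception();
        }
    }();

    if (const auto * zone = std::get_if<std::string>(&cached))
        return *zone;
    std::rethrow_exception(std::get<std::exception_ptr>(cached));
}

int main()
{
    try { getZoneCached(); }
    catch (const std::exception & e) { std::cout << e.what() << '\n'; }
}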
+    LOG_INFO(logger, "Using IMDS endpoint: {}", ec2_metadata_service_endpoint);
+    return std::make_shared<AWSEC2MetadataClient>(client_configuration, ec2_metadata_service_endpoint.c_str());
}
AWSEC2InstanceProfileConfigLoader::AWSEC2InstanceProfileConfigLoader(const std::shared_ptr<AWSEC2MetadataClient> & client_, bool use_secure_pull_)
@@ -784,6 +703,7 @@ S3CredentialsProviderChain::S3CredentialsProviderChain(
aws_client_configuration.requestTimeoutMs = 1000;
aws_client_configuration.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(1, 1000);
auto ec2_metadata_client = InitEC2MetadataClient(aws_client_configuration);
auto config_loader = std::make_shared<AWSEC2InstanceProfileConfigLoader>(ec2_metadata_client, !credentials_configuration.use_insecure_imds_request);
@@ -801,21 +721,4 @@
}
-#else
-namespace DB
-{
-namespace S3
-{
-String getRunningAvailabilityZone()
-{
-    throw Poco::Exception("Does not support availability zone detection for non-cloud environment");
-}
-}
-}
#endif

Some files were not shown because too many files have changed in this diff.