Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-26 01:22:04 +00:00)

Commit 04630c3e5a: Merge branch 'master' into better-settings

.github/actions/clean/action.yml (vendored, new file, +11 lines)
@@ -0,0 +1,11 @@
+name: Clean runner
+description: Clean the runner's temp path on ending
+runs:
+  using: "composite"
+  steps:
+    - name: Clean
+      shell: bash
+      run: |
+        docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+        docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+        sudo rm -fr "${{runner.temp}}"
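
For orientation: a workflow job consumes a composite action like this one through a local `uses:` reference after checkout. A minimal sketch of a consumer, assuming the action is used as a final cleanup step (the job and step names here are illustrative, not part of this commit):

    # hypothetical consumer of .github/actions/clean
    ExampleJob:
      runs-on: [self-hosted, builder]
      steps:
        - name: Check out repository code
          uses: ClickHouse/checkout@v1
        - name: Clean
          if: always()                     # run cleanup even when earlier steps fail
          uses: ./.github/actions/clean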

.github/actions/common_setup/action.yml (vendored, new file, +33 lines)
@@ -0,0 +1,33 @@
+name: Common setup
+description: Setup necessary environments
+inputs:
+  job_type:
+    description: the name to use in the TEMP_PATH and REPO_COPY
+    default: common
+    type: string
+  nested_job:
+    description: the fuse for unintended use inside of the reusable callable jobs
+    default: true
+    type: boolean
+runs:
+  using: "composite"
+  steps:
+    - name: Setup and check ENV
+      shell: bash
+      run: |
+        echo "Setup the common ENV variables"
+        cat >> "$GITHUB_ENV" << 'EOF'
+        TEMP_PATH=${{runner.temp}}/${{inputs.job_type}}
+        REPO_COPY=${{runner.temp}}/${{inputs.job_type}}/git-repo-copy
+        EOF
+        if [ -z "${{env.GITHUB_JOB_OVERRIDDEN}}" ] && [ "true" == "${{inputs.nested_job}}" ]; then
+          echo "The GITHUB_JOB_OVERRIDDEN ENV is unset, and must be set for the nested jobs"
+          exit 1
+        fi
+    - name: Setup $TEMP_PATH
+      shell: bash
+      run: |
+        # to remove every leftovers
+        sudo rm -fr "$TEMP_PATH"
+        mkdir -p "$REPO_COPY"
+        cp -a "$GITHUB_WORKSPACE"/. "$REPO_COPY"/
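
The `nested_job` fuse makes the action fail fast unless the caller exported GITHUB_JOB_OVERRIDDEN, i.e. unless it runs inside a reusable (callable) workflow job. A minimal sketch of a conforming caller — the env value, job name, and `job_type` value are illustrative assumptions, not taken from this diff:

    # hypothetical nested job that satisfies the fuse check
    Build:
      runs-on: [self-hosted, builder]
      env:
        GITHUB_JOB_OVERRIDDEN: Build     # unset value would make common_setup exit 1
      steps:
        - name: Check out repository code
          uses: ClickHouse/checkout@v1
        - name: Common setup
          uses: ./.github/actions/common_setup
          with:
            job_type: build_check        # TEMP_PATH becomes ${{runner.temp}}/build_check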

.github/workflows/backport_branches.yml (vendored, 328 changed lines)
@@ -1,3 +1,4 @@
+# yamllint disable rule:comments-indentation
 name: BackportPR

 env:
@@ -169,320 +170,43 @@ jobs:
 #########################################################################################
   BuilderDebRelease:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_release
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # For a proper version and performance artifacts
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_release
+      checkout_depth: 0
   BuilderDebAarch64:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_aarch64
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # For a proper version and performance artifacts
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_aarch64
+      checkout_depth: 0
   BuilderDebAsan:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_asan
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_asan
   BuilderDebTsan:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_tsan
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_tsan
   BuilderDebDebug:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_debug
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Apply sparse checkout for contrib # in order to check that it doesn't break build
-        run: |
-          rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
-          git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
-          "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
-          du -hs "$GITHUB_WORKSPACE/contrib" ||:
-          find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_debug
   BuilderBinDarwin:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_darwin
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Apply sparse checkout for contrib # in order to check that it doesn't break build
-        run: |
-          rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
-          git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
-          "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
-          du -hs "$GITHUB_WORKSPACE/contrib" ||:
-          find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_darwin
+      checkout_depth: 0
   BuilderBinDarwinAarch64:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_darwin_aarch64
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Apply sparse checkout for contrib # in order to check that it doesn't break build
-        run: |
-          rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
-          git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
-          "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
-          du -hs "$GITHUB_WORKSPACE/contrib" ||:
-          find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_darwin_aarch64
+      checkout_depth: 0
 ############################################################################################
 ##################################### Docker images #######################################
 ############################################################################################

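Each builder job above collapses from roughly 45 inlined lines to a short call into reusable_build.yml, which this diff references but does not show. A minimal sketch of what such a callable workflow might look like, assuming it accepts the build_name and checkout_depth inputs seen above, exports GITHUB_JOB_OVERRIDDEN for the common_setup fuse, and delegates to the same build_check.py entry point — everything not visible in the diff is an assumption:

    # hypothetical sketch of .github/workflows/reusable_build.yml (not part of this diff)
    name: ReusableBuild
    'on':
      workflow_call:
        inputs:
          build_name:
            description: name of the build passed to build_check.py
            type: string
            required: true
          checkout_depth:
            description: fetch depth for the git checkout (0 means full history)
            type: number
            default: 1
    jobs:
      Build:
        name: Build-${{ inputs.build_name }}
        runs-on: [self-hosted, builder]
        env:
          # exported so the common_setup fuse check passes in nested jobs
          GITHUB_JOB_OVERRIDDEN: Build-${{ inputs.build_name }}
        steps:
          - name: Check out repository code
            uses: ClickHouse/checkout@v1
            with:
              clear-repository: true
              submodules: true
              fetch-depth: ${{ inputs.checkout_depth }}
          - name: Common setup
            uses: ./.github/actions/common_setup
            with:
              job_type: build_check
          - name: Build
            run: cd "$REPO_COPY/tests/ci" && python3 build_check.py "${{ inputs.build_name }}"
          - name: Clean
            if: always()
            uses: ./.github/actions/clean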

.github/workflows/master.yml (vendored, 813 changed lines)
@@ -1,3 +1,4 @@
+# yamllint disable rule:comments-indentation
 name: MasterCI

 env:
@@ -184,789 +185,109 @@ jobs:
 #########################################################################################
   BuilderDebRelease:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_release
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # For a proper version and performance artifacts
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      checkout_depth: 0
+      build_name: package_release
   BuilderDebAarch64:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_aarch64
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ runner.temp }}/images_path
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # For a proper version and performance artifacts
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      checkout_depth: 0
+      build_name: package_aarch64
   BuilderBinRelease:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_release
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      checkout_depth: 0
+      build_name: binary_release
   BuilderDebAsan:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_asan
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_asan
   BuilderDebUBsan:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_ubsan
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_ubsan
   BuilderDebTsan:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_tsan
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_tsan
   BuilderDebMsan:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_msan
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_msan
   BuilderDebDebug:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_debug
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Apply sparse checkout for contrib # in order to check that it doesn't break build
-        run: |
-          rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
-          git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
-          "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
-          du -hs "$GITHUB_WORKSPACE/contrib" ||:
-          find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_debug
 ##########################################################################################
 ##################################### SPECIAL BUILDS #####################################
 ##########################################################################################
   BuilderBinClangTidy:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_tidy
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_tidy
   BuilderBinDarwin:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_darwin
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Apply sparse checkout for contrib # in order to check that it doesn't break build
-        run: |
-          rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
-          git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
-          "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
-          du -hs "$GITHUB_WORKSPACE/contrib" ||:
-          find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_darwin
+      checkout_depth: 0
   BuilderBinAarch64:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_aarch64
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_aarch64
+      checkout_depth: 0
   BuilderBinFreeBSD:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_freebsd
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_freebsd
+      checkout_depth: 0
   BuilderBinDarwinAarch64:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_darwin_aarch64
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Apply sparse checkout for contrib # in order to check that it doesn't break build
-        run: |
-          rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
-          git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
-          "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
-          du -hs "$GITHUB_WORKSPACE/contrib" ||:
-          find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_darwin_aarch64
+      checkout_depth: 0
   BuilderBinPPC64:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_ppc64le
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_ppc64le
+      checkout_depth: 0
   BuilderBinAmd64Compat:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_amd64_compat
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_amd64_compat
+      checkout_depth: 0
   BuilderBinAarch64V80Compat:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_aarch64_v80compat
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_aarch64_v80compat
+      checkout_depth: 0
   BuilderBinRISCV64:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_riscv64
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_riscv64
+      checkout_depth: 0
   BuilderBinS390X:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_s390x
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # otherwise we will have no info about contributors
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_s390x
+      checkout_depth: 0
 ############################################################################################
 ##################################### Docker images #######################################
 ############################################################################################


.github/workflows/pull_request.yml (vendored, 822 changed lines)
@@ -1,3 +1,4 @@
+# yamllint disable rule:comments-indentation
 name: PullRequestCI

 env:
@@ -246,771 +247,100 @@ jobs:
 #################################### ORDINARY BUILDS ####################################
 #########################################################################################
   BuilderDebRelease:
-    needs: [DockerHubPush, FastTest, StyleCheck]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_release
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          fetch-depth: 0 # for performance artifact
-          filter: tree:0
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH"
-  BuilderBinRelease:
-    needs: [DockerHubPush, FastTest, StyleCheck]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_release
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    needs: [FastTest, StyleCheck]
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_release
+      checkout_depth: 0
   BuilderDebAarch64:
-    needs: [DockerHubPush, FastTest, StyleCheck]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_aarch64
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ runner.temp }}/images_path
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # for performance artifact
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    needs: [FastTest, StyleCheck]
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_aarch64
+      checkout_depth: 0
+  BuilderBinRelease:
+    needs: [FastTest, StyleCheck]
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_release
   BuilderDebAsan:
-    needs: [DockerHubPush, FastTest, StyleCheck]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_asan
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    needs: [FastTest, StyleCheck]
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_asan
   BuilderDebUBsan:
-    needs: [DockerHubPush, FastTest, StyleCheck]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_ubsan
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    needs: [FastTest, StyleCheck]
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_ubsan
   BuilderDebTsan:
-    needs: [DockerHubPush, FastTest, StyleCheck]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_tsan
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    needs: [FastTest, StyleCheck]
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_tsan
   BuilderDebMsan:
-    needs: [DockerHubPush, FastTest, StyleCheck]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_msan
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    needs: [FastTest, StyleCheck]
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_msan
   BuilderDebDebug:
-    needs: [DockerHubPush, FastTest, StyleCheck]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_debug
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Apply sparse checkout for contrib # in order to check that it doesn't break build
-        run: |
-          rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
-          git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
-          "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
-          du -hs "$GITHUB_WORKSPACE/contrib" ||:
-          find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    needs: [FastTest, StyleCheck]
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_debug
 ##########################################################################################
 ##################################### SPECIAL BUILDS #####################################
 ##########################################################################################
   BuilderBinClangTidy:
-    needs: [DockerHubPush, FastTest, StyleCheck]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=binary_tidy
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_tidy
|
||||
BuilderBinDarwin:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_darwin
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
|
||||
run: |
|
||||
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
|
||||
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
|
||||
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
|
||||
du -hs "$GITHUB_WORKSPACE/contrib" ||:
|
||||
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_darwin
|
||||
BuilderBinAarch64:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_aarch64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_aarch64
|
||||
BuilderBinFreeBSD:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_freebsd
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_freebsd
|
||||
BuilderBinDarwinAarch64:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_darwin_aarch64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
|
||||
run: |
|
||||
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
|
||||
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
|
||||
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
|
||||
du -hs "$GITHUB_WORKSPACE/contrib" ||:
|
||||
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_darwin_aarch64
|
||||
BuilderBinPPC64:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_ppc64le
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_ppc64le
|
||||
BuilderBinAmd64Compat:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_amd64_compat
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_amd64_compat
|
||||
BuilderBinAarch64V80Compat:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_aarch64_v80compat
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_aarch64_v80compat
|
||||
BuilderBinRISCV64:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_riscv64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_riscv64
|
||||
BuilderBinS390X:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_s390x
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_s390x
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
|
411
.github/workflows/release_branches.yml
vendored
@ -1,3 +1,4 @@
# yamllint disable rule:comments-indentation
name: ReleaseBranchCI

env:
@ -140,401 +141,53 @@ jobs:
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_release
checkout_depth: 0
BuilderDebAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_aarch64
checkout_depth: 0
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_asan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_asan
BuilderDebUBsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_ubsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_ubsan
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_tsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_tsan
BuilderDebMsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_msan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_msan
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_debug
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_debug
BuilderBinDarwin:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin
checkout_depth: 0
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin_aarch64
checkout_depth: 0
############################################################################################
##################################### Docker images #######################################
############################################################################################

74
.github/workflows/reusable_build.yml
vendored
Normal file
@ -0,0 +1,74 @@
### For the pure soul wishes to move it to another place
# https://github.com/orgs/community/discussions/9050

name: Build ClickHouse
'on':
workflow_call:
inputs:
build_name:
description: the value of build type from tests/ci/ci_config.py
required: true
type: string
checkout_depth:
description: the value of the git shallow checkout
required: false
type: number
default: 1
runner_type:
description: the label of runner to use
default: builder
type: string
additional_envs:
description: additional ENV variables to setup the job
type: string

jobs:
Build:
name: Build-${{inputs.build_name}}
runs-on: [self-hosted, '${{inputs.runner_type}}']
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: ${{inputs.checkout_depth}}
filter: tree:0
- name: Set build envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
IMAGES_PATH=${{runner.temp}}/images_path
GITHUB_JOB_OVERRIDDEN=Build-${{inputs.build_name}}
${{inputs.additional_envs}}
EOF
python3 "$GITHUB_WORKSPACE"/tests/ci/ci_config.py --build-name "${{inputs.build_name}}" >> "$GITHUB_ENV"
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
# This step is done in GITHUB_WORKSPACE,
# because it's broken in REPO_COPY for some reason
if: ${{ env.BUILD_SPARSE_CHECKOUT == 'true' }}
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Common setup
uses: ./.github/actions/common_setup
with:
job_type: build_check
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Build
run: |
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Clean
uses: ./.github/actions/clean
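
With this reusable workflow in place, each Builder* job above collapses to a thin call site. A minimal sketch of such a caller follows; the job name and input values here are illustrative, not taken from this commit:

```yaml
jobs:
  # Hypothetical caller job; build_name must match an entry in tests/ci/ci_config.py.
  BuilderDebExample:
    needs: [DockerHubPush, FastTest, StyleCheck]
    uses: ./.github/workflows/reusable_build.yml
    with:
      build_name: package_release
      checkout_depth: 0        # full history, e.g. for version and performance artifacts
      runner_type: builder     # label of the self-hosted runner pool
      additional_envs: |
        EXAMPLE_ENV=1          # extra variables appended to $GITHUB_ENV by the Build job
```
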
3
.gitmodules
vendored
@ -1,3 +1,6 @@
# Please do not use 'branch = ...' tags with submodule entries. Such tags make updating submodules a
# little bit more convenient but they do *not* specify the tracked submodule branch. Thus, they are
# more confusing than useful.
[submodule "contrib/zstd"]
path = contrib/zstd
url = https://github.com/facebook/zstd

20
README.md
@ -1,6 +1,17 @@
[<img alt="ClickHouse — open source distributed column-oriented DBMS" width="400px" src="https://clickhouse.com/images/ch_gh_logo_rounded.png" />](https://clickhouse.com?utm_source=github)
<div align=center>

ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.
[![Website](https://img.shields.io/website?up_message=AVAILABLE&down_message=DOWN&url=https%3A%2F%2Fclickhouse.com&style=for-the-badge)](https://clickhouse.com)
[![Apache 2.0 License](https://img.shields.io/badge/license-Apache%202.0-blueviolet?style=for-the-badge)](https://www.apache.org/licenses/LICENSE-2.0)

<picture align=center>
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/4ef9c104-2d3f-4646-b186-507358d2fe28">
<source media="(prefers-color-scheme: light)" srcset="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/b001dc7b-5a45-4dcd-9275-e03beb7f9177">
<img alt="The ClickHouse company logo." src="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/b001dc7b-5a45-4dcd-9275-e03beb7f9177">
</picture>

<h4>ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.</h4>

</div>

## How To Install (Linux, macOS, FreeBSD)
```
@ -22,8 +33,7 @@ curl https://clickhouse.com/ | sh

## Upcoming Events

* [**ClickHouse Meetup in Beijing**](https://www.meetup.com/clickhouse-beijing-user-group/events/296334856/) - Nov 4
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 8
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 14
* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/296334976/) - Nov 15
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11
@ -33,7 +43,7 @@ Also, keep an eye out for upcoming meetups around the world. Somewhere else you

## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
* **Recording available**: [**v23.6 Release Webinar**](https://www.youtube.com/watch?v=cuf_hYn7dqU) All the features of 23.6, one convenient video! Watch it now!
* **Recording available**: [**v23.10 Release Webinar**](https://www.youtube.com/watch?v=PGQS6uPb970) All the features of 23.10, one convenient video! Watch it now!
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)

@ -119,17 +119,16 @@
#include <base/types.h>
namespace DB
{
void abortOnFailedAssertion(const String & description);
[[noreturn]] void abortOnFailedAssertion(const String & description);
}
#define chassert(x) static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x)
#define chassert(x) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
#define UNREACHABLE() abort()
// clang-format off
#else
/// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while
/// "sizeof(!(x))" will not.
#define NIL_EXPRESSION(x) (void)sizeof(!(x))
#define chassert(x) NIL_EXPRESSION(x)
#define chassert(x) (void)sizeof(!(x))
#define UNREACHABLE() __builtin_unreachable()
#endif
#endif
2
contrib/NuRaft
vendored
@ -1 +1 @@
Subproject commit eb1572129c71beb2156dcdaadc3fb136954aed96
Subproject commit b7ea89b817a18dc0eafc1f909d568869f02d2d04
2
contrib/grpc
vendored
@ -1 +1 @@
Subproject commit bef8212d1e01f99e406c282ceab3d42da08e09ce
Subproject commit 267af8c3a1ea4a5a4d9e5a070ad2d1ac7c701923
@ -6,12 +6,12 @@ FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/test/*' >> $FILES_TO_CHECKOUT
echo '/test/build/*' >> $FILES_TO_CHECKOUT
echo '/test/core/tsi/alts/fake_handshaker/*' >> $FILES_TO_CHECKOUT
echo '!/tools/*' >> $FILES_TO_CHECKOUT
echo '/tools/codegen/*' >> $FILES_TO_CHECKOUT
echo '!/examples/*' >> $FILES_TO_CHECKOUT
echo '!/doc/*' >> $FILES_TO_CHECKOUT
# FIXME why do we need csharp?
#echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT
echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT
echo '!/src/python/*' >> $FILES_TO_CHECKOUT
echo '!/src/objective-c/*' >> $FILES_TO_CHECKOUT
echo '!/src/php/*' >> $FILES_TO_CHECKOUT

11
contrib/update-submodules.sh
vendored
@ -1,11 +1,12 @@
#!/bin/sh

set -e

WORKDIR=$(dirname "$0")
WORKDIR=$(readlink -f "${WORKDIR}")
SCRIPT_PATH=$(realpath "$0")
SCRIPT_DIR=$(dirname "${SCRIPT_PATH}")
GIT_DIR=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel)
cd $GIT_DIR

"$WORKDIR/sparse-checkout/setup-sparse-checkout.sh"
contrib/sparse-checkout/setup-sparse-checkout.sh
git submodule init
git submodule sync
git submodule update --depth=1
git config --file .gitmodules --get-regexp .*path | sed 's/[^ ]* //' | xargs -I _ --max-procs 64 git submodule update --depth=1 --single-branch _

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.10.2.13"
ARG VERSION="23.10.3.5"
ARG PACKAGES="clickhouse-keeper"

# user/group precreated explicitly with fixed uid/gid on purpose.

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.10.2.13"
ARG VERSION="23.10.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# user/group precreated explicitly with fixed uid/gid on purpose.

@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.10.2.13"
ARG VERSION="23.10.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# set non-empty deb_location_url url to create a docker image

@ -53,31 +53,28 @@ function configure()
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml

function randomize_config_boolean_value {
function randomize_keeper_config_boolean_value {
value=$(($RANDOM % 2))
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
sudo cat /etc/clickhouse-server/config.d/$2.xml \
| sed "s|<$1>[01]</$1>|<$1>$value</$1>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
> /etc/clickhouse-server/config.d/$2.xml.tmp
sudo mv /etc/clickhouse-server/config.d/$2.xml.tmp /etc/clickhouse-server/config.d/$2.xml
}

if [[ -n "$RANDOMIZE_KEEPER_FEATURE_FLAGS" ]] && [[ "$RANDOMIZE_KEEPER_FEATURE_FLAGS" -eq 1 ]]; then
# Randomize all Keeper feature flags
randomize_config_boolean_value filtered_list
randomize_config_boolean_value multi_read
randomize_config_boolean_value check_not_exists
randomize_config_boolean_value create_if_not_exists
randomize_config_boolean_value filtered_list keeper_port
randomize_config_boolean_value multi_read keeper_port
randomize_config_boolean_value check_not_exists keeper_port
randomize_config_boolean_value create_if_not_exists keeper_port
fi

sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml

#Randomize merge tree setting allow_experimental_block_number_column
value=$(($RANDOM % 2))
sudo cat /etc/clickhouse-server/config.d/merge_tree_settings.xml \
| sed "s|<allow_experimental_block_number_column>[01]</allow_experimental_block_number_column>|<allow_experimental_block_number_column>$value</allow_experimental_block_number_column>|" \
> /etc/clickhouse-server/config.d/merge_tree_settings.xml.tmp
sudo mv /etc/clickhouse-server/config.d/merge_tree_settings.xml.tmp /etc/clickhouse-server/config.d/merge_tree_settings.xml
randomize_config_boolean_value use_compression zookeeper

randomize_config_boolean_value allow_experimental_block_number_column merge_tree_settings

# for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment

16
docs/changelogs/v23.10.3.5-stable.md
Normal file
@ -0,0 +1,16 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.10.3.5-stable (b2ba7637a41) FIXME as compared to v23.10.2.13-stable (65d8522bb1d)

#### Improvement
* Backported in [#56513](https://github.com/ClickHouse/ClickHouse/issues/56513): Allow backup of materialized view with dropped inner table instead of failing the backup. [#56387](https://github.com/ClickHouse/ClickHouse/pull/56387) ([Kseniia Sumarokova](https://github.com/kssenii)).

#### NO CL CATEGORY

* Backported in [#56605](https://github.com/ClickHouse/ClickHouse/issues/56605):. [#56598](https://github.com/ClickHouse/ClickHouse/pull/56598) ([Maksim Kita](https://github.com/kitaisreal)).

@ -1,5 +1,4 @@
---
slug: /en/getting-started/example-datasets/wikistat
sidebar_label: WikiStat
---

@ -2156,7 +2156,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_local_file_min_bytes_for_seek](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_local_file_min_bytes_for_seek) - min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format. Default value - `8192`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`.

## ParquetMetadata {data-format-parquet-metadata}

@ -1,5 +1,4 @@
---
slug: /en/operations/optimizing-performance/profile-guided-optimization
sidebar_position: 54
sidebar_label: Profile Guided Optimization (PGO)
---
@ -2427,6 +2427,8 @@ This section contains the following parameters:
* hostname_levenshtein_distance - just like nearest_hostname, but it compares hostname in a levenshtein distance manner.
* first_or_random - selects the first ZooKeeper node, if it's not available then randomly selects one of remaining ZooKeeper nodes.
* round_robin - selects the first ZooKeeper node, if reconnection happens selects the next.
- `use_compression` — If set to true, enables compression in Keeper protocol.

**Example configuration**

35
docs/en/operations/system-tables/symbols.md
Normal file
@ -0,0 +1,35 @@
---
slug: /en/operations/system-tables/symbols
---
# symbols

Contains information for introspection of `clickhouse` binary. It requires the introspection privilege to access.
This table is only useful for C++ experts and ClickHouse engineers.

Columns:

- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol name in the binary. It is mangled. You can apply `demangle(symbol)` to obtain a readable name.
- `address_begin` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Start address of the symbol in the binary.
- `address_end` ([UInt64](../../sql-reference/data-types/int-uint.md)) — End address of the symbol in the binary.
- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `event`.

**Example**

``` sql
SELECT address_begin, address_end - address_begin AS size, demangle(symbol) FROM system.symbols ORDER BY size DESC LIMIT 10
```

``` text
┌─address_begin─┬─────size─┬─demangle(symbol)──────────────────────────────────────────────────────────────────┐
│      25000976 │ 29466000 │ icudt70_dat                                                                       │
│     400605288 │  2097272 │ arena_emap_global                                                                 │
│      18760592 │  1048576 │ CLD2::kQuadChrome1015_2                                                           │
│       9807152 │   884808 │ TopLevelDomainLookupHash::isValid(char const*, unsigned long)::wordlist           │
│      57442432 │   850608 │ llvm::X86Insts                                                                    │
│      55682944 │   681360 │ (anonymous namespace)::X86DAGToDAGISel::SelectCode(llvm::SDNode*)::MatcherTable   │
│      55130368 │   502840 │ (anonymous namespace)::X86InstructionSelector::getMatchTable() const::MatchTable0 │
│     402930616 │   404032 │ qpl::ml::dispatcher::hw_dispatcher::get_instance()::instance                      │
│     274131872 │   356795 │ DB::SettingsTraits::Accessor::instance()::$_0::operator()() const                 │
│      58293040 │   249424 │ llvm::X86InstrNameData                                                            │
└───────────────┴──────────┴───────────────────────────────────────────────────────────────────────────────────┘
```
@ -12,7 +12,6 @@ A client application to interact with clickhouse-keeper by its native protocol.
- `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
- `-p N`, `--port=N` — Server port. Default value: 9181
- `-c FILE_PATH`, `--config-file=FILE_PATH` — Set path of config file to get the connection string. Default value: `config.xml`.
- `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.

@ -2766,9 +2766,11 @@ Result:
|
||||
|
||||
## fromUnixTimestamp
|
||||
|
||||
Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type.
|
||||
This function converts a Unix timestamp to a calendar date and a time of a day.
|
||||
|
||||
fromUnixTimestamp uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
|
||||
It can be called in two ways:
|
||||
|
||||
When given a single argument of type [Integer](../../sql-reference/data-types/int-uint.md), it returns a value of type [DateTime](../../sql-reference/data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime).
|
||||
|
||||
Alias: `FROM_UNIXTIME`.
|
||||
|
||||
@ -2786,14 +2788,16 @@ Result:

└──────────────────────────────┘
```

When there are two or three arguments, the first an [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second a constant format string and the third an optional constant time zone string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type.
When given two or three arguments where the first argument is a value of type [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../../sql-reference/data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used.

For example:
**Example:**

```sql
SELECT fromUnixTimestamp(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
```

Result:

```text
┌─DateTime────────────┐
│ 2009-02-11 14:42:23 │
@ -2806,19 +2810,20 @@ SELECT fromUnixTimestamp(1234334543, '%Y-%m-%d %R:%S') AS DateTime;

## fromUnixTimestampInJodaSyntax

Similar to fromUnixTimestamp, except that it formats time in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
Same as [fromUnixTimestamp](#fromUnixTimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style.

**Example:**

``` sql
SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC');
SELECT fromUnixTimestampInJodaSyntax(1234334543, 'yyyy-MM-dd HH:mm:ss', 'UTC') AS DateTime;
```

Result:

```
┌─fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC')────┐
│ 2022-11-30 10:41:12 │
└────────────────────────────────────────────────────────────────────────────┘
┌─DateTime────────────┐
│ 2009-02-11 06:42:23 │
└─────────────────────┘
```
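As a further hedged sketch, the Joda format string can be any pattern the linked reference supports; a date-only variant (values are illustrative):

``` sql
SELECT fromUnixTimestampInJodaSyntax(1234334543, 'yyyy-MM-dd', 'UTC') AS Date;
```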
## toModifiedJulianDay

@ -1840,9 +1840,9 @@ Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and

**Syntax**

``` sql
fromUnixTimestamp64Milli(value [, timezone])
fromUnixTimestamp64Micro(value [, timezone])
fromUnixTimestamp64Nano(value [, timezone])
fromUnixTimestamp64Milli(value[, timezone])
fromUnixTimestamp64Micro(value[, timezone])
fromUnixTimestamp64Nano(value[, timezone])
```

**Arguments**
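To make the signatures concrete, a minimal hedged usage sketch (the millisecond value is an arbitrary example; the result is a `DateTime64` in the given time zone):

``` sql
SELECT fromUnixTimestamp64Milli(toInt64(1234334543123), 'UTC');
```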
@ -104,15 +104,14 @@

    </url_scheme_mappers>

    <!-- Add headers to response in options request. OPTIONS method is used in CORS preflight requests. -->
    <!-- It is off by default. Next headers are obligatory for CORS.-->
    <!-- http_options_response>
    <http_options_response>
        <header>
            <name>Access-Control-Allow-Origin</name>
            <value>*</value>
        </header>
        <header>
            <name>Access-Control-Allow-Headers</name>
            <value>origin, x-requested-with</value>
            <value>origin, x-requested-with, x-clickhouse-format, x-clickhouse-user, x-clickhouse-key, Authorization</value>
        </header>
        <header>
            <name>Access-Control-Allow-Methods</name>

@ -122,7 +121,7 @@

            <name>Access-Control-Max-Age</name>
            <value>86400</value>
        </header>
    </http_options_response -->
    </http_options_response>

    <!-- It is the name that will be shown in the clickhouse-client.
         By default, anything with "production" will be highlighted in red in query prompt.
@ -466,7 +466,7 @@ public:

        std::vector<DataSet *> data_vec;
        data_vec.resize(places.size());

        for (unsigned long i = 0; i < data_vec.size(); i++)
        for (size_t i = 0; i < data_vec.size(); ++i)
            data_vec[i] = &this->data(places[i]).set;

        DataSet::parallelizeMergePrepare(data_vec, thread_pool);
@ -143,7 +143,6 @@ namespace

void registerAggregateFunctionUniqCombined(AggregateFunctionFactory & factory)
{
    using namespace std::placeholders;
    factory.registerFunction("uniqCombined",
        [](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
        {
@ -1,4 +1,4 @@
#include <Analyzer/Passes/AnyFunctionPass.h>
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>

#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
@ -14,8 +14,80 @@ namespace DB

namespace
{

class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>
class MoveFunctionsOutOfAnyVisitor : public InDepthQueryTreeVisitorWithContext<MoveFunctionsOutOfAnyVisitor>
{
public:
    using Base = InDepthQueryTreeVisitorWithContext<MoveFunctionsOutOfAnyVisitor>;
    using Base::Base;

    void enterImpl(QueryTreeNodePtr & node)
    {
        if (!getSettings().optimize_move_functions_out_of_any)
            return;

        auto * function_node = node->as<FunctionNode>();
        if (!function_node)
            return;

        /// check function is any
        const auto & function_name = function_node->getFunctionName();
        if (function_name != "any" && function_name != "anyLast")
            return;

        auto & arguments = function_node->getArguments().getNodes();
        if (arguments.size() != 1)
            return;

        auto * inside_function_node = arguments[0]->as<FunctionNode>();

        /// check argument is a function
        if (!inside_function_node)
            return;

        /// check arguments can not contain arrayJoin or lambda
        if (!canRewrite(inside_function_node))
            return;

        auto & inside_function_node_arguments = inside_function_node->getArguments().getNodes();

        /// case any(f())
        if (inside_function_node_arguments.empty())
            return;

        auto it = node_to_rewritten_node.find(node.get());
        if (it != node_to_rewritten_node.end())
        {
            node = it->second;
            return;
        }

        /// checking done, rewrite function
        bool changed_argument = false;
        for (auto & inside_argument : inside_function_node_arguments)
        {
            if (inside_argument->as<ConstantNode>()) /// skip constant node
                break;

            AggregateFunctionProperties properties;
            auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, {inside_argument->getResultType()}, {}, properties);

            auto any_function = std::make_shared<FunctionNode>(function_name);
            any_function->resolveAsAggregateFunction(std::move(aggregate_function));

            auto & any_function_arguments = any_function->getArguments().getNodes();
            any_function_arguments.push_back(std::move(inside_argument));

            inside_argument = std::move(any_function);
            changed_argument = true;
        }

        if (changed_argument)
        {
            node_to_rewritten_node.emplace(node.get(), arguments[0]);
            node = arguments[0];
        }
    }

private:
    bool canRewrite(const FunctionNode * function_node)
    {
@ -45,90 +117,17 @@ private:

        return true;
    }

public:
    using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>;
    using Base::Base;

    void enterImpl(QueryTreeNodePtr & node)
    {
        if (!getSettings().optimize_move_functions_out_of_any)
            return;

        auto * function_node = node->as<FunctionNode>();
        if (!function_node)
            return;

        /// check function is any
        const auto & function_name = function_node->getFunctionName();
        if (!(function_name == "any" || function_name == "anyLast"))
            return;

        auto & arguments = function_node->getArguments().getNodes();
        if (arguments.size() != 1)
            return;

        auto * inside_function_node = arguments[0]->as<FunctionNode>();

        /// check argument is a function
        if (!inside_function_node)
            return;

        /// check arguments can not contain arrayJoin or lambda
        if (!canRewrite(inside_function_node))
            return;

        auto & inside_arguments = inside_function_node->getArguments().getNodes();

        /// case any(f())
        if (inside_arguments.empty())
            return;

        if (rewritten.contains(node.get()))
        {
            node = rewritten.at(node.get());
            return;
        }

        /// checking done, rewrite function
        bool pushed = false;
        for (auto & inside_argument : inside_arguments)
        {
            if (inside_argument->as<ConstantNode>()) /// skip constant node
                break;

            AggregateFunctionProperties properties;
            auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, {inside_argument->getResultType()}, {}, properties);

            auto any_function = std::make_shared<FunctionNode>(function_name);
            any_function->resolveAsAggregateFunction(std::move(aggregate_function));

            auto & any_function_arguments = any_function->getArguments().getNodes();
            any_function_arguments.push_back(std::move(inside_argument));

            inside_argument = std::move(any_function);
            pushed = true;
        }

        if (pushed)
        {
            rewritten.insert({node.get(), arguments[0]});
            node = arguments[0];
        }
    }

private:
    /// After query analysis alias will be rewritten to QueryTreeNode
    /// whose memory address is same with the original one.
    /// So we can reuse the rewritten one.
    std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr > rewritten;
    /// After query analysis, alias identifier will be resolved to node whose memory address is same with the original one.
    /// So we can reuse the rewritten function.
    std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr> node_to_rewritten_node;

};

}

void AnyFunctionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void MoveFunctionsOutOfAnyPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
    AnyFunctionVisitor visitor(context);
    MoveFunctionsOutOfAnyVisitor visitor(context);
    visitor.visit(query_tree_node);
}
@ -7,13 +7,13 @@ namespace DB

/** Rewrite 'any' and 'anyLast' functions pushing them inside original function.
 *
 * Example: any(f(x, y, g(z)))
 * Result: f(any(x), any(y), g(any(z)))
 * Example: SELECT any(f(x, y, g(z)));
 * Result: SELECT f(any(x), any(y), g(any(z)));
 */
class AnyFunctionPass final : public IQueryTreePass
class MoveFunctionsOutOfAnyPass final : public IQueryTreePass
{
public:
    String getName() override { return "AnyFunction"; }
    String getName() override { return "MoveFunctionsOutOfAnyPass"; }

    String getDescription() override
    {
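For context, a hedged SQL sketch of the rewrite this pass performs, using `concat` as the outer function (table and column names are illustrative, and the rewrite only applies when the setting `optimize_move_functions_out_of_any` is enabled):

``` sql
SET optimize_move_functions_out_of_any = 1;
-- The analyzer rewrites
SELECT any(concat(x, y)) FROM t;
-- into the equivalent
SELECT concat(any(x), any(y)) FROM t;
```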
@ -43,7 +43,7 @@

#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Analyzer/Passes/ConvertQueryToCNFPass.h>
#include <Analyzer/Passes/AnyFunctionPass.h>
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>

@ -164,9 +164,7 @@ private:

 *
 * TODO: Support setting optimize_substitute_columns.
 * TODO: Support GROUP BY injective function elimination.
 * TODO: Support setting optimize_move_functions_out_of_any.
 * TODO: Support setting optimize_aggregators_of_group_by_keys.
 * TODO: Support setting optimize_duplicate_order_by_and_distinct.
 * TODO: Support setting optimize_monotonous_functions_in_order_by.
 * TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
 */

@ -284,7 +282,7 @@ void addQueryTreePasses(QueryTreePassManager & manager)

    manager.addPass(std::make_unique<CrossToInnerJoinPass>());
    manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());

    manager.addPass(std::make_unique<AnyFunctionPass>());
    manager.addPass(std::make_unique<MoveFunctionsOutOfAnyPass>());
    manager.addPass(std::make_unique<OptimizeDateOrDateTimeConverterWithPreimagePass>());
}
@ -20,6 +20,12 @@

namespace fs = std::filesystem;

namespace ProfileEvents
{
    extern const Event BackupEntriesCollectorMicroseconds;
    extern const Event BackupEntriesCollectorForTablesDataMicroseconds;
    extern const Event BackupEntriesCollectorRunPostTasksMicroseconds;
}

namespace DB
{

@ -82,7 +88,8 @@ BackupEntriesCollector::BackupEntriesCollector(

    const BackupSettings & backup_settings_,
    std::shared_ptr<IBackupCoordination> backup_coordination_,
    const ReadSettings & read_settings_,
    const ContextPtr & context_)
    const ContextPtr & context_,
    ThreadPool & threadpool_)
    : backup_query_elements(backup_query_elements_)
    , backup_settings(backup_settings_)
    , backup_coordination(backup_coordination_)

@ -101,6 +108,7 @@ BackupEntriesCollector::BackupEntriesCollector(

        context->getSettingsRef().backup_restore_keeper_max_retries,
        context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms,
        context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms)
    , threadpool(threadpool_)
{
}
@ -108,6 +116,8 @@ BackupEntriesCollector::~BackupEntriesCollector() = default;

BackupEntries BackupEntriesCollector::run()
{
    auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::BackupEntriesCollectorMicroseconds);

    /// run() can be called only once.
    if (!current_stage.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Already making backup entries");
@ -133,11 +143,19 @@ BackupEntries BackupEntriesCollector::run()

    /// Make backup entries for the data of the found tables.
    setStage(Stage::EXTRACTING_DATA_FROM_TABLES);
    makeBackupEntriesForTablesData();

    {
        auto timer2 = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::BackupEntriesCollectorForTablesDataMicroseconds);
        makeBackupEntriesForTablesData();
    }

    /// Run all the tasks added with addPostCollectingTask().
    setStage(Stage::RUNNING_POST_TASKS);
    runPostTasks();

    {
        auto timer2 = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::BackupEntriesCollectorRunPostTasksMicroseconds);
        runPostTasks();
    }

    /// No more backup entries or tasks are allowed after this point.
@ -738,8 +756,20 @@ void BackupEntriesCollector::makeBackupEntriesForTablesData()

    if (backup_settings.structure_only)
        return;

    std::vector<std::future<void>> futures;
    for (const auto & table_name : table_infos | boost::adaptors::map_keys)
        makeBackupEntriesForTableData(table_name);
    {
        futures.push_back(scheduleFromThreadPool<void>([&]()
        {
            makeBackupEntriesForTableData(table_name);
        }, threadpool, "BackupCollect"));
    }
    /// Wait for all tasks.
    for (auto & future : futures)
        future.wait();
    /// Make sure there is no exception.
    for (auto & future : futures)
        future.get();
}
void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableName & table_name)

@ -775,20 +805,28 @@ void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableN

    }
}

void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntryPtr backup_entry)
void BackupEntriesCollector::addBackupEntryUnlocked(const String & file_name, BackupEntryPtr backup_entry)
{
    if (current_stage == Stage::WRITING_BACKUP)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");
    backup_entries.emplace_back(file_name, backup_entry);
}

void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntryPtr backup_entry)
{
    std::lock_guard lock(mutex);
    addBackupEntryUnlocked(file_name, backup_entry);
}

void BackupEntriesCollector::addBackupEntry(const std::pair<String, BackupEntryPtr> & backup_entry)
{
    addBackupEntry(backup_entry.first, backup_entry.second);
    std::lock_guard lock(mutex);
    addBackupEntryUnlocked(backup_entry.first, backup_entry.second);
}
void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entries_)
{
    std::lock_guard lock(mutex);
    if (current_stage == Stage::WRITING_BACKUP)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
    insertAtEnd(backup_entries, backup_entries_);

@ -796,6 +834,7 @@ void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entri

void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_)
{
    std::lock_guard lock(mutex);
    if (current_stage == Stage::WRITING_BACKUP)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
    insertAtEnd(backup_entries, std::move(backup_entries_));

@ -803,6 +842,7 @@ void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_)

void BackupEntriesCollector::addPostTask(std::function<void()> task)
{
    std::lock_guard lock(mutex);
    if (current_stage == Stage::WRITING_BACKUP)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of post tasks is not allowed");
    post_tasks.push(std::move(task));

@ -824,6 +864,7 @@ void BackupEntriesCollector::runPostTasks()

size_t BackupEntriesCollector::getAccessCounter(AccessEntityType type)
{
    std::lock_guard lock(mutex);
    access_counters.resize(static_cast<size_t>(AccessEntityType::MAX));
    return access_counters[static_cast<size_t>(type)]++;
}
@ -31,7 +31,8 @@ public:

        const BackupSettings & backup_settings_,
        std::shared_ptr<IBackupCoordination> backup_coordination_,
        const ReadSettings & read_settings_,
        const ContextPtr & context_);
        const ContextPtr & context_,
        ThreadPool & threadpool_);
    ~BackupEntriesCollector();

    /// Collects backup entries and returns the result.

@ -90,6 +91,8 @@ private:

    void makeBackupEntriesForTablesData();
    void makeBackupEntriesForTableData(const QualifiedTableName & table_name);

    void addBackupEntryUnlocked(const String & file_name, BackupEntryPtr backup_entry);

    void runPostTasks();

    Strings setStage(const String & new_stage, const String & message = "");

@ -170,6 +173,9 @@ private:

    BackupEntries backup_entries;
    std::queue<std::function<void()>> post_tasks;
    std::vector<size_t> access_counters;

    ThreadPool & threadpool;
    std::mutex mutex;
};

}
@ -1,6 +1,7 @@
#pragma once

#include <Backups/BackupStatus.h>
#include <Common/ProfileEvents.h>

namespace DB
{

@ -47,6 +48,9 @@ struct BackupOperationInfo

    std::exception_ptr exception;
    String error_message;

    /// Profile events collected during the backup.
    std::shared_ptr<ProfileEvents::Counters::Snapshot> profile_counters = nullptr;

    std::chrono::system_clock::time_point start_time;
    std::chrono::system_clock::time_point end_time;
};
@ -550,7 +550,9 @@ void BackupsWorker::doBackup(

    /// Prepare backup entries.
    BackupEntries backup_entries;
    {
        BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, backup_create_params.read_settings, context};
        BackupEntriesCollector backup_entries_collector(
            backup_query->elements, backup_settings, backup_coordination,
            backup_create_params.read_settings, context, getThreadPool(ThreadPoolId::BACKUP_MAKE_FILES_LIST));
        backup_entries = backup_entries_collector.run();
    }

@ -1056,6 +1058,7 @@ void BackupsWorker::setStatus(const String & id, BackupStatus status, bool throw

    auto old_status = info.status;

    info.status = status;
    info.profile_counters = std::make_shared<ProfileEvents::Counters::Snapshot>(CurrentThread::getProfileEvents().getPartiallyAtomicSnapshot());

    if (isFinalStatus(status))
        info.end_time = std::chrono::system_clock::now();
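For orientation, this collection step runs as part of a BACKUP query; a hedged invocation sketch (database, table, and disk names are illustrative):

``` sql
BACKUP TABLE default.hits TO Disk('backups', 'hits_backup.zip');
```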
@ -89,6 +89,17 @@ add_headers_and_sources(clickhouse_common_io IO/Resource)

add_headers_and_sources(clickhouse_common_io IO/S3)
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)


add_headers_and_sources(clickhouse_compression Compression)
add_headers_and_sources(clickhouse_compression Parsers)
add_headers_and_sources(clickhouse_compression Core)
#Included these specific files to avoid linking grpc
add_glob(clickhouse_compression_headers Server/ServerType.h)
add_glob(clickhouse_compression_sources Server/ServerType.cpp)
add_headers_and_sources(clickhouse_compression Common/SSH)
add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhouse_compression_sources})


add_headers_and_sources(dbms Disks/IO)
add_headers_and_sources(dbms Disks/ObjectStorages)
if (TARGET ch_contrib::sqlite)

@ -270,6 +281,7 @@ target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR

if (TARGET ch_contrib::llvm)
    dbms_target_link_libraries (PUBLIC ch_contrib::llvm)
    target_link_libraries (clickhouse_compression PUBLIC ch_contrib::llvm)
endif ()

if (TARGET ch_contrib::gwp_asan)

@ -293,6 +305,18 @@ target_link_libraries (clickhouse_common_io

        common
        ch_contrib::double_conversion
        ch_contrib::dragonbox_to_chars
        ch_contrib::libdivide
)


target_link_libraries (clickhouse_compression
    PUBLIC
        string_utils
        pcg_random
        clickhouse_parsers
    PRIVATE
        ch_contrib::lz4
        ch_contrib::roaring
)

# Use X86 AVX2/AVX512 instructions to accelerate filter operations

@ -336,6 +360,7 @@ if (TARGET ch_contrib::crc32-vpmsum)

if (TARGET ch_contrib::ssh)
    target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::ssh)
    target_link_libraries(clickhouse_compression PUBLIC ch_contrib::ssh)
endif()

dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables)

@ -359,10 +384,12 @@ endif()

if (TARGET ch_contrib::krb5)
    dbms_target_link_libraries(PRIVATE ch_contrib::krb5)
    target_link_libraries (clickhouse_compression PRIVATE ch_contrib::krb5)
endif()

if (TARGET ch_contrib::nuraft)
    dbms_target_link_libraries(PUBLIC ch_contrib::nuraft)
    target_link_libraries (clickhouse_compression PUBLIC ch_contrib::nuraft)
endif()

dbms_target_link_libraries (

@ -432,6 +459,7 @@ endif ()

if (TARGET ch_contrib::ldap)
    dbms_target_link_libraries (PRIVATE ch_contrib::ldap ch_contrib::lber)
    target_link_libraries (clickhouse_compression PRIVATE ch_contrib::ldap ch_contrib::lber)
endif ()
dbms_target_link_libraries (PUBLIC ch_contrib::sparsehash)
@ -131,7 +131,7 @@ void LocalConnection::sendQuery(

    try
    {
        state->io = executeQuery(state->query, query_context, false, state->stage).second;
        state->io = executeQuery(state->query, query_context, QueryFlags{}, state->stage).second;

        if (state->io.pipeline.pushing())
        {
@ -293,7 +293,6 @@ ReplxxLineReader::ReplxxLineReader(

    , word_break_characters(word_break_characters_)
    , editor(getEditor())
{
    using namespace std::placeholders;
    using Replxx = replxx::Replxx;

    if (!history_file_path.empty())
@ -586,7 +586,6 @@

    M(704, CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS) \
    M(705, TABLE_NOT_EMPTY) \
    M(706, LIBSSH_ERROR) \
    M(707, GCP_ERROR) \
    M(999, KEEPER_EXCEPTION) \
    M(1000, POCO_EXCEPTION) \
    M(1001, STD_EXCEPTION) \
@ -41,12 +41,6 @@ namespace ErrorCodes

void abortOnFailedAssertion(const String & description)
{
    LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", description);

    /// This is to suppress -Wmissing-noreturn
    volatile bool always_false = false;
    if (always_false)
        return;

    abort();
}
@ -19,7 +19,7 @@ namespace Poco { class Logger; }

namespace DB
{

void abortOnFailedAssertion(const String & description);
[[noreturn]] void abortOnFailedAssertion(const String & description);

/// This flag can be set for testing purposes - to check that no exceptions are thrown.
extern bool terminate_on_any_exception;
@ -546,6 +546,10 @@ The server successfully detected this situation and will download merged part fr

    M(IOUringCQEsCompleted, "Total number of successfully completed io_uring CQEs") \
    M(IOUringCQEsFailed, "Total number of completed io_uring CQEs with failures") \
    \
    M(BackupEntriesCollectorMicroseconds, "Time spent making backup entries") \
    M(BackupEntriesCollectorForTablesDataMicroseconds, "Time spent making backup entries for tables data") \
    M(BackupEntriesCollectorRunPostTasksMicroseconds, "Time spent running post tasks after making backup entries") \
    \
    M(ReadTaskRequestsReceived, "The number of callbacks requested from the remote server back to the initiator server to choose the read task (for s3Cluster table function and similar). Measured on the initiator server side.") \
    M(MergeTreeReadTaskRequestsReceived, "The number of callbacks requested from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the initiator server side.") \
    \
@ -10,6 +10,7 @@ target_compile_definitions (clickhouse_common_zookeeper PRIVATE -DZOOKEEPER_LOG)

target_link_libraries (clickhouse_common_zookeeper
    PUBLIC
        clickhouse_common_io
        clickhouse_compression
        common
    PRIVATE
        string_utils

@ -20,6 +21,7 @@ add_library(clickhouse_common_zookeeper_no_log ${clickhouse_common_zookeeper_hea

target_link_libraries (clickhouse_common_zookeeper_no_log
    PUBLIC
        clickhouse_common_io
        clickhouse_compression
        common
    PRIVATE
        string_utils
@ -214,6 +214,10 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio

                .max_sec = config.getUInt(config_name + "." + key + ".max"),
            };
        }
        else if (key == "use_compression")
        {
            use_compression = config.getBool(config_name + "." + key);
        }
        else
            throw KeeperException(Coordination::Error::ZBADARGUMENTS, "Unknown key {} in config file", key);
    }

@ -44,6 +44,7 @@ struct ZooKeeperArgs

    double recv_sleep_probability = 0.0;
    UInt64 send_sleep_ms = 0;
    UInt64 recv_sleep_ms = 0;
    bool use_compression = false;

    SessionLifetimeConfiguration fallback_session_lifetime = {};
    DB::GetPriorityForLoadBalancing get_priority_load_balancing;
@ -27,7 +27,6 @@ void ZooKeeperResponse::write(WriteBuffer & out) const

    if (error == Error::ZOK)
        writeImpl(buf);
    Coordination::write(buf.str(), out);
    out.next();
}

std::string ZooKeeperRequest::toString() const

@ -49,7 +48,6 @@ void ZooKeeperRequest::write(WriteBuffer & out) const

    Coordination::write(getOpNum(), buf);
    writeImpl(buf);
    Coordination::write(buf.str(), out);
    out.next();
}

void ZooKeeperSyncRequest::writeImpl(WriteBuffer & out) const
@ -46,6 +46,7 @@ enum class OpNum : int32_t

OpNum getOpNum(int32_t raw_op_num);

static constexpr int32_t ZOOKEEPER_PROTOCOL_VERSION = 0;
static constexpr int32_t ZOOKEEPER_PROTOCOL_VERSION_WITH_COMPRESSION = 10;
static constexpr int32_t KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT = 42;
static constexpr int32_t CLIENT_HANDSHAKE_LENGTH = 44;
static constexpr int32_t CLIENT_HANDSHAKE_LENGTH_WITH_READONLY = 45;
@ -16,6 +16,9 @@

#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Common/logger_useful.h>
#include <Common/setThreadName.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <Compression/CompressionFactory.h>

#include "Coordination/KeeperConstants.h"
#include "config.h"

@ -274,13 +277,34 @@ using namespace DB;

template <typename T>
void ZooKeeper::write(const T & x)
{
    Coordination::write(x, *out);
    Coordination::write(x, getWriteBuffer());
}

template <typename T>
void ZooKeeper::read(T & x)
{
    Coordination::read(x, *in);
    Coordination::read(x, getReadBuffer());
}

WriteBuffer & ZooKeeper::getWriteBuffer()
{
    if (compressed_out)
        return *compressed_out;
    return *out;
}

void ZooKeeper::flushWriteBuffer()
{
    if (compressed_out)
        compressed_out->next();
    out->next();
}

ReadBuffer & ZooKeeper::getReadBuffer()
{
    if (compressed_in)
        return *compressed_in;
    return *in;
}

static void removeRootPath(String & path, const String & chroot)

@ -345,7 +369,23 @@ ZooKeeper::ZooKeeper(

    if (args.enable_fault_injections_during_startup)
        setupFaultDistributions();

    connect(nodes, args.connection_timeout_ms * 1000);
    try
    {
        use_compression = args.use_compression;
        connect(nodes, args.connection_timeout_ms * 1000);
    }
    catch (...)
    {
        /// If we get an exception & compression is enabled, then it's possible that the keeper does not support compression,
        /// try without compression
        if (use_compression)
        {
            use_compression = false;
            connect(nodes, args.connection_timeout_ms * 1000);
        }
        else
            throw;
    }

    if (!args.auth_scheme.empty())
        sendAuth(args.auth_scheme, args.identity);
@ -424,6 +464,8 @@ void ZooKeeper::connect(

    in.emplace(socket);
    out.emplace(socket);
    compressed_in.reset();
    compressed_out.reset();

    try
    {

@ -444,7 +486,14 @@ void ZooKeeper::connect(

        e.addMessage("while receiving handshake from ZooKeeper");
        throw;
    }

    connected = true;
    if (use_compression)
    {
        compressed_in.emplace(*in);
        compressed_out.emplace(*out, CompressionCodecFactory::instance().get("LZ4", {}));
    }

    original_index = static_cast<Int8>(node.original_index);

    if (i != 0)
@ -511,16 +560,17 @@ void ZooKeeper::sendHandshake()

    std::array<char, passwd_len> passwd {};

    write(handshake_length);
    write(ZOOKEEPER_PROTOCOL_VERSION);
    if (use_compression)
        write(ZOOKEEPER_PROTOCOL_VERSION_WITH_COMPRESSION);
    else
        write(ZOOKEEPER_PROTOCOL_VERSION);
    write(last_zxid_seen);
    write(timeout);
    write(previous_session_id);
    write(passwd);

    out->next();
    flushWriteBuffer();
}


void ZooKeeper::receiveHandshake()
{
    int32_t handshake_length;

@ -533,18 +583,22 @@ void ZooKeeper::receiveHandshake()

        throw Exception(Error::ZMARSHALLINGERROR, "Unexpected handshake length received: {}", handshake_length);

    read(protocol_version_read);
    if (protocol_version_read != ZOOKEEPER_PROTOCOL_VERSION)

    /// Special way to tell a client that server is not ready to serve it.
    /// It's better for faster failover than just connection drop.
    /// Implemented in clickhouse-keeper.
    if (protocol_version_read == KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT)
        throw Exception::fromMessage(Error::ZCONNECTIONLOSS,
            "Keeper server rejected the connection during the handshake. "
            "Possibly it's overloaded, doesn't see leader or stale");

    if (use_compression)
    {
        /// Special way to tell a client that server is not ready to serve it.
        /// It's better for faster failover than just connection drop.
        /// Implemented in clickhouse-keeper.
        if (protocol_version_read == KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT)
            throw Exception::fromMessage(Error::ZCONNECTIONLOSS,
                "Keeper server rejected the connection during the handshake. "
                "Possibly it's overloaded, doesn't see leader or stale");
        else
            throw Exception(Error::ZMARSHALLINGERROR, "Unexpected protocol version: {}", protocol_version_read);
        if (protocol_version_read != ZOOKEEPER_PROTOCOL_VERSION_WITH_COMPRESSION)
            throw Exception(Error::ZMARSHALLINGERROR, "Unexpected protocol version with compression: {}", protocol_version_read);
    }
    else if (protocol_version_read != ZOOKEEPER_PROTOCOL_VERSION)
        throw Exception(Error::ZMARSHALLINGERROR, "Unexpected protocol version: {}", protocol_version_read);

    read(timeout);
    if (timeout != args.session_timeout_ms)
@ -562,7 +616,8 @@ void ZooKeeper::sendAuth(const String & scheme, const String & data)

    request.scheme = scheme;
    request.data = data;
    request.xid = AUTH_XID;
    request.write(*out);
    request.write(getWriteBuffer());
    flushWriteBuffer();

    int32_t length;
    XID read_xid;

@ -578,10 +633,14 @@ void ZooKeeper::sendAuth(const String & scheme, const String & data)

    if (read_xid != AUTH_XID)
        throw Exception(Error::ZMARSHALLINGERROR, "Unexpected event received in reply to auth request: {}", read_xid);

    int32_t actual_length = static_cast<int32_t>(in->count() - count_before_event);
    if (length != actual_length)
    if (!use_compression)
    {
        int32_t actual_length = static_cast<int32_t>(in->count() - count_before_event);
        if (length != actual_length)
            throw Exception(Error::ZMARSHALLINGERROR, "Response length doesn't match. Expected: {}, actual: {}", length, actual_length);
    }

    if (err != Error::ZOK)
        throw Exception(Error::ZMARSHALLINGERROR, "Error received in reply to auth request. Code: {}. Message: {}",
            static_cast<int32_t>(err), err);
@ -637,7 +696,8 @@ void ZooKeeper::sendThread()

                    info.request->addRootPath(args.chroot);

                info.request->probably_sent = true;
                info.request->write(*out);
                info.request->write(getWriteBuffer());
                flushWriteBuffer();

                logOperationIfNeeded(info.request);

@ -653,7 +713,8 @@ void ZooKeeper::sendThread()

                ZooKeeperHeartbeatRequest request;
                request.xid = PING_XID;
                request.write(*out);
                request.write(getWriteBuffer());
                flushWriteBuffer();
            }

            ProfileEvents::increment(ProfileEvents::ZooKeeperBytesSent, out->count() - prev_bytes_sent);
@ -825,7 +886,7 @@ void ZooKeeper::receiveEvent()

        }
        else
        {
            response->readImpl(*in);
            response->readImpl(getReadBuffer());
            response->removeRootPath(args.chroot);
        }
        /// Instead of setting the watch in sendEvent, set it in receiveEvent because we need to check the response.

@ -858,9 +919,14 @@ void ZooKeeper::receiveEvent()

        }
    }

    int32_t actual_length = static_cast<int32_t>(in->count() - count_before_event);
    if (length != actual_length)
        throw Exception(Error::ZMARSHALLINGERROR, "Response length doesn't match. Expected: {}, actual: {}", length, actual_length);
    if (!use_compression)
    {
        int32_t actual_length = static_cast<int32_t>(in->count() - count_before_event);

        if (length != actual_length)
            throw Exception(Error::ZMARSHALLINGERROR, "Response length doesn't match. Expected: {}, actual: {}",
                length, actual_length);
    }

    logOperationIfNeeded(request_info.request, response, /* finalize= */ false, elapsed_ms);
}
@ -15,6 +15,8 @@

#include <IO/WriteBuffer.h>
#include <IO/ReadBufferFromPocoSocket.h>
#include <IO/WriteBufferFromPocoSocket.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>

#include <Poco/Net/StreamSocket.h>
#include <Poco/Net/SocketAddress.h>

@ -239,8 +241,13 @@ private:

    Poco::Net::StreamSocket socket;
    /// To avoid excessive getpeername(2) calls.
    Poco::Net::SocketAddress socket_address;

    std::optional<ReadBufferFromPocoSocket> in;
    std::optional<WriteBufferFromPocoSocket> out;
    std::optional<CompressedReadBuffer> compressed_in;
    std::optional<CompressedWriteBuffer> compressed_out;

    bool use_compression = false;

    int64_t session_id = 0;

@ -328,6 +335,10 @@ private:

    template <typename T>
    void read(T &);

    WriteBuffer & getWriteBuffer();
    void flushWriteBuffer();
    ReadBuffer & getReadBuffer();

    void logOperationIfNeeded(const ZooKeeperRequestPtr & request, const ZooKeeperResponsePtr & response = nullptr, bool finalize = false, UInt64 elapsed_ms = 0);

    void initFeatureFlags();
@ -2,7 +2,7 @@ clickhouse_add_executable(zkutil_test_commands zkutil_test_commands.cpp)

target_link_libraries(zkutil_test_commands PRIVATE clickhouse_common_zookeeper_no_log)

clickhouse_add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp)
target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper_no_log string_utils)
target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper_no_log clickhouse_compression string_utils)

clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp)
target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log)
@ -36,7 +36,7 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco

}


const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl,ydld";
const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl";

KeeperConfigurationAndSettings::KeeperConfigurationAndSettings()
    : server_id(NOT_EXIST)
@ -32,6 +32,7 @@ struct Settings;

    M(Milliseconds, shutdown_timeout, 5000, "How much time we will wait until RAFT shutdown", 0) \
    M(Milliseconds, session_shutdown_timeout, 10000, "How much time we will wait until sessions are closed during shutdown", 0) \
    M(Milliseconds, startup_timeout, 180000, "How much time we will wait until RAFT to start.", 0) \
    M(Milliseconds, sleep_before_leader_change_ms, 8000, "How much time we will wait before removing leader (so as leader could commit accepted but non-committed commands and they won't be lost -- leader removal is not synchronized with committing)", 0) \
    M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
    M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \
    M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \
@ -172,9 +172,6 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat

    FourLetterCommandPtr feature_flags_command = std::make_shared<FeatureFlagsCommand>(keeper_dispatcher);
    factory.registerCommand(feature_flags_command);

    FourLetterCommandPtr yield_leadership_command = std::make_shared<YieldLeadershipCommand>(keeper_dispatcher);
    factory.registerCommand(yield_leadership_command);

    factory.initializeAllowList(keeper_dispatcher);
    factory.setInitialize(true);
}

@ -582,10 +579,4 @@ String FeatureFlagsCommand::run()

    return ret.str();
}

String YieldLeadershipCommand::run()
{
    keeper_dispatcher.yieldLeadership();
    return "Sent yield leadership request to leader.";
}

}
@ -415,17 +415,4 @@ struct FeatureFlagsCommand : public IFourLetterCommand

    ~FeatureFlagsCommand() override = default;
};

/// Yield leadership and become follower.
struct YieldLeadershipCommand : public IFourLetterCommand
{
    explicit YieldLeadershipCommand(KeeperDispatcher & keeper_dispatcher_)
        : IFourLetterCommand(keeper_dispatcher_)
    {
    }

    String name() override { return "ydld"; }
    String run() override;
    ~YieldLeadershipCommand() override = default;
};

}
@ -17,6 +17,5 @@ const String keeper_system_path = "/keeper";

const String keeper_api_version_path = keeper_system_path + "/api_version";
const String keeper_api_feature_flags_path = keeper_system_path + "/feature_flags";
const String keeper_config_path = keeper_system_path + "/config";
const String keeper_availability_zone_path = keeper_system_path + "/availability_zone";

}
@ -32,17 +32,9 @@ KeeperContext::KeeperContext(bool standalone_keeper_)

    system_nodes_with_data[keeper_api_version_path] = toString(static_cast<uint8_t>(KeeperApiVersion::WITH_MULTI_READ));
}

void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_, const std::string & environment_az)
void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_)
{
    dispatcher = dispatcher_;

    /// We only use the environment availability zone when configuration option is missing.
    auto keeper_az = config.getString("keeper_server.availability_zone", environment_az);
    if (!keeper_az.empty())
        system_nodes_with_data[keeper_availability_zone_path] = keeper_az;
    LOG_INFO(&Poco::Logger::get("KeeperContext"),
        "Initialize the KeeperContext with availability zone: '{}', environment availability zone '{}'. ", keeper_az, environment_az);

    digest_enabled = config.getBool("keeper_server.digest_enabled", false);
    ignore_system_path_on_startup = config.getBool("keeper_server.ignore_system_path_on_startup", false);
@ -3,6 +3,7 @@

#include <Disks/DiskSelector.h>
#include <IO/WriteBufferFromString.h>
#include <Poco/Util/AbstractConfiguration.h>

#include <cstdint>
#include <memory>

@ -23,7 +24,7 @@ public:

        SHUTDOWN
    };

    void initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_, const std::string & environment_az);
    void initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_);

    Phase getServerState() const;
    void setServerState(Phase server_state_);
@ -11,7 +11,6 @@

#include <Common/CurrentMetrics.h>
#include <Common/ProfileEvents.h>
#include <Common/logger_useful.h>
#include <IO/S3/Credentials.h>

#include <atomic>
#include <future>

@ -371,16 +370,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf

    configuration_and_settings = KeeperConfigurationAndSettings::loadFromConfig(config, standalone_keeper);

    keeper_context = std::make_shared<KeeperContext>(standalone_keeper);
    String availability_zone;
    try
    {
        availability_zone = DB::S3::getRunningAvailabilityZone();
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
    keeper_context->initialize(config, this, availability_zone);
    keeper_context->initialize(config, this);

    requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings->max_request_queue_size);
    request_thread = ThreadFromGlobalPool([this] { requestThread(); });
@ -813,6 +803,8 @@ void KeeperDispatcher::clusterUpdateWithReconfigDisabledThread()

void KeeperDispatcher::clusterUpdateThread()
{
    using enum KeeperServer::ConfigUpdateState;
    bool last_command_was_leader_change = false;
    auto & shutdown_called = keeper_context->shutdown_called;
    while (!shutdown_called)
    {

@ -820,13 +812,18 @@ void KeeperDispatcher::clusterUpdateThread()

        if (!cluster_update_queue.pop(action))
            return;

        if (server->applyConfigUpdate(action))
        if (const auto res = server->applyConfigUpdate(action, last_command_was_leader_change); res == Accepted)
            LOG_DEBUG(log, "Processing config update {}: accepted", action);
        else // TODO (myrrc) sleep a random amount? sleep less?
        else
        {
            last_command_was_leader_change = res == WaitBeforeChangingLeader;

            (void)cluster_update_queue.pushFront(action);
            LOG_DEBUG(log, "Processing config update {}: declined, backoff", action);
            std::this_thread::sleep_for(50ms);

            std::this_thread::sleep_for(last_command_was_leader_change
                ? configuration_and_settings->coordination_settings->sleep_before_leader_change_ms
                : 50ms);
        }
    }
}
@ -237,12 +237,6 @@ public:

        return server->requestLeader();
    }

    /// Yield leadership and become follower.
    void yieldLeadership()
    {
        return server->yieldLeadership();
    }

    void recalculateStorageStats()
    {
        return server->recalculateStorageStats();
@ -870,36 +870,50 @@ std::vector<int64_t> KeeperServer::getDeadSessions()

    return state_machine->getDeadSessions();
}

bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction & action)
KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate(
    const ClusterUpdateAction & action, bool last_command_was_leader_change)
{
    using enum ConfigUpdateState;
    std::lock_guard _{server_write_mutex};

    if (const auto * add = std::get_if<AddRaftServer>(&action))
    {
        if (raft_instance->get_srv_config(add->id) != nullptr)
            return true;
            return Accepted;

        auto resp = raft_instance->add_srv(static_cast<nuraft::srv_config>(*add));
        resp->get();
        return resp->get_accepted();
        return resp->get_accepted() ? Accepted : Declined;
    }
    else if (const auto * remove = std::get_if<RemoveRaftServer>(&action))
    {
        // This corner case is the most problematic. Issue follows: if we agree on a number
        // of commands but don't commit them on leader, and then issue a leadership change via
        // yield/request, leader can pause writes before all commits, therefore commands will be lost
        // (leadership change is not synchronized with committing in NuRaft).
        // However, waiting till some commands get _committed_ instead of _agreed_ is a hard task
        // regarding current library design, and this brings lots of levels of complexity
        // (see https://github.com/ClickHouse/ClickHouse/pull/53481 history). So, a compromise here
        // is a timeout before issuing a leadership change with an ability to change if user knows they
        // have a particularly slow network.
        if (remove->id == raft_instance->get_leader())
        {
            if (!last_command_was_leader_change)
                return WaitBeforeChangingLeader;

            if (isLeader())
                raft_instance->yield_leadership();
            else
                raft_instance->request_leadership();
            return false;
            return Declined;
        }

        if (raft_instance->get_srv_config(remove->id) == nullptr)
            return true;
            return Accepted;

        auto resp = raft_instance->remove_srv(remove->id);
        resp->get();
        return resp->get_accepted();
        return resp->get_accepted() ? Accepted : Declined;
    }
    else if (const auto * update = std::get_if<UpdateRaftServerPriority>(&action))
    {

@ -908,10 +922,10 @@ bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction & action)

            "Attempt to apply {} but server is not present in Raft",
            action);
        else if (ptr->get_priority() == update->priority)
            return true;
            return Accepted;

        raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true);
        return true;
        return Accepted;
    }
    UNREACHABLE();
}
@ -1087,12 +1101,6 @@ bool KeeperServer::requestLeader()

    return isLeader() || raft_instance->request_leadership();
}

void KeeperServer::yieldLeadership()
{
    if (isLeader())
        raft_instance->yield_leadership();
}

void KeeperServer::recalculateStorageStats()
{
    state_machine->recalculateStorageStats();
@ -128,7 +128,10 @@ public:

    int getServerID() const { return server_id; }

    bool applyConfigUpdate(const ClusterUpdateAction& action);
    enum class ConfigUpdateState { Accepted, Declined, WaitBeforeChangingLeader };
    ConfigUpdateState applyConfigUpdate(
        const ClusterUpdateAction& action,
        bool last_command_was_leader_change = false);

    // TODO (myrrc) these functions should be removed once "reconfig" is stabilized
    void applyConfigUpdateWithReconfigDisabled(const ClusterUpdateAction& action);

@ -141,8 +144,6 @@ public:

    bool requestLeader();

    void yieldLeadership();

    void recalculateStorageStats();
};
@ -1081,8 +1081,7 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce

        Coordination::ZooKeeperGetRequest & request = dynamic_cast<Coordination::ZooKeeperGetRequest &>(*zk_request);

        if (request.path == Coordination::keeper_api_feature_flags_path
            || request.path == Coordination::keeper_config_path
            || request.path == Coordination::keeper_availability_zone_path)
            || request.path == Coordination::keeper_config_path)
            return {};

        if (!storage.uncommitted_state.getNode(request.path))
@ -73,7 +73,7 @@ std::pair<String, StoragePtr> createTableFromAST(

        auto table_function = factory.get(table_function_ast, context);
        ColumnsDescription columns;
        if (ast_create_query.columns_list && ast_create_query.columns_list->columns)
            columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true);
            columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true, false);
        StoragePtr storage = table_function->execute(table_function_ast, context, ast_create_query.getTable(), std::move(columns));
        storage->renameInMemory(ast_create_query);
        return {ast_create_query.getTable(), storage};

@ -99,7 +99,7 @@ std::pair<String, StoragePtr> createTableFromAST(

        }
        else
        {
            columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true);
            columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true, false);
            constraints = InterpreterCreateQuery::getConstraintsDescription(ast_create_query.columns_list->constraints);
        }
    }
@ -262,7 +262,11 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const

        shards.back().push_back(DatabaseReplicaInfo{std::move(hostname), std::move(shard), std::move(replica)});
    }

    UInt16 default_port = getContext()->getTCPPort();
    UInt16 default_port;
    if (cluster_auth_info.cluster_secure_connection)
        default_port = getContext()->getTCPPortSecure().value_or(DBMS_DEFAULT_SECURE_PORT);
    else
        default_port = getContext()->getTCPPort();

    bool treat_local_as_remote = false;
    bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL;
|
||||
}
|
||||
}
|
||||
|
||||
BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, bool internal)
|
||||
BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags)
|
||||
{
|
||||
|
||||
if (query_context->getCurrentTransaction() && query_context->getSettingsRef().throw_on_unsupported_query_inside_transaction)
|
||||
@ -731,7 +735,7 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex
|
||||
if (is_readonly)
|
||||
throw Exception(ErrorCodes::NO_ZOOKEEPER, "Database is in readonly mode, because it cannot connect to ZooKeeper");
|
||||
|
||||
if (!internal && (query_context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY))
|
||||
if (!flags.internal && (query_context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY))
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "It's not initial query. ON CLUSTER is not allowed for Replicated database.");
|
||||
|
||||
checkQueryValid(query, query_context);
|
||||
@ -742,6 +746,7 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex
|
||||
entry.initiator = ddl_worker->getCommonHostID();
|
||||
entry.setSettingsIfRequired(query_context);
|
||||
entry.tracing_context = OpenTelemetry::CurrentContext();
|
||||
entry.is_backup_restore = flags.distributed_backup_restore;
|
||||
String node_path = ddl_worker->tryEnqueueAndExecuteEntry(entry, query_context);
|
||||
|
||||
Strings hosts_to_wait;
|
||||
@ -919,14 +924,14 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
|
||||
String query = fmt::format("CREATE DATABASE IF NOT EXISTS {} ENGINE=Ordinary", backQuoteIfNeed(to_db_name));
|
||||
auto query_context = Context::createCopy(getContext());
|
||||
query_context->setSetting("allow_deprecated_database_ordinary", 1);
|
||||
executeQuery(query, query_context, true);
|
||||
executeQuery(query, query_context, QueryFlags{ .internal = true });
|
||||
|
||||
/// But we want to avoid discarding UUID of ReplicatedMergeTree tables, because it will not work
|
||||
/// if zookeeper_path contains {uuid} macro. Replicated database do not recreate replicated tables on recovery,
|
||||
/// so it's ok to save UUID of replicated table.
|
||||
query = fmt::format("CREATE DATABASE IF NOT EXISTS {} ENGINE=Atomic", backQuoteIfNeed(to_db_name_replicated));
|
||||
query_context = Context::createCopy(getContext());
|
||||
executeQuery(query, query_context, true);
|
||||
executeQuery(query, query_context, QueryFlags{ .internal = true });
|
||||
}
|
||||
|
||||
size_t moved_tables = 0;
|
||||
|
@ -46,7 +46,7 @@ public:

    /// Try to execute a DDL query on the current host as the initial query. If the query succeeds,
    /// then it will be executed on all replicas.
-   BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, bool internal) override;
+   BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags) override;

    bool canExecuteReplicatedMetadataAlter() const override;

@ -372,6 +372,7 @@ void DatabaseWithOwnTablesBase::createTableRestoredFromBackup(const ASTPtr & cre
    /// Creates a table by executing a "CREATE TABLE" query.
    InterpreterCreateQuery interpreter{create_table_query, local_context};
+   interpreter.setInternal(true);
    interpreter.setIsRestoreFromBackup(true);
    interpreter.execute();
}

@ -3,6 +3,7 @@
#include <Core/UUID.h>
#include <Databases/LoadingStrictnessLevel.h>
#include <Interpreters/Context_fwd.h>
+#include <Interpreters/executeQuery.h>
#include <Parsers/IAST_fwd.h>
#include <Storages/IStorage_fwd.h>
#include <base/types.h>
@ -345,7 +346,7 @@ public:

    virtual bool shouldReplicateQuery(const ContextPtr & /*query_context*/, const ASTPtr & /*query_ptr*/) const { return false; }

-   virtual BlockIO tryEnqueueReplicatedDDL(const ASTPtr & /*query*/, ContextPtr /*query_context*/, [[maybe_unused]] bool internal = false) /// NOLINT
+   virtual BlockIO tryEnqueueReplicatedDDL(const ASTPtr & /*query*/, ContextPtr /*query_context*/, [[maybe_unused]] QueryFlags flags = {}) /// NOLINT
    {
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not have replicated DDL queue", getEngineName());
    }
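The base-class declaration keeps a default argument (`QueryFlags flags = {}`), so existing callers that passed nothing keep compiling. A small sketch of why that works: default arguments are bound to the static type of the expression, so the default declared on the base applies even when the derived override runs. Names below are illustrative stand-ins, not ClickHouse types.

#include <iostream>

struct Flags { bool internal = false; };

struct Base
{
    // The default argument lives in the declaration the caller sees.
    virtual void enqueue(Flags flags = {}) { std::cout << "base " << flags.internal << '\n'; }
    virtual ~Base() = default;
};

struct Derived : Base
{
    void enqueue(Flags flags) override { std::cout << "derived " << flags.internal << '\n'; }
};

int main()
{
    Derived d;
    Base & b = d;
    b.enqueue();   // the default Flags{} comes from Base's declaration; the override still runs
}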
@ -75,7 +75,7 @@ static BlockIO tryToExecuteQuery(const String & query_to_execute, ContextMutable
        if (!database.empty())
            query_context->setCurrentDatabase(database);

-       return executeQuery("/*" + comment + "*/ " + query_to_execute, query_context, true).second;
+       return executeQuery("/*" + comment + "*/ " + query_to_execute, query_context, QueryFlags{ .internal = true }).second;
    }
    catch (...)
    {
@ -168,7 +168,7 @@ QueryPipeline ClickHouseDictionarySource::createStreamForQuery(const String & qu

    if (configuration.is_local)
    {
-       pipeline = executeQuery(query, context_copy, true).second.pipeline;
+       pipeline = executeQuery(query, context_copy, QueryFlags{ .internal = true }).second.pipeline;
        pipeline.convertStructureTo(empty_sample_block.getColumnsWithTypeAndName());
    }
    else
@ -190,7 +190,7 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re

    if (configuration.is_local)
    {
-       return readInvalidateQuery(executeQuery(request, context_copy, true).second.pipeline);
+       return readInvalidateQuery(executeQuery(request, context_copy, QueryFlags{ .internal = true }).second.pipeline);
    }
    else
    {
@ -900,8 +900,6 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory)
        return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Complex>>(dict_id, dict_struct, std::move(source_ptr), configuration);
    };

-   using namespace std::placeholders;
-
    factory.registerLayout("hashed_array",
        [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr global_context, bool /*created_from_ddl*/)
        {
@ -1246,8 +1246,6 @@ void registerDictionaryHashed(DictionaryFactory & factory)
        }
    };

-   using namespace std::placeholders;
-
    factory.registerLayout("hashed",
        [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr global_context, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), global_context, DictionaryKeyType::Simple, /* sparse = */ false); }, false);
    factory.registerLayout("sparse_hashed",
@ -227,9 +227,7 @@ private:
    struct KeyAttribute final
    {
        RangeStorageTypeContainer<KeyAttributeContainerType> container;
-
        RangeStorageTypeContainer<InvalidIntervalsContainerType> invalid_intervals_container;
-
    };

    void createAttributes();
@ -9,6 +9,7 @@ extract_into_parent_list(clickhouse_functions_sources dbms_sources
    FunctionHelpers.cpp
    extractTimeZoneFromFunctionArguments.cpp
    FunctionsLogical.cpp
+   CastOverloadResolver.cpp
)
extract_into_parent_list(clickhouse_functions_headers dbms_headers
    IFunction.h
@ -16,6 +17,7 @@ extract_into_parent_list(clickhouse_functions_headers dbms_headers
    FunctionHelpers.h
    extractTimeZoneFromFunctionArguments.h
    FunctionsLogical.h
+   CastOverloadResolver.h
)

add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources})
@ -1,10 +1,156 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/CastOverloadResolver.h>
#include <Interpreters/parseColumnsListForTableFunction.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

/** CastInternal does not preserve nullability of the data type,
  * i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1).
  *
  * Cast preserves nullability according to setting `cast_keep_nullable`,
  * i.e. Cast(toNullable(toInt8(1)) as Int32) will be Nullable(Int32(1)) if `cast_keep_nullable` == 1.
  */
template <CastType cast_type, bool internal, typename CastName, typename FunctionName>
class CastOverloadResolverImpl : public IFunctionOverloadResolver
{
public:
    using MonotonicityForRange = FunctionCastBase::MonotonicityForRange;

    static constexpr auto name = cast_type == CastType::accurate
        ? CastName::accurate_cast_name
        : (cast_type == CastType::accurateOrNull ? CastName::accurate_cast_or_null_name : CastName::cast_name);

    String getName() const override { return name; }

    size_t getNumberOfArguments() const override { return 2; }

    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }

    explicit CastOverloadResolverImpl(ContextPtr context_, std::optional<CastDiagnostic> diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_)
        : context(context_)
        , diagnostic(std::move(diagnostic_))
        , keep_nullable(keep_nullable_)
        , data_type_validation_settings(data_type_validation_settings_)
    {
    }

    static FunctionOverloadResolverPtr create(ContextPtr context)
    {
        const auto & settings_ref = context->getSettingsRef();

        if constexpr (internal)
            return createImpl(context, {}, false /*keep_nullable*/);

        return createImpl(context, {}, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref));
    }

    static FunctionOverloadResolverPtr createImpl(ContextPtr context, std::optional<CastDiagnostic> diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {})
    {
        assert(!internal || !keep_nullable);
        return std::make_unique<CastOverloadResolverImpl>(context, std::move(diagnostic), keep_nullable, data_type_validation_settings);
    }

    static FunctionOverloadResolverPtr createImpl(std::optional<CastDiagnostic> diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {})
    {
        assert(!internal || !keep_nullable);
        return std::make_unique<CastOverloadResolverImpl>(ContextPtr(), std::move(diagnostic), keep_nullable, data_type_validation_settings);
    }

protected:

    FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
    {
        DataTypes data_types(arguments.size());

        for (size_t i = 0; i < arguments.size(); ++i)
            data_types[i] = arguments[i].type;

        auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get());
        return std::make_unique<FunctionCast<FunctionName>>(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type);
    }

    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        const auto & column = arguments.back().column;
        if (!column)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. "
                "Instead there is non-constant column of type {}", getName(), arguments.back().type->getName());

        const auto * type_col = checkAndGetColumnConst<ColumnString>(column.get());
        if (!type_col)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. "
                "Instead there is a column with the following structure: {}", getName(), column->dumpStructure());

        DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue<String>());
        validateDataType(type, data_type_validation_settings);

        if constexpr (cast_type == CastType::accurateOrNull)
            return makeNullable(type);

        if constexpr (internal)
            return type;

        if (keep_nullable && arguments.front().type->isNullable() && type->canBeInsideNullable())
            return makeNullable(type);

        return type;
    }

    bool useDefaultImplementationForNulls() const override { return false; }
    bool useDefaultImplementationForNothing() const override { return false; }
    bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }

private:
    ContextPtr context;
    std::optional<CastDiagnostic> diagnostic;
    bool keep_nullable;
    DataTypeValidationSettings data_type_validation_settings;
};


struct CastOverloadName
{
    static constexpr auto cast_name = "CAST";
    static constexpr auto accurate_cast_name = "accurateCast";
    static constexpr auto accurate_cast_or_null_name = "accurateCastOrNull";
};

struct CastInternalOverloadName
{
    static constexpr auto cast_name = "_CAST";
    static constexpr auto accurate_cast_name = "accurate_Cast";
    static constexpr auto accurate_cast_or_null_name = "accurate_CastOrNull";
};

template <CastType cast_type>
using CastOverloadResolver = CastOverloadResolverImpl<cast_type, false, CastOverloadName, CastName>;

template <CastType cast_type>
using CastInternalOverloadResolver = CastOverloadResolverImpl<cast_type, true, CastInternalOverloadName, CastInternalName>;


FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional<CastDiagnostic> diagnostic)
{
    switch (type)
    {
        case CastType::nonAccurate:
            return CastInternalOverloadResolver<CastType::nonAccurate>::createImpl(diagnostic);
        case CastType::accurate:
            return CastInternalOverloadResolver<CastType::accurate>::createImpl(diagnostic);
        case CastType::accurateOrNull:
            return CastInternalOverloadResolver<CastType::accurateOrNull>::createImpl(diagnostic);
    }
}


REGISTER_FUNCTION(CastOverloadResolvers)
{
    factory.registerFunction<CastInternalOverloadResolver<CastType::nonAccurate>>({}, FunctionFactory::CaseInsensitive);
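The return-type rules in getReturnTypeImpl() above are the crux of the CAST/_CAST distinction. A standalone sketch that distills them into plain bools (simplified: the real code additionally requires the target type to support Nullable via canBeInsideNullable()):

#include <iostream>

enum class CastType { nonAccurate, accurate, accurateOrNull };

// accurateCastOrNull always wraps the result in Nullable; the internal _CAST
// never propagates nullability; plain CAST does so only when cast_keep_nullable
// is set and the source type is nullable.
bool resultIsNullable(CastType type, bool internal, bool keep_nullable, bool source_nullable)
{
    if (type == CastType::accurateOrNull)
        return true;
    if (internal)
        return false;
    return keep_nullable && source_nullable;
}

int main()
{
    std::cout << resultIsNullable(CastType::accurateOrNull, false, false, false) << '\n'; // 1
    std::cout << resultIsNullable(CastType::nonAccurate, true, false, true) << '\n';      // 0 (_CAST)
    std::cout << resultIsNullable(CastType::nonAccurate, false, true, true) << '\n';      // 1 (cast_keep_nullable)
}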
@ -1,138 +1,29 @@
#pragma once
-#include <Functions/FunctionsConversion.h>
-#include <Interpreters/parseColumnsListForTableFunction.h>
+
+#include <memory>
+#include <optional>
+#include <Interpreters/Context_fwd.h>


namespace DB
{

-namespace ErrorCodes
-{
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-}
-
-/** CastInternal does not preserve nullability of the data type,
-  * i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1).
-  *
-  * Cast preserves nullability according to setting `cast_keep_nullable`,
-  * i.e. Cast(toNullable(toInt8(1)) as Int32) will be Nullable(Int32(1)) if `cast_keep_nullable` == 1.
-  */
-template <CastType cast_type, bool internal, typename CastName, typename FunctionName>
-class CastOverloadResolverImpl : public IFunctionOverloadResolver
-{
-public:
-    using MonotonicityForRange = FunctionCastBase::MonotonicityForRange;
-    using Diagnostic = FunctionCastBase::Diagnostic;
-
-    static constexpr auto name = cast_type == CastType::accurate
-        ? CastName::accurate_cast_name
-        : (cast_type == CastType::accurateOrNull ? CastName::accurate_cast_or_null_name : CastName::cast_name);
-
-    String getName() const override { return name; }
-
-    size_t getNumberOfArguments() const override { return 2; }
-
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
-
-    explicit CastOverloadResolverImpl(ContextPtr context_, std::optional<Diagnostic> diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_)
-        : context(context_)
-        , diagnostic(std::move(diagnostic_))
-        , keep_nullable(keep_nullable_)
-        , data_type_validation_settings(data_type_validation_settings_)
-    {
-    }
-
-    static FunctionOverloadResolverPtr create(ContextPtr context)
-    {
-        const auto & settings_ref = context->getSettingsRef();
-
-        if constexpr (internal)
-            return createImpl(context, {}, false /*keep_nullable*/);
-
-        return createImpl(context, {}, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref));
-    }
-
-    static FunctionOverloadResolverPtr createImpl(ContextPtr context, std::optional<Diagnostic> diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {})
-    {
-        assert(!internal || !keep_nullable);
-        return std::make_unique<CastOverloadResolverImpl>(context, std::move(diagnostic), keep_nullable, data_type_validation_settings);
-    }
-
-    static FunctionOverloadResolverPtr createImpl(std::optional<Diagnostic> diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {})
-    {
-        assert(!internal || !keep_nullable);
-        return std::make_unique<CastOverloadResolverImpl>(ContextPtr(), std::move(diagnostic), keep_nullable, data_type_validation_settings);
-    }
-
-protected:
-
-    FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
-    {
-        DataTypes data_types(arguments.size());
-
-        for (size_t i = 0; i < arguments.size(); ++i)
-            data_types[i] = arguments[i].type;
-
-        auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get());
-        return std::make_unique<FunctionCast<FunctionName>>(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type);
-    }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        const auto & column = arguments.back().column;
-        if (!column)
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. "
-                "Instead there is non-constant column of type {}", getName(), arguments.back().type->getName());
-
-        const auto * type_col = checkAndGetColumnConst<ColumnString>(column.get());
-        if (!type_col)
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. "
-                "Instead there is a column with the following structure: {}", getName(), column->dumpStructure());
-
-        DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue<String>());
-        validateDataType(type, data_type_validation_settings);
-
-        if constexpr (cast_type == CastType::accurateOrNull)
-            return makeNullable(type);
-
-        if constexpr (internal)
-            return type;
-
-        if (keep_nullable && arguments.front().type->isNullable() && type->canBeInsideNullable())
-            return makeNullable(type);
-
-        return type;
-    }
-
-    bool useDefaultImplementationForNulls() const override { return false; }
-    bool useDefaultImplementationForNothing() const override { return false; }
-    bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
-
-private:
-    ContextPtr context;
-    std::optional<Diagnostic> diagnostic;
-    bool keep_nullable;
-    DataTypeValidationSettings data_type_validation_settings;
+class IFunctionOverloadResolver;
+using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;
+
+enum class CastType
+{
+    nonAccurate,
+    accurate,
+    accurateOrNull
};

-struct CastOverloadName
+struct CastDiagnostic
{
-    static constexpr auto cast_name = "CAST";
-    static constexpr auto accurate_cast_name = "accurateCast";
-    static constexpr auto accurate_cast_or_null_name = "accurateCastOrNull";
+    std::string column_from;
+    std::string column_to;
};

-struct CastInternalOverloadName
-{
-    static constexpr auto cast_name = "_CAST";
-    static constexpr auto accurate_cast_name = "accurate_Cast";
-    static constexpr auto accurate_cast_or_null_name = "accurate_CastOrNull";
-};
-
-template <CastType cast_type>
-using CastOverloadResolver = CastOverloadResolverImpl<cast_type, false, CastOverloadName, CastName>;
-
-template <CastType cast_type>
-using CastInternalOverloadResolver = CastOverloadResolverImpl<cast_type, true, CastInternalOverloadName, CastInternalName>;
+FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional<CastDiagnostic> diagnostic);

}
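The header shrinks from a heavyweight template to a forward declaration plus a factory function, so translation units that only need to *obtain* a resolver no longer pull in FunctionsConversion.h. A single-file sketch of the same decoupling pattern, with stand-in names (Heavy, makeHeavy) rather than the real types:

#include <iostream>
#include <memory>

// --- "header" part: a forward declaration is enough to declare the factory ---
class Heavy;                         // stands in for IFunctionOverloadResolver
std::shared_ptr<Heavy> makeHeavy();  // factory returning a smart pointer

// --- "cpp" part: only here does the full definition have to be visible ---
class Heavy
{
public:
    void run() const { std::cout << "resolved\n"; }
};

std::shared_ptr<Heavy> makeHeavy() { return std::make_shared<Heavy>(); }

int main()
{
    makeHeavy()->run();  // callers never needed the class definition to obtain the pointer
}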
211
src/Functions/FunctionTokens.h
Normal file
@ -0,0 +1,211 @@
#pragma once

#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/Regexps.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/castColumn.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
}


/** Functions that split strings into an array of strings or vice versa.
  *
  * splitByChar(sep, s[, max_substrings])
  * splitByString(sep, s[, max_substrings])
  * splitByRegexp(regexp, s[, max_substrings])
  *
  * splitByWhitespace(s[, max_substrings]) - split the string by whitespace characters
  * splitByNonAlpha(s[, max_substrings]) - split the string by whitespace and punctuation characters
  *
  * extractAll(s, regexp) - select from the string the subsequences corresponding to the regexp.
  * - first subpattern, if regexp has subpattern;
  * - zero subpattern (the match part, otherwise);
  * - otherwise, an empty array
  *
  * alphaTokens(s[, max_substrings]) - select from the string subsequence `[a-zA-Z]+`.
  *
  * URL functions are located separately.
  */


/// A function that takes a string, and returns an array of substrings created by some generator.
template <typename Generator>
class FunctionTokens : public IFunction
{
private:
    using Pos = const char *;
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = Generator::name;
    static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); }

    explicit FunctionTokens<Generator>(ContextPtr context)
    {
        const Settings & settings = context->getSettingsRef();
        max_substrings_includes_remaining_string = settings.splitby_max_substrings_includes_remaining_string;
    }

    String getName() const override { return name; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    bool isVariadic() const override { return Generator::isVariadic(); }

    size_t getNumberOfArguments() const override { return Generator::getNumberOfArguments(); }

    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        Generator::checkArguments(*this, arguments);

        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
    {
        Generator generator;
        generator.init(arguments, max_substrings_includes_remaining_string);

        const auto & array_argument = arguments[generator.strings_argument_position];

        const ColumnString * col_str = checkAndGetColumn<ColumnString>(array_argument.column.get());
        const ColumnConst * col_str_const = checkAndGetColumnConstStringOrFixedString(array_argument.column.get());

        auto col_res = ColumnArray::create(ColumnString::create());

        ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
        ColumnString::Chars & res_strings_chars = res_strings.getChars();
        ColumnString::Offsets & res_strings_offsets = res_strings.getOffsets();

        ColumnArray::Offsets & res_offsets = col_res->getOffsets();

        if (col_str)
        {
            const ColumnString::Chars & src_chars = col_str->getChars();
            const ColumnString::Offsets & src_offsets = col_str->getOffsets();

            res_offsets.reserve(src_offsets.size());
            res_strings_offsets.reserve(src_offsets.size() * 5);    /// Constant 5 - at random.
            res_strings_chars.reserve(src_chars.size());

            Pos token_begin = nullptr;
            Pos token_end = nullptr;

            size_t size = src_offsets.size();
            ColumnString::Offset current_src_offset = 0;
            ColumnArray::Offset current_dst_offset = 0;
            ColumnString::Offset current_dst_strings_offset = 0;
            for (size_t i = 0; i < size; ++i)
            {
                Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]);
                current_src_offset = src_offsets[i];
                Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1;

                generator.set(pos, end);
                size_t j = 0;
                while (generator.get(token_begin, token_end))
                {
                    size_t token_size = token_end - token_begin;

                    res_strings_chars.resize(res_strings_chars.size() + token_size + 1);
                    memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size);
                    res_strings_chars[current_dst_strings_offset + token_size] = 0;

                    current_dst_strings_offset += token_size + 1;
                    res_strings_offsets.push_back(current_dst_strings_offset);
                    ++j;
                }

                current_dst_offset += j;
                res_offsets.push_back(current_dst_offset);
            }

            return col_res;
        }
        else if (col_str_const)
        {
            String src = col_str_const->getValue<String>();
            Array dst;

            generator.set(src.data(), src.data() + src.size());
            Pos token_begin = nullptr;
            Pos token_end = nullptr;

            while (generator.get(token_begin, token_end))
                dst.push_back(String(token_begin, token_end - token_begin));

            return result_type->createColumnConst(col_str_const->size(), dst);
        }
        else
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {}, {} of arguments of function {}",
                array_argument.column->getName(), array_argument.column->getName(), getName());
    }
};

/// Helper functions for implementations
static inline std::optional<size_t> extractMaxSplits(
    const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position)
{
    if (max_substrings_argument_position >= arguments.size())
        return std::nullopt;

    if (const ColumnConst * column = checkAndGetColumn<ColumnConst>(arguments[max_substrings_argument_position].column.get()))
    {
        size_t res = column->getUInt(0);
        if (res)
            return res;
    }

    return std::nullopt;
}

static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings(
    const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
    FunctionArgumentDescriptors mandatory_args{
        {"separator", &isString<IDataType>, isColumnConst, "const String"},
        {"s", &isString<IDataType>, nullptr, "String"}
    };

    FunctionArgumentDescriptors optional_args{
        {"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
    };

    validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
}

static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
    FunctionArgumentDescriptors mandatory_args{
        {"s", &isString<IDataType>, nullptr, "String"},
    };

    FunctionArgumentDescriptors optional_args{
        {"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
    };

    validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
}

}
@ -26,11 +26,13 @@ namespace ErrorCodes
class FunctionToUnixTimestamp64 : public IFunction
{
private:
-   size_t target_scale;
+   const size_t target_scale;
    const char * name;

public:
    FunctionToUnixTimestamp64(size_t target_scale_, const char * name_)
-       : target_scale(target_scale_), name(name_)
+       : target_scale(target_scale_)
+       , name(name_)
    {
    }

@ -42,8 +44,10 @@ public:

    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
-       if (!isDateTime64(arguments[0].type))
-           throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The only argument for function {} must be DateTime64", name);
+       FunctionArgumentDescriptors args{
+           {"value", &isDateTime64<IDataType>, nullptr, "DateTime64"}
+       };
+       validateFunctionArgumentTypes(*this, arguments, args);

        return std::make_shared<DataTypeInt64>();
    }
@ -98,9 +102,10 @@ public:
class FunctionFromUnixTimestamp64 : public IFunction
{
private:
-   size_t target_scale;
+   const size_t target_scale;
    const char * name;
+   const bool allow_nonconst_timezone_arguments;

public:
    FunctionFromUnixTimestamp64(size_t target_scale_, const char * name_, ContextPtr context)
        : target_scale(target_scale_)
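The hunk above swaps a hand-written if/throw for a declarative descriptor list checked by validateFunctionArgumentTypes. A stripped-down analogue of that pattern in plain C++ (argument "types" are strings here; ArgumentDescriptor and validateArguments are illustrative stand-ins, not the ClickHouse API):

#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct ArgumentDescriptor
{
    std::string name;
    std::function<bool(const std::string &)> type_check;
    std::string expected;
};

// One loop replaces a hand-written if/throw per argument and yields uniform messages.
void validateArguments(const std::vector<std::string> & arg_types,
                       const std::vector<ArgumentDescriptor> & descriptors)
{
    if (arg_types.size() != descriptors.size())
        throw std::runtime_error("wrong number of arguments");
    for (size_t i = 0; i < descriptors.size(); ++i)
        if (!descriptors[i].type_check(arg_types[i]))
            throw std::runtime_error("argument '" + descriptors[i].name + "' must be " + descriptors[i].expected);
}

int main()
{
    std::vector<ArgumentDescriptor> args{
        {"value", [](const std::string & t) { return t == "DateTime64"; }, "DateTime64"},
    };
    validateArguments({"DateTime64"}, args);      // passes
    try { validateArguments({"String"}, args); }  // throws, naming the offending argument
    catch (const std::exception & e) { std::cout << e.what() << '\n'; }
}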
@ -53,6 +53,7 @@
#include <Functions/toFixedString.h>
#include <Functions/TransformDateTime64.h>
#include <Functions/FunctionsCodingIP.h>
+#include <Functions/CastOverloadResolver.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnLowCardinality.h>
#include <Interpreters/Context.h>
@ -3127,14 +3128,8 @@ class ExecutableFunctionCast : public IExecutableFunction
public:
    using WrapperType = std::function<ColumnPtr(ColumnsWithTypeAndName &, const DataTypePtr &, const ColumnNullable *, size_t)>;

-   struct Diagnostic
-   {
-       std::string column_from;
-       std::string column_to;
-   };
-
    explicit ExecutableFunctionCast(
-       WrapperType && wrapper_function_, const char * name_, std::optional<Diagnostic> diagnostic_)
+       WrapperType && wrapper_function_, const char * name_, std::optional<CastDiagnostic> diagnostic_)
        : wrapper_function(std::move(wrapper_function_)), name(name_), diagnostic(std::move(diagnostic_)) {}

    String getName() const override { return name; }

@ -3170,24 +3165,16 @@ protected:
private:
    WrapperType wrapper_function;
    const char * name;
-   std::optional<Diagnostic> diagnostic;
+   std::optional<CastDiagnostic> diagnostic;
};

struct CastName { static constexpr auto name = "CAST"; };
struct CastInternalName { static constexpr auto name = "_CAST"; };

-enum class CastType
-{
-    nonAccurate,
-    accurate,
-    accurateOrNull
-};
-
class FunctionCastBase : public IFunctionBase
{
public:
    using MonotonicityForRange = std::function<Monotonicity(const IDataType &, const Field &, const Field &)>;
-   using Diagnostic = ExecutableFunctionCast::Diagnostic;
};

template <typename FunctionName>
@ -3201,7 +3188,7 @@ public:
    , MonotonicityForRange && monotonicity_for_range_
    , const DataTypes & argument_types_
    , const DataTypePtr & return_type_
-   , std::optional<Diagnostic> diagnostic_
+   , std::optional<CastDiagnostic> diagnostic_
    , CastType cast_type_)
    : cast_name(cast_name_), monotonicity_for_range(std::move(monotonicity_for_range_))
    , argument_types(argument_types_), return_type(return_type_), diagnostic(std::move(diagnostic_))
@ -3251,7 +3238,7 @@ private:
    DataTypes argument_types;
    DataTypePtr return_type;

-   std::optional<Diagnostic> diagnostic;
+   std::optional<CastDiagnostic> diagnostic;
    CastType cast_type;
    ContextPtr context;

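Here the nested ExecutableFunctionCast::Diagnostic (and the CastType enum) are hoisted to namespace scope as CastDiagnostic, so a small header can declare them without seeing the heavy class. A minimal sketch of the same move, with illustrative names only:

#include <string>

namespace before
{
    struct Executable
    {
        struct Diagnostic { std::string column_from; std::string column_to; };
        // ... heavy machinery ...
    };
    using D = Executable::Diagnostic;   // users must see (and include) all of Executable
}

namespace after
{
    // Free-standing type: declarable from a lightweight header.
    struct CastDiagnostic { std::string column_from; std::string column_to; };

    struct Executable
    {
        CastDiagnostic diagnostic;      // heavy class merely *uses* the hoisted type
    };
}

int main()
{
    after::CastDiagnostic d{"src", "dst"};
    (void)d;
    return 0;
}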
@ -1,73 +0,0 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h>

namespace DB
{
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
}

template <typename DataType>
std::optional<Int64> extractMaxSplitsImpl(const ColumnWithTypeAndName & argument)
{
    const auto * col = checkAndGetColumnConst<ColumnVector<DataType>>(argument.column.get());
    if (!col)
        return std::nullopt;

    auto value = col->template getValue<DataType>();
    return static_cast<Int64>(value);
}

std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position)
{
    if (max_substrings_argument_position >= arguments.size())
        return std::nullopt;

    std::optional<Int64> max_splits;
    if (!((max_splits = extractMaxSplitsImpl<UInt8>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int8>(arguments[max_substrings_argument_position]))
        || (max_splits = extractMaxSplitsImpl<UInt16>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int16>(arguments[max_substrings_argument_position]))
        || (max_splits = extractMaxSplitsImpl<UInt32>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int32>(arguments[max_substrings_argument_position]))
        || (max_splits = extractMaxSplitsImpl<UInt64>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int64>(arguments[max_substrings_argument_position]))))
        throw Exception(
            ErrorCodes::ILLEGAL_COLUMN,
            "Illegal column {}, which is {}-th argument",
            arguments[max_substrings_argument_position].column->getName(),
            max_substrings_argument_position + 1);

    if (*max_splits <= 0)
        return std::nullopt;

    return max_splits;
}

DataTypePtr FunctionArrayStringConcat::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const
{
    FunctionArgumentDescriptors mandatory_args{
        {"arr", &isArray<IDataType>, nullptr, "Array"},
    };

    FunctionArgumentDescriptors optional_args{
        {"separator", &isString<IDataType>, isColumnConst, "const String"},
    };

    validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);

    return std::make_shared<DataTypeString>();
}

REGISTER_FUNCTION(StringArray)
{
    factory.registerFunction<FunctionExtractAll>();

    factory.registerFunction<FunctionSplitByAlpha>();
    factory.registerAlias("splitByAlpha", FunctionSplitByAlpha::name);
    factory.registerFunction<FunctionSplitByNonAlpha>();
    factory.registerFunction<FunctionSplitByWhitespace>();
    factory.registerFunction<FunctionSplitByChar>();
    factory.registerFunction<FunctionSplitByString>();
    factory.registerFunction<FunctionSplitByRegexp>();
    factory.registerFunction<FunctionArrayStringConcat>();
}

}
@ -1,990 +0,0 @@
#pragma once

#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/IFunction.h>
#include <Functions/Regexps.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context_fwd.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int ILLEGAL_COLUMN;
}


/** Functions that split strings into an array of strings or vice versa.
  *
  * splitByChar(sep, s[, max_substrings])
  * splitByString(sep, s[, max_substrings])
  * splitByRegexp(regexp, s[, max_substrings])
  *
  * splitByWhitespace(s[, max_substrings]) - split the string by whitespace characters
  * splitByNonAlpha(s[, max_substrings]) - split the string by whitespace and punctuation characters
  *
  * extractAll(s, regexp) - select from the string the subsequences corresponding to the regexp.
  * - first subpattern, if regexp has subpattern;
  * - zero subpattern (the match part, otherwise);
  * - otherwise, an empty array
  *
  * arrayStringConcat(arr)
  * arrayStringConcat(arr, delimiter)
  * - join an array of strings into one string via a separator.
  *
  * alphaTokens(s[, max_substrings]) - select from the string subsequence `[a-zA-Z]+`.
  *
  * URL functions are located separately.
  */


using Pos = const char *;

std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position);

/// Substring generators. All of them have a common interface.

class SplitByAlphaImpl
{
private:
    Pos pos;
    Pos end;
    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = "alphaTokens";
    static String getName() { return name; }

    static bool isVariadic() { return true; }

    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        FunctionArgumentDescriptors mandatory_args{
            {"s", &isString<IDataType>, nullptr, "String"},
        };

        FunctionArgumentDescriptors optional_args{
            {"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
        };

        validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
    }

    static constexpr auto strings_argument_position = 0uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 1);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        /// Skip garbage
        while (pos < end && !isAlphaASCII(*pos))
            ++pos;

        if (pos == end)
            return false;

        token_begin = pos;

        if (max_splits)
        {
            if (max_substrings_includes_remaining_string)
            {
                if (splits == *max_splits - 1)
                {
                    token_end = end;
                    pos = end;
                    return true;
                }
            }
            else
                if (splits == *max_splits)
                    return false;
        }

        while (pos < end && isAlphaASCII(*pos))
            ++pos;

        token_end = pos;
        ++splits;

        return true;
    }
};
class SplitByNonAlphaImpl
{
private:
    Pos pos;
    Pos end;
    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    /// Get the name of the function.
    static constexpr auto name = "splitByNonAlpha";
    static String getName() { return name; }

    static bool isVariadic() { return true; }
    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        SplitByAlphaImpl::checkArguments(func, arguments);
    }

    static constexpr auto strings_argument_position = 0uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 1);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        /// Skip garbage
        while (pos < end && (isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
            ++pos;

        if (pos == end)
            return false;

        token_begin = pos;

        if (max_splits)
        {
            if (max_substrings_includes_remaining_string)
            {
                if (splits == *max_splits - 1)
                {
                    token_end = end;
                    pos = end;
                    return true;
                }
            }
            else
                if (splits == *max_splits)
                    return false;
        }

        while (pos < end && !(isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
            ++pos;

        token_end = pos;
        splits++;

        return true;
    }
};

class SplitByWhitespaceImpl
{
private:
    Pos pos;
    Pos end;
    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = "splitByWhitespace";
    static String getName() { return name; }

    static bool isVariadic() { return true; }
    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        return SplitByNonAlphaImpl::checkArguments(func, arguments);
    }

    static constexpr auto strings_argument_position = 0uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 1);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        /// Skip garbage
        while (pos < end && isWhitespaceASCII(*pos))
            ++pos;

        if (pos == end)
            return false;

        token_begin = pos;

        if (max_splits)
        {
            if (max_substrings_includes_remaining_string)
            {
                if (splits == *max_splits - 1)
                {
                    token_end = end;
                    pos = end;
                    return true;
                }
            }
            else
                if (splits == *max_splits)
                    return false;
        }

        while (pos < end && !isWhitespaceASCII(*pos))
            ++pos;

        token_end = pos;
        splits++;

        return true;
    }
};

class SplitByCharImpl
{
private:
    Pos pos;
    Pos end;
    char separator;
    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = "splitByChar";
    static String getName() { return name; }
    static bool isVariadic() { return true; }
    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        FunctionArgumentDescriptors mandatory_args{
            {"separator", &isString<IDataType>, isColumnConst, "const String"},
            {"s", &isString<IDataType>, nullptr, "String"}
        };

        FunctionArgumentDescriptors optional_args{
            {"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
        };

        validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
    }

    static constexpr auto strings_argument_position = 1uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());

        if (!col)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
                "Must be constant string.", arguments[0].column->getName(), getName());

        String sep_str = col->getValue<String>();

        if (sep_str.size() != 1)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal separator for function {}. Must be exactly one byte.", getName());

        separator = sep_str[0];

        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 2);
    }

    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    bool get(Pos & token_begin, Pos & token_end)
    {
        if (!pos)
            return false;

        token_begin = pos;

        if (max_splits)
        {
            if (max_substrings_includes_remaining_string)
            {
                if (splits == *max_splits - 1)
                {
                    token_end = end;
                    pos = nullptr;
                    return true;
                }
            }
            else
                if (splits == *max_splits)
                    return false;
        }

        pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
        if (pos)
        {
            token_end = pos;
            ++pos;
            ++splits;
        }
        else
            token_end = end;

        return true;
    }
};
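Each generator above handles max_splits in one of two modes, selected by the splitby_max_substrings_includes_remaining_string setting: either the last token swallows the rest of the string, or splitting simply stops and the tail is discarded. A self-contained toy that mirrors that branch (splitByCharToy is an illustrative name, not the real implementation):

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

std::vector<std::string> splitByCharToy(const std::string & s, char sep, size_t max_splits, bool includes_remaining)
{
    std::vector<std::string> out;
    const char * pos = s.data();
    const char * end = s.data() + s.size();
    size_t splits = 0;
    while (pos)
    {
        if (includes_remaining && splits == max_splits - 1)
        {
            out.emplace_back(pos, end);      // remaining string becomes the final token
            break;
        }
        if (!includes_remaining && splits == max_splits)
            break;                           // stop: the tail is discarded
        const char * found = static_cast<const char *>(memchr(pos, sep, end - pos));
        if (found)
        {
            out.emplace_back(pos, found);
            pos = found + 1;
            ++splits;
        }
        else
        {
            out.emplace_back(pos, end);
            pos = nullptr;
        }
    }
    return out;
}

int main()
{
    for (const auto & t : splitByCharToy("a,b,c,d", ',', 2, true))   // a  b,c,d
        std::cout << t << ' ';
    std::cout << '\n';
    for (const auto & t : splitByCharToy("a,b,c,d", ',', 2, false))  // a  b
        std::cout << t << ' ';
    std::cout << '\n';
}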
class SplitByStringImpl
{
private:
    Pos pos;
    Pos end;
    String separator;
    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = "splitByString";
    static String getName() { return name; }
    static bool isVariadic() { return true; }
    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        SplitByCharImpl::checkArguments(func, arguments);
    }

    static constexpr auto strings_argument_position = 1uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());

        if (!col)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
                "Must be constant string.", arguments[0].column->getName(), getName());

        separator = col->getValue<String>();

        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 2);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        if (separator.empty())
        {
            if (pos == end)
                return false;

            token_begin = pos;

            if (max_splits)
            {
                if (max_substrings_includes_remaining_string)
                {
                    if (splits == *max_splits - 1)
                    {
                        token_end = end;
                        pos = end;
                        return true;
                    }
                }
                else
                    if (splits == *max_splits)
                        return false;
            }

            pos += 1;
            token_end = pos;
            ++splits;
        }
        else
        {
            if (!pos)
                return false;

            token_begin = pos;

            if (max_splits)
            {
                if (max_substrings_includes_remaining_string)
                {
                    if (splits == *max_splits - 1)
                    {
                        token_end = end;
                        pos = nullptr;
                        return true;
                    }
                }
                else
                    if (splits == *max_splits)
                        return false;
            }

            pos = reinterpret_cast<Pos>(memmem(pos, end - pos, separator.data(), separator.size()));
            if (pos)
            {
                token_end = pos;
                pos += separator.size();
                ++splits;
            }
            else
                token_end = end;
        }

        return true;
    }
};

class SplitByRegexpImpl
{
private:
    Regexps::RegexpPtr re;
    OptimizedRegularExpression::MatchVec matches;

    Pos pos;
    Pos end;

    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = "splitByRegexp";
    static String getName() { return name; }

    static bool isVariadic() { return true; }
    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        SplitByStringImpl::checkArguments(func, arguments);
    }

    static constexpr auto strings_argument_position = 1uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());

        if (!col)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
                "Must be constant string.", arguments[0].column->getName(), getName());

        if (!col->getValue<String>().empty())
            re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));

        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 2);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        if (!re)
        {
            if (pos == end)
                return false;

            token_begin = pos;

            if (max_splits)
            {
                if (max_substrings_includes_remaining_string)
                {
                    if (splits == *max_splits - 1)
                    {
                        token_end = end;
                        pos = end;
                        return true;
                    }
                }
                else
                    if (splits == *max_splits)
                        return false;
            }

            pos += 1;
            token_end = pos;
            ++splits;
        }
        else
        {
            if (!pos || pos > end)
                return false;

            token_begin = pos;

            if (max_splits)
            {
                if (max_substrings_includes_remaining_string)
                {
                    if (splits == *max_splits - 1)
                    {
                        token_end = end;
                        pos = nullptr;
                        return true;
                    }
                }
                else
                    if (splits == *max_splits)
                        return false;
            }

            if (!re->match(pos, end - pos, matches) || !matches[0].length)
            {
                token_end = end;
                pos = end + 1;
            }
            else
            {
                token_end = pos + matches[0].offset;
                pos = token_end + matches[0].length;
                ++splits;
            }
        }

        return true;
    }
};

class ExtractAllImpl
{
private:
    Regexps::RegexpPtr re;
    OptimizedRegularExpression::MatchVec matches;
    size_t capture;

    Pos pos;
    Pos end;
public:
    static constexpr auto name = "extractAll";
    static String getName() { return name; }
    static bool isVariadic() { return false; }
    static size_t getNumberOfArguments() { return 2; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        FunctionArgumentDescriptors mandatory_args{
            {"haystack", &isString<IDataType>, nullptr, "String"},
            {"pattern", &isString<IDataType>, isColumnConst, "const String"}
        };

        validateFunctionArgumentTypes(func, arguments, mandatory_args);
    }

    static constexpr auto strings_argument_position = 0uz;

    void init(const ColumnsWithTypeAndName & arguments, bool /*max_substrings_includes_remaining_string*/)
    {
        const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());

        if (!col)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
                "Must be constant string.", arguments[1].column->getName(), getName());

        re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
        capture = re->getNumberOfSubpatterns() > 0 ? 1 : 0;

        matches.resize(capture + 1);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        if (!pos || pos > end)
            return false;

        if (!re->match(pos, end - pos, matches) || !matches[0].length)
            return false;

        if (matches[capture].offset == std::string::npos)
        {
            /// Empty match.
            token_begin = pos;
            token_end = pos;
        }
        else
        {
            token_begin = pos + matches[capture].offset;
            token_end = token_begin + matches[capture].length;
        }

        pos += matches[0].offset + matches[0].length;

        return true;
    }
};

/// A function that takes a string, and returns an array of substrings created by some generator.
template <typename Generator>
class FunctionTokens : public IFunction
{
private:
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = Generator::name;
    static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); }

    explicit FunctionTokens<Generator>(ContextPtr context)
    {
        const Settings & settings = context->getSettingsRef();
        max_substrings_includes_remaining_string = settings.splitby_max_substrings_includes_remaining_string;
    }

    String getName() const override { return name; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    bool isVariadic() const override { return Generator::isVariadic(); }

    size_t getNumberOfArguments() const override { return Generator::getNumberOfArguments(); }

    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        Generator::checkArguments(*this, arguments);

        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
    {
        Generator generator;
        generator.init(arguments, max_substrings_includes_remaining_string);

        const auto & array_argument = arguments[generator.strings_argument_position];

        const ColumnString * col_str = checkAndGetColumn<ColumnString>(array_argument.column.get());
        const ColumnConst * col_str_const = checkAndGetColumnConstStringOrFixedString(array_argument.column.get());

        auto col_res = ColumnArray::create(ColumnString::create());

        ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
        ColumnString::Chars & res_strings_chars = res_strings.getChars();
        ColumnString::Offsets & res_strings_offsets = res_strings.getOffsets();

        ColumnArray::Offsets & res_offsets = col_res->getOffsets();

        if (col_str)
        {
            const ColumnString::Chars & src_chars = col_str->getChars();
            const ColumnString::Offsets & src_offsets = col_str->getOffsets();

            res_offsets.reserve(src_offsets.size());
            res_strings_offsets.reserve(src_offsets.size() * 5);    /// Constant 5 - at random.
            res_strings_chars.reserve(src_chars.size());

            Pos token_begin = nullptr;
            Pos token_end = nullptr;

            size_t size = src_offsets.size();
            ColumnString::Offset current_src_offset = 0;
            ColumnArray::Offset current_dst_offset = 0;
            ColumnString::Offset current_dst_strings_offset = 0;
            for (size_t i = 0; i < size; ++i)
            {
                Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]);
                current_src_offset = src_offsets[i];
                Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1;

                generator.set(pos, end);
                size_t j = 0;
                while (generator.get(token_begin, token_end))
                {
                    size_t token_size = token_end - token_begin;

                    res_strings_chars.resize(res_strings_chars.size() + token_size + 1);
                    memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size);
                    res_strings_chars[current_dst_strings_offset + token_size] = 0;

                    current_dst_strings_offset += token_size + 1;
                    res_strings_offsets.push_back(current_dst_strings_offset);
                    ++j;
                }

                current_dst_offset += j;
                res_offsets.push_back(current_dst_offset);
            }

            return col_res;
        }
        else if (col_str_const)
        {
            String src = col_str_const->getValue<String>();
            Array dst;

            generator.set(src.data(), src.data() + src.size());
            Pos token_begin = nullptr;
            Pos token_end = nullptr;

            while (generator.get(token_begin, token_end))
                dst.push_back(String(token_begin, token_end - token_begin));

            return result_type->createColumnConst(col_str_const->size(), dst);
        }
        else
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {}, {} of arguments of function {}",
                array_argument.column->getName(), array_argument.column->getName(), getName());
    }
};


/// Joins an array of type serializable to string into one string via a separator.
class FunctionArrayStringConcat : public IFunction
{
private:
    static void executeInternal(
        const ColumnString::Chars & src_chars,
        const ColumnString::Offsets & src_string_offsets,
        const ColumnArray::Offsets & src_array_offsets,
        const char * delimiter,
        const size_t delimiter_size,
        ColumnString::Chars & dst_chars,
        ColumnString::Offsets & dst_string_offsets,
        const char8_t * null_map)
    {
        size_t size = src_array_offsets.size();

        if (!size)
            return;

        /// With a small margin - as if the separator goes after the last string of the array.
        dst_chars.resize(
            src_chars.size()
            + delimiter_size * src_string_offsets.size()    /// Separators after each string...
|
||||
+ src_array_offsets.size() /// Zero byte after each joined string
|
||||
- src_string_offsets.size()); /// The former zero byte after each string of the array
|
||||
|
||||
/// There will be as many strings as there were arrays.
|
||||
dst_string_offsets.resize(src_array_offsets.size());
|
||||
|
||||
ColumnArray::Offset current_src_array_offset = 0;
|
||||
|
||||
ColumnString::Offset current_dst_string_offset = 0;
|
||||
|
||||
/// Loop through the array of strings.
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
bool first_non_null = true;
|
||||
/// Loop through the rows within the array. /// NOTE You can do everything in one copy, if the separator has a size of 1.
|
||||
for (auto next_src_array_offset = src_array_offsets[i]; current_src_array_offset < next_src_array_offset; ++current_src_array_offset)
|
||||
{
|
||||
if (null_map && null_map[current_src_array_offset]) [[unlikely]]
|
||||
continue;
|
||||
|
||||
if (!first_non_null)
|
||||
{
|
||||
memcpy(&dst_chars[current_dst_string_offset], delimiter, delimiter_size);
|
||||
current_dst_string_offset += delimiter_size;
|
||||
}
|
||||
first_non_null = false;
|
||||
|
||||
const auto current_src_string_offset = current_src_array_offset ? src_string_offsets[current_src_array_offset - 1] : 0;
|
||||
size_t bytes_to_copy = src_string_offsets[current_src_array_offset] - current_src_string_offset - 1;
|
||||
|
||||
memcpySmallAllowReadWriteOverflow15(
|
||||
&dst_chars[current_dst_string_offset], &src_chars[current_src_string_offset], bytes_to_copy);
|
||||
|
||||
current_dst_string_offset += bytes_to_copy;
|
||||
}
|
||||
|
||||
dst_chars[current_dst_string_offset] = 0;
|
||||
++current_dst_string_offset;
|
||||
|
||||
dst_string_offsets[i] = current_dst_string_offset;
|
||||
}
|
||||
|
||||
dst_chars.resize(dst_string_offsets.back());
|
||||
}
|
||||
|
||||
static void executeInternal(
|
||||
const ColumnString & col_string,
|
||||
const ColumnArray & col_arr,
|
||||
const String & delimiter,
|
||||
ColumnString & col_res,
|
||||
const char8_t * null_map = nullptr)
|
||||
{
|
||||
executeInternal(
|
||||
col_string.getChars(),
|
||||
col_string.getOffsets(),
|
||||
col_arr.getOffsets(),
|
||||
delimiter.data(),
|
||||
delimiter.size(),
|
||||
col_res.getChars(),
|
||||
col_res.getOffsets(),
|
||||
null_map);
|
||||
}
|
||||
|
||||
static ColumnPtr serializeNestedColumn(const ColumnArray & col_arr, const DataTypePtr & nested_type)
|
||||
{
|
||||
if (isString(nested_type))
|
||||
{
|
||||
return col_arr.getDataPtr();
|
||||
}
|
||||
else if (const ColumnNullable * col_nullable = checkAndGetColumn<ColumnNullable>(col_arr.getData());
|
||||
col_nullable && isString(col_nullable->getNestedColumn().getDataType()))
|
||||
{
|
||||
return col_nullable->getNestedColumnPtr();
|
||||
}
|
||||
else
|
||||
{
|
||||
ColumnsWithTypeAndName cols;
|
||||
cols.emplace_back(col_arr.getDataPtr(), nested_type, "tmp");
|
||||
return ConvertImplGenericToString<ColumnString>::execute(cols, std::make_shared<DataTypeString>(), col_arr.size());
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static constexpr auto name = "arrayStringConcat";
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayStringConcat>(); }
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override;
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
String delimiter;
|
||||
if (arguments.size() == 2)
|
||||
{
|
||||
const ColumnConst * col_delim = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
|
||||
if (!col_delim)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant string.", getName());
|
||||
|
||||
delimiter = col_delim->getValue<String>();
|
||||
}
|
||||
|
||||
const auto & nested_type = assert_cast<const DataTypeArray &>(*arguments[0].type).getNestedType();
|
||||
if (const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arguments[0].column.get());
|
||||
col_const_arr && isString(nested_type))
|
||||
{
|
||||
Array src_arr = col_const_arr->getValue<Array>();
|
||||
String dst_str;
|
||||
bool first_non_null = true;
|
||||
for (size_t i = 0, size = src_arr.size(); i < size; ++i)
|
||||
{
|
||||
if (src_arr[i].isNull())
|
||||
continue;
|
||||
if (!first_non_null)
|
||||
dst_str += delimiter;
|
||||
first_non_null = false;
|
||||
dst_str += src_arr[i].get<const String &>();
|
||||
}
|
||||
|
||||
return result_type->createColumnConst(col_const_arr->size(), dst_str);
|
||||
}
|
||||
|
||||
ColumnPtr src_column = arguments[0].column->convertToFullColumnIfConst();
|
||||
const ColumnArray & col_arr = assert_cast<const ColumnArray &>(*src_column.get());
|
||||
|
||||
ColumnPtr str_subcolumn = serializeNestedColumn(col_arr, nested_type);
|
||||
const ColumnString & col_string = assert_cast<const ColumnString &>(*str_subcolumn.get());
|
||||
|
||||
auto col_res = ColumnString::create();
|
||||
if (const ColumnNullable * col_nullable = checkAndGetColumn<ColumnNullable>(col_arr.getData()))
|
||||
executeInternal(col_string, col_arr, delimiter, *col_res, col_nullable->getNullMapData().data());
|
||||
else
|
||||
executeInternal(col_string, col_arr, delimiter, *col_res);
|
||||
return col_res;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
using FunctionSplitByAlpha = FunctionTokens<SplitByAlphaImpl>;
|
||||
using FunctionSplitByNonAlpha = FunctionTokens<SplitByNonAlphaImpl>;
|
||||
using FunctionSplitByWhitespace = FunctionTokens<SplitByWhitespaceImpl>;
|
||||
using FunctionSplitByChar = FunctionTokens<SplitByCharImpl>;
|
||||
using FunctionSplitByString = FunctionTokens<SplitByStringImpl>;
|
||||
using FunctionSplitByRegexp = FunctionTokens<SplitByRegexpImpl>;
|
||||
using FunctionExtractAll = FunctionTokens<ExtractAllImpl>;
|
||||
|
||||
}
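
For reference, a minimal sketch of the Generator contract that FunctionTokens drives. The digitTokens name and its digit-run behavior are hypothetical (not part of this commit); only the member signatures mirror the interface used by executeImpl above.

/// Hypothetical generator, for illustration only: extracts runs of ASCII digits.
class DigitTokensImpl
{
private:
    Pos pos;
    Pos end;

public:
    static constexpr auto name = "digitTokens";    /// Exposed as FunctionTokens::name.
    static bool isVariadic() { return false; }
    static size_t getNumberOfArguments() { return 1; }

    /// Called from getReturnTypeImpl() to validate argument types.
    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        FunctionArgumentDescriptors mandatory_args{
            {"s", &isString<IDataType>, nullptr, "String"},
        };
        validateFunctionArgumentTypes(func, arguments, mandatory_args);
    }

    /// Index of the argument holding the strings to split (1 for splitByChar-style functions).
    static constexpr auto strings_argument_position = 0uz;

    /// Runs once per block; constant arguments (separators, limits) are read here.
    void init(const ColumnsWithTypeAndName &, bool /*max_substrings_includes_remaining_string*/) {}

    /// Called for each input row with its byte range.
    void set(Pos pos_, Pos end_) { pos = pos_; end = end_; }

    /// Emits the next token; FunctionTokens copies [token_begin, token_end) into the result array.
    bool get(Pos & token_begin, Pos & token_end)
    {
        while (pos < end && !isNumericASCII(*pos))
            ++pos;
        if (pos == end)
            return false;
        token_begin = pos;
        while (pos < end && isNumericASCII(*pos))
            ++pos;
        token_end = pos;
        return true;
    }
};

/// Wiring it up would follow the same pattern as the aliases above:
/// using FunctionDigitTokens = FunctionTokens<DigitTokensImpl>;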

@@ -1,9 +1,15 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h>
#include <Functions/FunctionTokens.h>


namespace DB
{

namespace
{

using Pos = const char *;

class URLPathHierarchyImpl
{
private:
@@ -14,7 +20,6 @@ private:

public:
    static constexpr auto name = "URLPathHierarchy";
    static String getName() { return name; }

    static bool isVariadic() { return false; }
    static size_t getNumberOfArguments() { return 1; }
@@ -95,9 +100,10 @@ public:
};


struct NameURLPathHierarchy { static constexpr auto name = "URLPathHierarchy"; };
using FunctionURLPathHierarchy = FunctionTokens<URLPathHierarchyImpl>;

}

REGISTER_FUNCTION(URLPathHierarchy)
{
    factory.registerFunction<FunctionURLPathHierarchy>();

@@ -1,9 +1,14 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h>
#include <Functions/FunctionTokens.h>

namespace DB
{

namespace
{

using Pos = const char *;

class URLHierarchyImpl
{
private:
@@ -13,7 +18,6 @@ private:

public:
    static constexpr auto name = "URLHierarchy";
    static String getName() { return name; }

    static bool isVariadic() { return false; }
    static size_t getNumberOfArguments() { return 1; }
@@ -97,9 +101,10 @@ public:
};


struct NameURLHierarchy { static constexpr auto name = "URLHierarchy"; };
using FunctionURLHierarchy = FunctionTokens<URLHierarchyImpl>;

}

REGISTER_FUNCTION(URLHierarchy)
{
    factory.registerFunction<FunctionURLHierarchy>();

@@ -1,9 +1,14 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h>
#include <Functions/FunctionTokens.h>

namespace DB
{

namespace
{

using Pos = const char *;

class ExtractURLParameterNamesImpl
{
private:
@@ -13,7 +18,6 @@ private:

public:
    static constexpr auto name = "extractURLParameterNames";
    static String getName() { return name; }

    static bool isVariadic() { return false; }
    static size_t getNumberOfArguments() { return 1; }
@@ -80,9 +84,10 @@ public:
    }
};

struct NameExtractURLParameterNames { static constexpr auto name = "extractURLParameterNames"; };
using FunctionExtractURLParameterNames = FunctionTokens<ExtractURLParameterNamesImpl>;

}

REGISTER_FUNCTION(ExtractURLParameterNames)
{
    factory.registerFunction<FunctionExtractURLParameterNames>();

@@ -1,9 +1,15 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h>
#include <Functions/FunctionTokens.h>


namespace DB
{

namespace
{

using Pos = const char *;

class ExtractURLParametersImpl
{
private:
@@ -13,7 +19,6 @@ private:

public:
    static constexpr auto name = "extractURLParameters";
    static String getName() { return name; }

    static bool isVariadic() { return false; }
    static size_t getNumberOfArguments() { return 1; }
@@ -88,9 +93,10 @@ public:
    }
};

struct NameExtractURLParameters { static constexpr auto name = "extractURLParameters"; };
using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;

}

REGISTER_FUNCTION(ExtractURLParameters)
{
    factory.registerFunction<FunctionExtractURLParameters>();
104
src/Functions/alphaTokens.cpp
Normal file
@@ -0,0 +1,104 @@

#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Common/StringUtils/StringUtils.h>


namespace DB
{

/** Functions that split strings into an array of strings or vice versa.
  *
  * alphaTokens(s[, max_substrings]) - selects substrings matching `[a-zA-Z]+` from the string.
  */
namespace
{

using Pos = const char *;

class SplitByAlphaImpl
{
private:
    Pos pos;
    Pos end;
    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = "alphaTokens";

    static bool isVariadic() { return true; }

    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        checkArgumentsWithOptionalMaxSubstrings(func, arguments);
    }

    static constexpr auto strings_argument_position = 0uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 1);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        /// Skip garbage
        while (pos < end && !isAlphaASCII(*pos))
            ++pos;

        if (pos == end)
            return false;

        token_begin = pos;

        if (max_splits)
        {
            if (max_substrings_includes_remaining_string)
            {
                if (splits == *max_splits - 1)
                {
                    token_end = end;
                    pos = end;
                    return true;
                }
            }
            else if (splits == *max_splits)
                return false;
        }

        while (pos < end && isAlphaASCII(*pos))
            ++pos;

        token_end = pos;
        ++splits;

        return true;
    }
};

using FunctionSplitByAlpha = FunctionTokens<SplitByAlphaImpl>;

}

REGISTER_FUNCTION(SplitByAlpha)
{
    factory.registerFunction<FunctionSplitByAlpha>();
    factory.registerAlias("splitByAlpha", FunctionSplitByAlpha::name);
}

}
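
The splits bookkeeping above is what the splitby_max_substrings_includes_remaining_string setting toggles: once the limit is reached, the last emitted token either swallows the remainder of the string or the remainder is dropped. A self-contained sketch of the same decision logic (plain C++ with std::isalpha standing in for isAlphaASCII; illustrative, not ClickHouse code):

#include <cctype>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

/// Standalone re-implementation of the alphaTokens loop above.
std::vector<std::string> alphaTokens(const std::string & s, std::optional<size_t> max_splits, bool includes_remaining)
{
    std::vector<std::string> result;
    const char * pos = s.data();
    const char * end = s.data() + s.size();
    size_t splits = 0;
    while (true)
    {
        while (pos < end && !std::isalpha(static_cast<unsigned char>(*pos)))
            ++pos;
        if (pos == end)
            break;
        const char * token_begin = pos;
        if (max_splits)
        {
            if (includes_remaining && splits == *max_splits - 1)
            {
                result.emplace_back(token_begin, end);    /// The last slot swallows the rest of the string.
                break;
            }
            if (!includes_remaining && splits == *max_splits)
                break;                                    /// Extra tokens are silently dropped.
        }
        while (pos < end && std::isalpha(static_cast<unsigned char>(*pos)))
            ++pos;
        result.emplace_back(token_begin, pos);
        ++splits;
    }
    return result;
}

int main()
{
    for (const auto & t : alphaTokens("ab1cd2ef", 2, true))
        std::cout << t << '\n';    /// Prints "ab" and "cd2ef".
    for (const auto & t : alphaTokens("ab1cd2ef", 2, false))
        std::cout << t << '\n';    /// Prints "ab" and "cd".
}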

@@ -4,7 +4,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <base/range.h>


namespace DB
@@ -46,10 +45,10 @@ private:
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (!isString(arguments[0]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName());
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of the first argument of function {}", arguments[0]->getName(), getName());

        if (!isString(arguments[1]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[1]->getName(), getName());
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of the second argument of function {}", arguments[1]->getName(), getName());

        return std::make_shared<DataTypeString>();
    }
202
src/Functions/arrayStringConcat.cpp
Normal file
@@ -0,0 +1,202 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/Regexps.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/castColumn.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>


/** arrayStringConcat(arr)
  * arrayStringConcat(arr, delimiter)
  * - join an array of strings into one string via a separator.
  */
namespace DB
{

namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
}

namespace
{

/// Joins an array of type serializable to string into one string via a separator.
class FunctionArrayStringConcat : public IFunction
{
private:
    static void executeInternal(
        const ColumnString::Chars & src_chars,
        const ColumnString::Offsets & src_string_offsets,
        const ColumnArray::Offsets & src_array_offsets,
        const char * delimiter,
        const size_t delimiter_size,
        ColumnString::Chars & dst_chars,
        ColumnString::Offsets & dst_string_offsets,
        const char8_t * null_map)
    {
        size_t size = src_array_offsets.size();

        if (!size)
            return;

        /// With a small margin - as if the separator goes after the last string of the array.
        dst_chars.resize(
            src_chars.size()
            + delimiter_size * src_string_offsets.size()    /// Separators after each string...
            + src_array_offsets.size()                      /// Zero byte after each joined string
            - src_string_offsets.size());                   /// The former zero byte after each string of the array

        /// There will be as many strings as there were arrays.
        dst_string_offsets.resize(src_array_offsets.size());

        ColumnArray::Offset current_src_array_offset = 0;

        ColumnString::Offset current_dst_string_offset = 0;

        /// Loop through the array of strings.
        for (size_t i = 0; i < size; ++i)
        {
            bool first_non_null = true;
            /// Loop through the rows within the array. /// NOTE You can do everything in one copy, if the separator has a size of 1.
            for (auto next_src_array_offset = src_array_offsets[i]; current_src_array_offset < next_src_array_offset; ++current_src_array_offset)
            {
                if (null_map && null_map[current_src_array_offset]) [[unlikely]]
                    continue;

                if (!first_non_null)
                {
                    memcpy(&dst_chars[current_dst_string_offset], delimiter, delimiter_size);
                    current_dst_string_offset += delimiter_size;
                }
                first_non_null = false;

                const auto current_src_string_offset = current_src_array_offset ? src_string_offsets[current_src_array_offset - 1] : 0;
                size_t bytes_to_copy = src_string_offsets[current_src_array_offset] - current_src_string_offset - 1;

                memcpySmallAllowReadWriteOverflow15(
                    &dst_chars[current_dst_string_offset], &src_chars[current_src_string_offset], bytes_to_copy);

                current_dst_string_offset += bytes_to_copy;
            }

            dst_chars[current_dst_string_offset] = 0;
            ++current_dst_string_offset;

            dst_string_offsets[i] = current_dst_string_offset;
        }

        dst_chars.resize(dst_string_offsets.back());
    }

    static void executeInternal(
        const ColumnString & col_string,
        const ColumnArray & col_arr,
        const String & delimiter,
        ColumnString & col_res,
        const char8_t * null_map = nullptr)
    {
        executeInternal(
            col_string.getChars(),
            col_string.getOffsets(),
            col_arr.getOffsets(),
            delimiter.data(),
            delimiter.size(),
            col_res.getChars(),
            col_res.getOffsets(),
            null_map);
    }

    static ColumnPtr serializeNestedColumn(const ColumnArray & col_arr, const DataTypePtr & nested_type)
    {
        DataTypePtr type = nested_type;
        ColumnPtr column = col_arr.getDataPtr();

        if (type->isNullable())
        {
            type = removeNullable(type);
            column = assert_cast<const ColumnNullable &>(*column).getNestedColumnPtr();
        }

        return castColumn({column, type, "tmp"}, std::make_shared<DataTypeString>());
    }

public:
    static constexpr auto name = "arrayStringConcat";
    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayStringConcat>(); }

    String getName() const override
    {
        return name;
    }

    bool isVariadic() const override { return true; }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
    size_t getNumberOfArguments() const override { return 0; }

    bool useDefaultImplementationForConstants() const override { return true; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }

    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        FunctionArgumentDescriptors mandatory_args
        {
            {"arr", &isArray<IDataType>, nullptr, "Array"},
        };

        FunctionArgumentDescriptors optional_args
        {
            {"separator", &isString<IDataType>, isColumnConst, "const String"},
        };

        validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);

        return std::make_shared<DataTypeString>();
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
    {
        String delimiter;
        if (arguments.size() == 2)
        {
            const ColumnConst * col_delim = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
            if (!col_delim)
                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant string.", getName());

            delimiter = col_delim->getValue<String>();
        }

        const auto & nested_type = assert_cast<const DataTypeArray &>(*arguments[0].type).getNestedType();
        const ColumnArray & col_arr = assert_cast<const ColumnArray &>(*arguments[0].column);

        ColumnPtr str_subcolumn = serializeNestedColumn(col_arr, nested_type);
        const ColumnString & col_string = assert_cast<const ColumnString &>(*str_subcolumn.get());

        auto col_res = ColumnString::create();
        if (const ColumnNullable * col_nullable = checkAndGetColumn<ColumnNullable>(col_arr.getData()))
            executeInternal(col_string, col_arr, delimiter, *col_res, col_nullable->getNullMapData().data());
        else
            executeInternal(col_string, col_arr, delimiter, *col_res);

        return col_res;
    }
};

}

REGISTER_FUNCTION(ArrayStringConcat)
{
    factory.registerFunction<FunctionArrayStringConcat>();
}

}
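
Both the "- 1" in bytes_to_copy and the resize() upper bound above rely on ClickHouse's ColumnString layout: all strings of a column are concatenated into a single chars buffer, each followed by a terminating zero byte, and offsets[i] points one past that zero. The bound is exact when no element is NULL; with NULLs the final dst_chars.resize(dst_string_offsets.back()) trims the surplus. A standalone sketch of the layout (plain std::vector in place of the real PODArray types; illustrative only):

#include <iostream>
#include <string>
#include <vector>

int main()
{
    /// ColumnString-like layout: "foo" and "bars" concatenated,
    /// each followed by a terminating zero; offsets point one past the zero.
    std::vector<char> chars = {'f', 'o', 'o', 0, 'b', 'a', 'r', 's', 0};
    std::vector<size_t> offsets = {4, 9};

    for (size_t i = 0; i < offsets.size(); ++i)
    {
        size_t begin = i ? offsets[i - 1] : 0;
        size_t length = offsets[i] - begin - 1;    /// The same "- 1" as bytes_to_copy above.
        std::cout << std::string(chars.data() + begin, length) << '\n';
    }
}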
122
src/Functions/extractAll.cpp
Normal file
@@ -0,0 +1,122 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Functions/Regexps.h>
#include <Interpreters/Context.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
}


/** Functions that split strings into an array of strings or vice versa.
  *
  * extractAll(s, regexp) - selects the subsequences of the string that match the regexp:
  * - the first subpattern, if the regexp has a subpattern;
  * - the whole match otherwise;
  * - an empty array if there are no matches.
  */
namespace
{

using Pos = const char *;

class ExtractAllImpl
{
private:
    Regexps::RegexpPtr re;
    OptimizedRegularExpression::MatchVec matches;
    size_t capture;

    Pos pos;
    Pos end;
public:
    static constexpr auto name = "extractAll";
    static String getName() { return name; }
    static bool isVariadic() { return false; }
    static size_t getNumberOfArguments() { return 2; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        FunctionArgumentDescriptors mandatory_args{
            {"haystack", &isString<IDataType>, nullptr, "String"},
            {"pattern", &isString<IDataType>, isColumnConst, "const String"}
        };

        validateFunctionArgumentTypes(func, arguments, mandatory_args);
    }

    static constexpr auto strings_argument_position = 0uz;

    void init(const ColumnsWithTypeAndName & arguments, bool /*max_substrings_includes_remaining_string*/)
    {
        const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());

        if (!col)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function {}. "
                "Must be constant string.", arguments[1].column->getName(), getName());

        re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
        capture = re->getNumberOfSubpatterns() > 0 ? 1 : 0;

        matches.resize(capture + 1);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        if (!pos || pos > end)
            return false;

        if (!re->match(pos, end - pos, matches) || !matches[0].length)
            return false;

        if (matches[capture].offset == std::string::npos)
        {
            /// Empty match.
            token_begin = pos;
            token_end = pos;
        }
        else
        {
            token_begin = pos + matches[capture].offset;
            token_end = token_begin + matches[capture].length;
        }

        pos += matches[0].offset + matches[0].length;

        return true;
    }
};

using FunctionExtractAll = FunctionTokens<ExtractAllImpl>;

}

REGISTER_FUNCTION(ExtractAll)
{
    factory.registerFunction<FunctionExtractAll>();
}

}
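
The capture member above implements the documented rule: with at least one subpattern the first capture group is extracted, otherwise the whole match. A self-contained sketch of the same rule with std::regex standing in for OptimizedRegularExpression (illustrative only, not the commit's code):

#include <iostream>
#include <regex>
#include <string>
#include <vector>

/// Standalone sketch of extractAll's capture selection.
std::vector<std::string> extractAll(const std::string & s, const std::string & pattern)
{
    std::regex re(pattern);
    size_t capture = re.mark_count() > 0 ? 1 : 0;    /// Same rule as getNumberOfSubpatterns() above.
    std::vector<std::string> result;
    for (std::sregex_iterator it(s.begin(), s.end(), re), last; it != last; ++it)
        result.push_back((*it)[capture].str());
    return result;
}

int main()
{
    for (const auto & t : extractAll("key1=a, key2=b", "=([a-z0-9]+)"))
        std::cout << t << '\n';    /// Prints "a" and "b" - the capture group, not the "=" prefix.
}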

@@ -9,12 +9,14 @@
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/IFunction.h>
#include <Functions/castTypeToEither.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/numLiteralChars.h>

#include <Interpreters/Context.h>
#include <Interpreters/castColumn.h>

#include <IO/WriteHelpers.h>

#include <Common/Concepts.h>
@@ -803,18 +805,7 @@ public:
    {
        if (arguments.size() == 1)
        {
            if (!castType(arguments[0].type.get(), [&](const auto & type)
            {
                using FromDataType = std::decay_t<decltype(type)>;
                res = ConvertImpl<FromDataType, DataTypeDateTime, Name>::execute(arguments, result_type, input_rows_count);
                return true;
            }))
            {
                throw Exception(ErrorCodes::ILLEGAL_COLUMN,
                    "Illegal column {} of function {}, must be Integer, Date, Date32, DateTime "
                    "or DateTime64 when arguments size is 1.",
                    arguments[0].column->getName(), getName());
            }
            return castColumn(arguments[0], result_type);
        }
        else
        {

@@ -7,15 +7,16 @@

#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/IFunction.h>
#include <Functions/castTypeToEither.h>
#include <Functions/numLiteralChars.h>

#include <Interpreters/Context.h>

#include <IO/WriteHelpers.h>
#include <base/types.h>
#include <boost/algorithm/string/case_conv.hpp>


namespace DB
{
namespace ErrorCodes
122
src/Functions/splitByChar.cpp
Normal file
@@ -0,0 +1,122 @@
#include <Columns/ColumnConst.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int ILLEGAL_COLUMN;
}


/** Functions that split strings into an array of strings or vice versa.
  *
  * splitByChar(sep, s[, max_substrings])
  */
namespace
{

using Pos = const char *;

class SplitByCharImpl
{
private:
    Pos pos;
    Pos end;
    char separator;
    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = "splitByChar";
    static bool isVariadic() { return true; }
    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments);
    }

    static constexpr auto strings_argument_position = 1uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());

        if (!col)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
                "Must be constant string.", arguments[0].column->getName(), name);

        String sep_str = col->getValue<String>();

        if (sep_str.size() != 1)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal separator for function {}. Must be exactly one byte.", name);

        separator = sep_str[0];

        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 2);
    }

    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    bool get(Pos & token_begin, Pos & token_end)
    {
        if (!pos)
            return false;

        token_begin = pos;

        if (max_splits)
        {
            if (max_substrings_includes_remaining_string)
            {
                if (splits == *max_splits - 1)
                {
                    token_end = end;
                    pos = nullptr;
                    return true;
                }
            }
            else if (splits == *max_splits)
                return false;
        }

        pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
        if (pos)
        {
            token_end = pos;
            ++pos;
            ++splits;
        }
        else
            token_end = end;

        return true;
    }
};

using FunctionSplitByChar = FunctionTokens<SplitByCharImpl>;

}

REGISTER_FUNCTION(SplitByChar)
{
    factory.registerFunction<FunctionSplitByChar>();
}

}
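
One behavioral detail of the loop above: pos only becomes nullptr after the final token has been emitted, so unlike the alphaTokens-style generators this tokenizer yields empty tokens, and n separators always produce n + 1 substrings. A standalone sketch of the same loop without the max_substrings handling (illustrative, not ClickHouse code):

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

/// Standalone re-implementation of the memchr-driven loop above.
std::vector<std::string> splitByChar(const std::string & s, char sep)
{
    std::vector<std::string> result;
    const char * pos = s.data();
    const char * end = s.data() + s.size();
    while (pos)
    {
        const char * token_begin = pos;
        pos = static_cast<const char *>(std::memchr(pos, sep, end - pos));
        const char * token_end = pos ? pos : end;
        result.emplace_back(token_begin, token_end);
        if (pos)
            ++pos;    /// Skip the separator; a null pos terminates the loop.
    }
    return result;
}

int main()
{
    for (const auto & t : splitByChar(",a,,b,", ','))
        std::cout << '[' << t << "]\n";    /// Prints [], [a], [], [b], [] - five tokens for four commas.
}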
113
src/Functions/splitByNonAlpha.cpp
Normal file
@@ -0,0 +1,113 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Functions/Regexps.h>
#include <Interpreters/Context.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>


namespace DB
{

/** Functions that split strings into an array of strings or vice versa.
  *
  * splitByNonAlpha(s[, max_substrings]) - split the string by whitespace and punctuation characters
  */
namespace
{

using Pos = const char *;

class SplitByNonAlphaImpl
{
private:
    Pos pos;
    Pos end;
    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    /// Get the name of the function.
    static constexpr auto name = "splitByNonAlpha";

    static bool isVariadic() { return true; }
    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        checkArgumentsWithOptionalMaxSubstrings(func, arguments);
    }

    static constexpr auto strings_argument_position = 0uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 1);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        /// Skip garbage
        while (pos < end && (isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
            ++pos;

        if (pos == end)
            return false;

        token_begin = pos;

        if (max_splits)
        {
            if (max_substrings_includes_remaining_string)
            {
                if (splits == *max_splits - 1)
                {
                    token_end = end;
                    pos = end;
                    return true;
                }
            }
            else if (splits == *max_splits)
                return false;
        }

        while (pos < end && !(isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
            ++pos;

        token_end = pos;
        ++splits;

        return true;
    }
};

using FunctionSplitByNonAlpha = FunctionTokens<SplitByNonAlphaImpl>;

}

REGISTER_FUNCTION(SplitByNonAlpha)
{
    factory.registerFunction<FunctionSplitByNonAlpha>();
}

}
156
src/Functions/splitByRegexp.cpp
Normal file
@@ -0,0 +1,156 @@
#include <Columns/ColumnConst.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Functions/Regexps.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
}


/** Functions that split strings into an array of strings or vice versa.
  *
  * splitByRegexp(regexp, s[, max_substrings])
  */
namespace
{

using Pos = const char *;

class SplitByRegexpImpl
{
private:
    Regexps::RegexpPtr re;
    OptimizedRegularExpression::MatchVec matches;

    Pos pos;
    Pos end;

    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = "splitByRegexp";

    static bool isVariadic() { return true; }
    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments);
    }

    static constexpr auto strings_argument_position = 1uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());

        if (!col)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
                "Must be constant string.", arguments[0].column->getName(), name);

        if (!col->getValue<String>().empty())
            re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));

        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 2);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        if (!re)
        {
            if (pos == end)
                return false;

            token_begin = pos;

            if (max_splits)
            {
                if (max_substrings_includes_remaining_string)
                {
                    if (splits == *max_splits - 1)
                    {
                        token_end = end;
                        pos = end;
                        return true;
                    }
                }
                else if (splits == *max_splits)
                    return false;
            }

            pos += 1;
            token_end = pos;
            ++splits;
        }
        else
        {
            if (!pos || pos > end)
                return false;

            token_begin = pos;

            if (max_splits)
            {
                if (max_substrings_includes_remaining_string)
                {
                    if (splits == *max_splits - 1)
                    {
                        token_end = end;
                        pos = nullptr;
                        return true;
                    }
                }
                else if (splits == *max_splits)
                    return false;
            }

            if (!re->match(pos, end - pos, matches) || !matches[0].length)
            {
                token_end = end;
                pos = end + 1;
            }
            else
            {
                token_end = pos + matches[0].offset;
                pos = token_end + matches[0].length;
                ++splits;
            }
        }

        return true;
    }
};

using FunctionSplitByRegexp = FunctionTokens<SplitByRegexpImpl>;

}

REGISTER_FUNCTION(SplitByRegexp)
{
    factory.registerFunction<FunctionSplitByRegexp>();
}

}
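
Two corner cases above are easy to miss: an empty pattern leaves re unset, so the first branch splits the string into single characters; and when no further match is found, the remainder is emitted as one last token, with pos = end + 1 acting as the termination sentinel. A standalone sketch of the matching branch with std::regex standing in for OptimizedRegularExpression (a break replaces the sentinel; illustrative only):

#include <iostream>
#include <regex>
#include <string>
#include <vector>

/// Standalone sketch of the non-empty-pattern branch above: each match is a
/// separator, text between matches becomes a token, the tail is kept.
std::vector<std::string> splitByRegexp(const std::string & pattern, const std::string & s)
{
    std::vector<std::string> result;
    std::regex re(pattern);
    const char * pos = s.data();
    const char * end = s.data() + s.size();
    std::cmatch m;
    while (pos <= end)
    {
        if (!std::regex_search(pos, end, m, re) || m.length(0) == 0)
        {
            result.emplace_back(pos, end);    /// No more separators: the rest is one token.
            break;
        }
        result.emplace_back(pos, pos + m.position(0));
        pos += m.position(0) + m.length(0);
    }
    return result;
}

int main()
{
    for (const auto & t : splitByRegexp("[0-9]+", "a12b34c"))
        std::cout << '[' << t << "]\n";    /// Prints [a], [b], [c].
}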
148
src/Functions/splitByString.cpp
Normal file
@@ -0,0 +1,148 @@
#include <Columns/ColumnConst.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
}


/** Functions that split strings into an array of strings or vice versa.
  *
  * splitByString(sep, s[, max_substrings])
  */
namespace
{

using Pos = const char *;

class SplitByStringImpl
{
private:
    Pos pos;
    Pos end;
    String separator;
    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = "splitByString";
    static bool isVariadic() { return true; }
    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments);
    }

    static constexpr auto strings_argument_position = 1uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());

        if (!col)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
                "Must be constant string.", arguments[0].column->getName(), name);

        separator = col->getValue<String>();

        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 2);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        if (separator.empty())
        {
            if (pos == end)
                return false;

            token_begin = pos;

            if (max_splits)
            {
                if (max_substrings_includes_remaining_string)
                {
                    if (splits == *max_splits - 1)
                    {
                        token_end = end;
                        pos = end;
                        return true;
                    }
                }
                else if (splits == *max_splits)
                    return false;
            }

            pos += 1;
            token_end = pos;
            ++splits;
        }
        else
        {
            if (!pos)
                return false;

            token_begin = pos;

            if (max_splits)
            {
                if (max_substrings_includes_remaining_string)
                {
                    if (splits == *max_splits - 1)
                    {
                        token_end = end;
                        pos = nullptr;
                        return true;
                    }
                }
                else if (splits == *max_splits)
                    return false;
            }

            pos = reinterpret_cast<Pos>(memmem(pos, end - pos, separator.data(), separator.size()));
            if (pos)
            {
                token_end = pos;
                pos += separator.size();
                ++splits;
            }
            else
                token_end = end;
        }

        return true;
    }
};

using FunctionSplitByString = FunctionTokens<SplitByStringImpl>;

}

REGISTER_FUNCTION(SplitByString)
{
    factory.registerFunction<FunctionSplitByString>();
}

}
101
src/Functions/splitByWhitespace.cpp
Normal file
@@ -0,0 +1,101 @@
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Common/StringUtils/StringUtils.h>


namespace DB
{

/** Functions that split strings into an array of strings or vice versa.
  *
  * splitByWhitespace(s[, max_substrings]) - split the string by whitespace characters
  */
namespace
{

using Pos = const char *;

class SplitByWhitespaceImpl
{
private:
    Pos pos;
    Pos end;
    std::optional<size_t> max_splits;
    size_t splits;
    bool max_substrings_includes_remaining_string;

public:
    static constexpr auto name = "splitByWhitespace";

    static bool isVariadic() { return true; }
    static size_t getNumberOfArguments() { return 0; }

    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        checkArgumentsWithOptionalMaxSubstrings(func, arguments);
    }

    static constexpr auto strings_argument_position = 0uz;

    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
    {
        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
        max_splits = extractMaxSplits(arguments, 1);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
        splits = 0;
    }

    /// Get the next token, if any, or return false.
    bool get(Pos & token_begin, Pos & token_end)
    {
        /// Skip garbage
        while (pos < end && isWhitespaceASCII(*pos))
            ++pos;

        if (pos == end)
            return false;

        token_begin = pos;

        if (max_splits)
        {
            if (max_substrings_includes_remaining_string)
            {
                if (splits == *max_splits - 1)
                {
                    token_end = end;
                    pos = end;
                    return true;
                }
            }
            else if (splits == *max_splits)
                return false;
        }

        while (pos < end && !isWhitespaceASCII(*pos))
            ++pos;

        token_end = pos;
        ++splits;

        return true;
    }
};

using FunctionSplitByWhitespace = FunctionTokens<SplitByWhitespaceImpl>;

}

REGISTER_FUNCTION(SplitByWhitespace)
{
    factory.registerFunction<FunctionSplitByWhitespace>();
}

}

@@ -1,6 +1,5 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/CastOverloadResolver.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeString.h>
@@ -12,6 +11,14 @@ namespace
{
    class FunctionToBool : public IFunction
    {
    private:
        ContextPtr context;

        static String getReturnTypeName(const DataTypePtr & argument)
        {
            return argument->isNullable() ? "Nullable(Bool)" : "Bool";
        }

    public:
        static constexpr auto name = "toBool";

@@ -32,8 +39,7 @@ namespace

        DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
        {
            auto bool_type = DataTypeFactory::instance().get("Bool");
            return arguments[0]->isNullable() ? makeNullable(bool_type) : bool_type;
            return DataTypeFactory::instance().get(getReturnTypeName(arguments[0]));
        }

        ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override
@@ -42,18 +48,17 @@ namespace
            {
                arguments[0],
                {
                    DataTypeString().createColumnConst(arguments[0].column->size(), arguments[0].type->isNullable() ? "Nullable(Bool)" : "Bool"),
                    DataTypeString().createColumnConst(arguments[0].column->size(), getReturnTypeName(arguments[0].type)),
                    std::make_shared<DataTypeString>(),
                    ""
                }
            };

            FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver<CastType::nonAccurate>::createImpl();
            FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {});
            auto func_cast = func_builder_cast->build(cast_args);
            return func_cast->execute(cast_args, result_type, arguments[0].column->size());
        }
    };

}

REGISTER_FUNCTION(ToBool)
@@ -387,27 +387,44 @@ Model::CompleteMultipartUploadOutcome Client::CompleteMultipartUpload(const Comp
    auto outcome = doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>(
        request, [this](const Model::CompleteMultipartUploadRequest & req) { return CompleteMultipartUpload(req); });

    if (!outcome.IsSuccess() || provider_type != ProviderType::GCS)
        return outcome;

    const auto & key = request.GetKey();
    const auto & bucket = request.GetBucket();

    /// For GCS we will try to compose object at the end, otherwise we cannot do a native copy
    /// for the object (e.g. for backups)
    /// We don't care if the compose fails, because the upload was still successful, only the
    /// performance for copying the object will be affected
    S3::ComposeObjectRequest compose_req;
    compose_req.SetBucket(bucket);
    compose_req.SetKey(key);
    compose_req.SetComponentNames({key});
    compose_req.SetContentType("binary/octet-stream");
    auto compose_outcome = ComposeObject(compose_req);
    if (!outcome.IsSuccess()
        && outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_UPLOAD)
    {
        auto check_request = HeadObjectRequest()
                                 .WithBucket(bucket)
                                 .WithKey(key);
        auto check_outcome = HeadObject(check_request);

        if (compose_outcome.IsSuccess())
            LOG_TRACE(log, "Composing object was successful");
        else
            LOG_INFO(log, "Failed to compose object. Message: {}, Key: {}, Bucket: {}", compose_outcome.GetError().GetMessage(), key, bucket);
        /// If the key exists, then the MultipartUpload was completed during one of the retries.
        /// Rewrite the outcome with a success status.
        if (check_outcome.IsSuccess())
            outcome = Aws::S3::Model::CompleteMultipartUploadOutcome(Aws::S3::Model::CompleteMultipartUploadResult());
    }

    if (outcome.IsSuccess() && provider_type == ProviderType::GCS)
    {
        /// For GCS we will try to compose object at the end, otherwise we cannot do a native copy
        /// for the object (e.g. for backups)
        /// We don't care if the compose fails, because the upload was still successful, only the
        /// performance for copying the object will be affected
        S3::ComposeObjectRequest compose_req;
        compose_req.SetBucket(bucket);
        compose_req.SetKey(key);
        compose_req.SetComponentNames({key});
        compose_req.SetContentType("binary/octet-stream");
        auto compose_outcome = ComposeObject(compose_req);

        if (compose_outcome.IsSuccess())
            LOG_TRACE(log, "Composing object was successful");
        else
            LOG_INFO(
                log,
                "Failed to compose object. Message: {}, Key: {}, Bucket: {}",
                compose_outcome.GetError().GetMessage(), key, bucket);
    }

    return outcome;
}
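
The recovery above handles retried CompleteMultipartUpload calls: a retry can observe NO_SUCH_UPLOAD because an earlier attempt already completed the upload, so a HeadObject probe decides whether the error is real. A minimal sketch of that pattern with the AWS SDK types abstracted away (hypothetical names, illustration only):

#include <functional>
#include <string>

/// Generic "complete, then verify" sketch: if the final attempt reports that
/// the multipart upload no longer exists, an earlier retry may already have
/// completed it, so probe for the object before declaring failure.
struct Outcome { bool success; std::string error; };

Outcome completeWithRecovery(
    const std::function<Outcome()> & complete_multipart_upload,
    const std::function<bool()> & object_exists)
{
    Outcome outcome = complete_multipart_upload();
    if (!outcome.success && outcome.error == "NO_SUCH_UPLOAD" && object_exists())
        outcome = Outcome{true, {}};    /// A previous retry finished the upload: treat as success.
    return outcome;
}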
@ -1,9 +1,4 @@
|
||||
#include <exception>
|
||||
#include <variant>
|
||||
#include <IO/S3/Credentials.h>
|
||||
#include <boost/algorithm/string/classification.hpp>
|
||||
#include <Poco/Exception.h>
|
||||
#include "Common/Exception.h"
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
@ -16,7 +11,6 @@
|
||||
# include <aws/core/utils/UUID.h>
|
||||
# include <aws/core/http/HttpClientFactory.h>
|
||||
|
||||
# include <IO/S3/PocoHTTPClientFactory.h>
|
||||
# include <aws/core/utils/HashingUtils.h>
|
||||
# include <aws/core/platform/FileSystem.h>
|
||||
|
||||
@ -28,16 +22,6 @@
|
||||
# include <fstream>
|
||||
# include <base/EnumReflection.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
|
||||
|
||||
#include <Poco/URI.h>
|
||||
#include <Poco/Net/HTTPClientSession.h>
|
||||
#include <Poco/Net/HTTPRequest.h>
|
||||
#include <Poco/Net/HTTPResponse.h>
|
||||
#include <Poco/StreamCopier.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -45,8 +29,6 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int AWS_ERROR;
|
||||
extern const int GCP_ERROR;
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
}
|
||||
|
||||
namespace S3
|
||||
@ -169,6 +151,30 @@ Aws::String AWSEC2MetadataClient::getDefaultCredentialsSecurely() const
|
||||
return GetResourceWithAWSWebServiceResult(credentials_request).GetPayload();
|
||||
}
|
||||
|
||||
Aws::String AWSEC2MetadataClient::getCurrentAvailabilityZone() const
|
||||
{
|
||||
String user_agent_string = awsComputeUserAgentString();
|
||||
auto [new_token, response_code] = getEC2MetadataToken(user_agent_string);
|
||||
if (response_code != Aws::Http::HttpResponseCode::OK || new_token.empty())
|
||||
throw DB::Exception(ErrorCodes::AWS_ERROR,
|
||||
"Failed to make token request. HTTP response code: {}", response_code);
|
||||
|
||||
token = std::move(new_token);
|
||||
const String url = endpoint + EC2_AVAILABILITY_ZONE_RESOURCE;
|
||||
std::shared_ptr<Aws::Http::HttpRequest> profile_request(
|
||||
Aws::Http::CreateHttpRequest(url, Aws::Http::HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod));
|
||||
|
||||
profile_request->SetHeaderValue(EC2_IMDS_TOKEN_HEADER, token);
|
||||
profile_request->SetUserAgent(user_agent_string);
|
||||
|
||||
const auto result = GetResourceWithAWSWebServiceResult(profile_request);
|
||||
if (result.GetResponseCode() != Aws::Http::HttpResponseCode::OK)
|
||||
throw DB::Exception(ErrorCodes::AWS_ERROR,
|
||||
"Failed to get availability zone. HTTP response code: {}", result.GetResponseCode());
|
||||
|
||||
return Aws::Utils::StringUtils::Trim(result.GetPayload().c_str());
|
||||
}
|
||||
|
||||
std::pair<Aws::String, Aws::Http::HttpResponseCode> AWSEC2MetadataClient::getEC2MetadataToken(const std::string & user_agent_string) const
{
    std::lock_guard locker(token_mutex);
@ -193,10 +199,10 @@ Aws::String AWSEC2MetadataClient::getCurrentRegion() const
    return Aws::Region::AWS_GLOBAL;
}

static Aws::String getAWSMetadataEndpoint()
std::shared_ptr<AWSEC2MetadataClient> InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration)
{
    auto * logger = &Poco::Logger::get("AWSEC2InstanceProfileConfigLoader");
    Aws::String ec2_metadata_service_endpoint = Aws::Environment::GetEnv("AWS_EC2_METADATA_SERVICE_ENDPOINT");
    auto * logger = &Poco::Logger::get("AWSEC2InstanceProfileConfigLoader");
    if (ec2_metadata_service_endpoint.empty())
    {
        Aws::String ec2_metadata_service_endpoint_mode = Aws::Environment::GetEnv("AWS_EC2_METADATA_SERVICE_ENDPOINT_MODE");
@ -227,95 +233,8 @@ static Aws::String getAWSMetadataEndpoint()
            }
        }
    }
    return ec2_metadata_service_endpoint;
}

std::shared_ptr<AWSEC2MetadataClient> InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration)
{
    auto endpoint = getAWSMetadataEndpoint();
    return std::make_shared<AWSEC2MetadataClient>(client_configuration, endpoint.c_str());
}

String AWSEC2MetadataClient::getAvailabilityZoneOrException()
{
    Poco::URI uri(getAWSMetadataEndpoint() + EC2_AVAILABILITY_ZONE_RESOURCE);
    Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort());

    Poco::Net::HTTPResponse response;
    Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, uri.getPath());
    session.sendRequest(request);

    std::istream & rs = session.receiveResponse(response);
    if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
        throw DB::Exception(ErrorCodes::AWS_ERROR, "Failed to get AWS availability zone. HTTP response code: {}", response.getStatus());
    String response_data;
    Poco::StreamCopier::copyToString(rs, response_data);
    return response_data;
}

String getGCPAvailabilityZoneOrException()
{
    Poco::URI uri(String(GCP_METADATA_SERVICE_ENDPOINT) + "/computeMetadata/v1/instance/zone");
    Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort());
    Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, uri.getPath());
    Poco::Net::HTTPResponse response;
    request.set("Metadata-Flavor", "Google");
    session.sendRequest(request);
    std::istream & rs = session.receiveResponse(response);
    if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
        throw DB::Exception(ErrorCodes::GCP_ERROR, "Failed to get GCP availability zone. HTTP response code: {}", response.getStatus());
    String response_data;
    Poco::StreamCopier::copyToString(rs, response_data);
    Strings zone_info;
    boost::split(zone_info, response_data, boost::is_any_of("/"));
    /// We expect GCP to return a string like "projects/123456789/zones/us-central1-a".
    if (zone_info.size() != 4)
        throw DB::Exception(ErrorCodes::GCP_ERROR, "Invalid format of GCP zone information, expect projects/<project-number>/zones/<zone-value>, got {}", response_data);
    return zone_info[3];
}

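/// A worked example of the split above (values hypothetical): for a payload of
/// "projects/123456789/zones/us-central1-a",
///
///     Strings zone_info;
///     boost::split(zone_info, response_data, boost::is_any_of("/"));
///     /// zone_info == {"projects", "123456789", "zones", "us-central1-a"}
///
/// so the size check passes and zone_info[3], the zone name, is returned.
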
String getRunningAvailabilityZoneImpl()
{
    LOG_INFO(&Poco::Logger::get("Application"), "Trying to detect the availability zone.");
    try
    {
        auto aws_az = AWSEC2MetadataClient::getAvailabilityZoneOrException();
        return aws_az;
    }
    catch (const DB::Exception & aws_ex)
    {
        try
        {
            auto gcp_zone = getGCPAvailabilityZoneOrException();
            return gcp_zone;
        }
        catch (const DB::Exception & gcp_ex)
        {
            throw DB::Exception(ErrorCodes::UNSUPPORTED_METHOD,
                "Failed to find the availability zone, tried AWS and GCP. AWS Error: {}\nGCP Error: {}", aws_ex.displayText(), gcp_ex.displayText());
        }
    }
}

std::variant<String, std::exception_ptr> getRunningAvailabilityZoneImplOrException()
{
    try
    {
        return getRunningAvailabilityZoneImpl();
    }
    catch (...)
    {
        return std::current_exception();
    }
}

String getRunningAvailabilityZone()
{
    static auto az_or_exception = getRunningAvailabilityZoneImplOrException();
    if (const auto * az = std::get_if<String>(&az_or_exception))
        return *az;
    else
        std::rethrow_exception(std::get<std::exception_ptr>(az_or_exception));
    LOG_INFO(logger, "Using IMDS endpoint: {}", ec2_metadata_service_endpoint);
    return std::make_shared<AWSEC2MetadataClient>(client_configuration, ec2_metadata_service_endpoint.c_str());
}

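/// The function above relies on the call-once caching idiom: the static local is
/// initialized exactly once, capturing either the detected zone or the caught
/// std::exception_ptr, so every later call replays the same outcome without
/// re-probing the metadata services. A minimal sketch of the same pattern, with
/// probeOnce() as a hypothetical stand-in for the detection step:
///
///     std::variant<String, std::exception_ptr> probeOnce();
///
///     String value()
///     {
///         static auto cached = probeOnce();   /// runs on the first call only
///         if (const auto * v = std::get_if<String>(&cached))
///             return *v;
///         std::rethrow_exception(std::get<std::exception_ptr>(cached));
///     }
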
AWSEC2InstanceProfileConfigLoader::AWSEC2InstanceProfileConfigLoader(const std::shared_ptr<AWSEC2MetadataClient> & client_, bool use_secure_pull_)
@ -784,6 +703,7 @@ S3CredentialsProviderChain::S3CredentialsProviderChain(
        aws_client_configuration.requestTimeoutMs = 1000;

        aws_client_configuration.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(1, 1000);

        auto ec2_metadata_client = InitEC2MetadataClient(aws_client_configuration);
        auto config_loader = std::make_shared<AWSEC2InstanceProfileConfigLoader>(ec2_metadata_client, !credentials_configuration.use_insecure_imds_request);

@ -801,21 +721,4 @@ S3CredentialsProviderChain::S3CredentialsProviderChain(

}

#else

namespace DB
{

namespace S3
{

String getRunningAvailabilityZone()
{
    throw Poco::Exception("Does not support availability zone detection for non-cloud environment");
}

}

}

#endif
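A minimal usage sketch for the entry point in this diff, assuming the declaration lives in IO/S3/Credentials.h and using ClickHouse's logging and exception helpers; because the result is cached, repeated calls never re-query the metadata services:

    #include <IO/S3/Credentials.h>
    #include <Common/Exception.h>
    #include <Common/logger_useful.h>

    void reportAvailabilityZone()
    {
        try
        {
            /// Returns e.g. "us-east-1a" on EC2 or "us-central1-a" on GCE.
            LOG_INFO(&Poco::Logger::get("Application"), "Availability zone: {}",
                DB::S3::getRunningAvailabilityZone());
        }
        catch (...) /// not on AWS/GCP, or the metadata endpoints are unreachable
        {
            DB::tryLogCurrentException("Application");
        }
    }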
Some files were not shown because too many files have changed in this diff