Merge branch 'master' into hanfei/keeperrocks

This commit is contained in:
Han Fei 2023-11-14 12:57:13 +01:00
commit eb64d4215e
711 changed files with 19083 additions and 16763 deletions

11
.github/actions/clean/action.yml vendored Normal file
View File

@ -0,0 +1,11 @@
name: Clean runner
description: Clean the runner's temp path on ending
runs:
using: "composite"
steps:
- name: Clean
shell: bash
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "${{runner.temp}}"

33
.github/actions/common_setup/action.yml vendored Normal file
View File

@ -0,0 +1,33 @@
name: Common setup
description: Setup necessary environments
inputs:
job_type:
description: the name to use in the TEMP_PATH and REPO_COPY
default: common
type: string
nested_job:
description: the fuse for unintended use inside of the reusable callable jobs
default: true
type: boolean
runs:
using: "composite"
steps:
- name: Setup and check ENV
shell: bash
run: |
echo "Setup the common ENV variables"
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/${{inputs.job_type}}
REPO_COPY=${{runner.temp}}/${{inputs.job_type}}/git-repo-copy
EOF
if [ -z "${{env.GITHUB_JOB_OVERRIDDEN}}" ] && [ "true" == "${{inputs.nested_job}}" ]; then
echo "The GITHUB_JOB_OVERRIDDEN ENV is unset, and must be set for the nested jobs"
exit 1
fi
- name: Setup $TEMP_PATH
shell: bash
run: |
# to remove every leftovers
sudo rm -fr "$TEMP_PATH"
mkdir -p "$REPO_COPY"
cp -a "$GITHUB_WORKSPACE"/. "$REPO_COPY"/

View File

@ -1,3 +1,4 @@
# yamllint disable rule:comments-indentation
name: BackportPR
env:
@ -33,7 +34,12 @@ jobs:
- name: Python unit tests
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 -m unittest discover -s . -p '*_test.py'
echo "Testing the main ci directory"
python3 -m unittest discover -s . -p 'test_*.py'
for dir in *_lambda/; do
echo "Testing $dir"
python3 -m unittest discover -s "$dir" -p 'test_*.py'
done
DockerHubPushAarch64:
runs-on: [self-hosted, style-checker-aarch64]
needs: CheckLabels
@ -69,7 +75,7 @@ jobs:
name: changed_images_amd64
path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
DockerHubPush:
needs: [DockerHubPushAmd64, DockerHubPushAarch64]
needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
@ -164,320 +170,43 @@ jobs:
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_release
checkout_depth: 0
BuilderDebAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_aarch64
checkout_depth: 0
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_asan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_asan
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_tsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_tsan
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_debug
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_debug
BuilderBinDarwin:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin
checkout_depth: 0
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin_aarch64
checkout_depth: 0
############################################################################################
##################################### Docker images #######################################
############################################################################################

View File

@ -1,3 +1,4 @@
# yamllint disable rule:comments-indentation
name: MasterCI
env:
@ -19,7 +20,12 @@ jobs:
- name: Python unit tests
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 -m unittest discover -s . -p '*_test.py'
echo "Testing the main ci directory"
python3 -m unittest discover -s . -p 'test_*.py'
for dir in *_lambda/; do
echo "Testing $dir"
python3 -m unittest discover -s "$dir" -p 'test_*.py'
done
DockerHubPushAarch64:
runs-on: [self-hosted, style-checker-aarch64]
steps:
@ -179,789 +185,109 @@ jobs:
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
checkout_depth: 0
build_name: package_release
BuilderDebAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
checkout_depth: 0
build_name: package_aarch64
BuilderBinRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
checkout_depth: 0
build_name: binary_release
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_asan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_asan
BuilderDebUBsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_ubsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_ubsan
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_tsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_tsan
BuilderDebMsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_msan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_msan
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_debug
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_debug
##########################################################################################
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderBinClangTidy:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_tidy
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_tidy
BuilderBinDarwin:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin
checkout_depth: 0
BuilderBinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64
checkout_depth: 0
BuilderBinFreeBSD:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_freebsd
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_freebsd
checkout_depth: 0
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin_aarch64
checkout_depth: 0
BuilderBinPPC64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_ppc64le
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_ppc64le
checkout_depth: 0
BuilderBinAmd64Compat:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64_compat
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_amd64_compat
checkout_depth: 0
BuilderBinAarch64V80Compat:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_aarch64_v80compat
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64_v80compat
checkout_depth: 0
BuilderBinRISCV64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_riscv64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_riscv64
checkout_depth: 0
BuilderBinS390X:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_s390x
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_s390x
checkout_depth: 0
############################################################################################
##################################### Docker images #######################################
############################################################################################

View File

@ -1,3 +1,4 @@
# yamllint disable rule:comments-indentation
name: PullRequestCI
env:
@ -47,10 +48,10 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
echo "Testing the main ci directory"
python3 -m unittest discover -s . -p '*_test.py'
python3 -m unittest discover -s . -p 'test_*.py'
for dir in *_lambda/; do
echo "Testing $dir"
python3 -m unittest discover -s "$dir" -p '*_test.py'
python3 -m unittest discover -s "$dir" -p 'test_*.py'
done
DockerHubPushAarch64:
needs: CheckLabels
@ -246,771 +247,100 @@ jobs:
#################################### ORDINARY BUILDS ####################################
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # for performance artifact
filter: tree:0
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
BuilderBinRelease:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_release
checkout_depth: 0
BuilderDebAarch64:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # for performance artifact
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_aarch64
checkout_depth: 0
BuilderBinRelease:
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_release
BuilderDebAsan:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_asan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_asan
BuilderDebUBsan:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_ubsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_ubsan
BuilderDebTsan:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_tsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_tsan
BuilderDebMsan:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_msan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_msan
BuilderDebDebug:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_debug
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_debug
##########################################################################################
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderBinClangTidy:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_tidy
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_tidy
BuilderBinDarwin:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin
BuilderBinAarch64:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64
BuilderBinFreeBSD:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_freebsd
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_freebsd
BuilderBinDarwinAarch64:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin_aarch64
BuilderBinPPC64:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_ppc64le
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_ppc64le
BuilderBinAmd64Compat:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64_compat
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_amd64_compat
BuilderBinAarch64V80Compat:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_aarch64_v80compat
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_aarch64_v80compat
BuilderBinRISCV64:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_riscv64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_riscv64
BuilderBinS390X:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_s390x
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_s390x
############################################################################################
##################################### Docker images #######################################
############################################################################################

View File

@ -1,3 +1,4 @@
# yamllint disable rule:comments-indentation
name: ReleaseBranchCI
env:
@ -140,401 +141,53 @@ jobs:
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_release
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_release
checkout_depth: 0
BuilderDebAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_aarch64
checkout_depth: 0
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_asan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_asan
BuilderDebUBsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_ubsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_ubsan
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_tsan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_tsan
BuilderDebMsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_msan
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_msan
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=package_debug
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: package_debug
BuilderBinDarwin:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin
checkout_depth: 0
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_darwin_aarch64
checkout_depth: 0
############################################################################################
##################################### Docker images #######################################
############################################################################################

74
.github/workflows/reusable_build.yml vendored Normal file
View File

@ -0,0 +1,74 @@
### For the pure soul wishes to move it to another place
# https://github.com/orgs/community/discussions/9050
name: Build ClickHouse
'on':
workflow_call:
inputs:
build_name:
description: the value of build type from tests/ci/ci_config.py
required: true
type: string
checkout_depth:
description: the value of the git shallow checkout
required: false
type: number
default: 1
runner_type:
description: the label of runner to use
default: builder
type: string
additional_envs:
description: additional ENV variables to setup the job
type: string
jobs:
Build:
name: Build-${{inputs.build_name}}
runs-on: [self-hosted, '${{inputs.runner_type}}']
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
submodules: true
fetch-depth: ${{inputs.checkout_depth}}
filter: tree:0
- name: Set build envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
IMAGES_PATH=${{runner.temp}}/images_path
GITHUB_JOB_OVERRIDDEN=Build-${{inputs.build_name}}
${{inputs.additional_envs}}
EOF
python3 "$GITHUB_WORKSPACE"/tests/ci/ci_config.py --build-name "${{inputs.build_name}}" >> "$GITHUB_ENV"
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
# This step is done in GITHUB_WORKSPACE,
# because it's broken in REPO_COPY for some reason
if: ${{ env.BUILD_SPARSE_CHECKOUT == 'true' }}
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Common setup
uses: ./.github/actions/common_setup
with:
job_type: build_check
- name: Download changed images
uses: actions/download-artifact@v3
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Build
run: |
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v3
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Clean
uses: ./.github/actions/clean

3
.gitmodules vendored
View File

@ -1,3 +1,6 @@
# Please do not use 'branch = ...' tags with submodule entries. Such tags make updating submodules a
# little bit more convenient but they do *not* specify the tracked submodule branch. Thus, they are
# more confusing than useful.
[submodule "contrib/zstd"]
path = contrib/zstd
url = https://github.com/facebook/zstd

View File

@ -1,6 +1,17 @@
[<img alt="ClickHouse — open source distributed column-oriented DBMS" width="400px" src="https://clickhouse.com/images/ch_gh_logo_rounded.png" />](https://clickhouse.com?utm_source=github)
<div align=center>
ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.
[![Website](https://img.shields.io/website?up_message=AVAILABLE&down_message=DOWN&url=https%3A%2F%2Fclickhouse.com&style=for-the-badge)](https://clickhouse.com)
[![Apache 2.0 License](https://img.shields.io/badge/license-Apache%202.0-blueviolet?style=for-the-badge)](https://www.apache.org/licenses/LICENSE-2.0)
<picture align=center>
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/4ef9c104-2d3f-4646-b186-507358d2fe28">
<source media="(prefers-color-scheme: light)" srcset="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/b001dc7b-5a45-4dcd-9275-e03beb7f9177">
<img alt="The ClickHouse company logo." src="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/b001dc7b-5a45-4dcd-9275-e03beb7f9177">
</picture>
<h4>ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.</h4>
</div>
## How To Install (Linux, macOS, FreeBSD)
```
@ -22,8 +33,7 @@ curl https://clickhouse.com/ | sh
## Upcoming Events
* [**ClickHouse Meetup in Beijing**](https://www.meetup.com/clickhouse-beijing-user-group/events/296334856/) - Nov 4
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 8
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 14
* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/296334976/) - Nov 15
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11
@ -33,7 +43,7 @@ Also, keep an eye out for upcoming meetups around the world. Somewhere else you
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
* **Recording available**: [**v23.6 Release Webinar**](https://www.youtube.com/watch?v=cuf_hYn7dqU) All the features of 23.6, one convenient video! Watch it now!
* **Recording available**: [**v23.10 Release Webinar**](https://www.youtube.com/watch?v=PGQS6uPb970) All the features of 23.10, one convenient video! Watch it now!
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)

View File

@ -119,17 +119,16 @@
#include <base/types.h>
namespace DB
{
void abortOnFailedAssertion(const String & description);
[[noreturn]] void abortOnFailedAssertion(const String & description);
}
#define chassert(x) static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x)
#define chassert(x) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
#define UNREACHABLE() abort()
// clang-format off
#else
/// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while
/// "sizeof(!(x))" will not.
#define NIL_EXPRESSION(x) (void)sizeof(!(x))
#define chassert(x) NIL_EXPRESSION(x)
#define chassert(x) (void)sizeof(!(x))
#define UNREACHABLE() __builtin_unreachable()
#endif
#endif

View File

@ -1,3 +1,5 @@
# Generates a separate file with debug symbols while stripping it from the main binary.
# This is needed for Debian packages.
macro(clickhouse_split_debug_symbols)
set(oneValueArgs TARGET DESTINATION_DIR BINARY_PATH)

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit eb1572129c71beb2156dcdaadc3fb136954aed96
Subproject commit b7ea89b817a18dc0eafc1f909d568869f02d2d04

View File

@ -1 +0,0 @@
../../../thrift/build/cmake/config.h.in

2
contrib/grpc vendored

@ -1 +1 @@
Subproject commit bef8212d1e01f99e406c282ceab3d42da08e09ce
Subproject commit b723ecae0991bb873fe87a595dfb187178733fde

View File

@ -7,12 +7,6 @@ endif()
set(LIB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libssh")
set(LIB_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/libssh")
# Specify search path for CMake modules to be loaded by include()
# and find_package()
list(APPEND CMAKE_MODULE_PATH "${LIB_SOURCE_DIR}/cmake/Modules")
include(DefineCMakeDefaults)
include(DefineCompilerFlags)
project(libssh VERSION 0.9.7 LANGUAGES C)
@ -29,12 +23,6 @@ set(APPLICATION_NAME ${PROJECT_NAME})
set(LIBRARY_VERSION "4.8.7")
set(LIBRARY_SOVERSION "4")
# where to look first for cmake modules, before ${CMAKE_ROOT}/Modules/ is checked
# add definitions
include(DefinePlatformDefaults)
# Copy library files to a lib sub-directory
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${LIB_BINARY_DIR}/lib")

View File

@ -1,20 +1,8 @@
set(LIBSSH_LINK_LIBRARIES
${LIBSSH_REQUIRED_LIBRARIES}
)
set(LIBSSH_LINK_LIBRARIES
${LIBSSH_LINK_LIBRARIES}
OpenSSL::Crypto
)
if (MINGW AND Threads_FOUND)
set(LIBSSH_LINK_LIBRARIES
${LIBSSH_LINK_LIBRARIES}
Threads::Threads
)
endif()
set(libssh_SRCS
${LIB_SOURCE_DIR}/src/agent.c
${LIB_SOURCE_DIR}/src/auth.c
@ -66,30 +54,11 @@ set(libssh_SRCS
${LIB_SOURCE_DIR}/src/pki_ed25519_common.c
)
if (DEFAULT_C_NO_DEPRECATION_FLAGS)
set_source_files_properties(known_hosts.c
PROPERTIES
COMPILE_FLAGS ${DEFAULT_C_NO_DEPRECATION_FLAGS})
endif()
if (CMAKE_USE_PTHREADS_INIT)
set(libssh_SRCS
${libssh_SRCS}
${LIB_SOURCE_DIR}/src/threads/noop.c
${LIB_SOURCE_DIR}/src/threads/pthread.c
)
elseif (CMAKE_USE_WIN32_THREADS_INIT)
set(libssh_SRCS
${libssh_SRCS}
${LIB_SOURCE_DIR}/src/threads/noop.c
${LIB_SOURCE_DIR}/src/threads/winlocks.c
)
else()
set(libssh_SRCS
${libssh_SRCS}
${LIB_SOURCE_DIR}/src/threads/noop.c
)
endif()
set(libssh_SRCS
${libssh_SRCS}
${LIB_SOURCE_DIR}/src/threads/noop.c
${LIB_SOURCE_DIR}/src/threads/pthread.c
)
# LIBCRYPT specific
set(libssh_SRCS
@ -127,14 +96,3 @@ target_compile_options(_ssh
PRIVATE
${DEFAULT_C_COMPILE_FLAGS}
-D_GNU_SOURCE)
set_target_properties(_ssh
PROPERTIES
VERSION
${LIBRARY_VERSION}
SOVERSION
${LIBRARY_SOVERSION}
DEFINE_SYMBOL
LIBSSH_EXPORTS
)

View File

@ -6,12 +6,13 @@ FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/test/*' >> $FILES_TO_CHECKOUT
echo '/test/build/*' >> $FILES_TO_CHECKOUT
echo '/test/core/tsi/alts/fake_handshaker/*' >> $FILES_TO_CHECKOUT
echo '/test/core/event_engine/fuzzing_event_engine/*' >> $FILES_TO_CHECKOUT
echo '!/tools/*' >> $FILES_TO_CHECKOUT
echo '/tools/codegen/*' >> $FILES_TO_CHECKOUT
echo '!/examples/*' >> $FILES_TO_CHECKOUT
echo '!/doc/*' >> $FILES_TO_CHECKOUT
# FIXME why do we need csharp?
#echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT
echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT
echo '!/src/python/*' >> $FILES_TO_CHECKOUT
echo '!/src/objective-c/*' >> $FILES_TO_CHECKOUT
echo '!/src/php/*' >> $FILES_TO_CHECKOUT

View File

@ -1,11 +1,12 @@
#!/bin/sh
set -e
WORKDIR=$(dirname "$0")
WORKDIR=$(readlink -f "${WORKDIR}")
SCRIPT_PATH=$(realpath "$0")
SCRIPT_DIR=$(dirname "${SCRIPT_PATH}")
GIT_DIR=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel)
cd $GIT_DIR
"$WORKDIR/sparse-checkout/setup-sparse-checkout.sh"
contrib/sparse-checkout/setup-sparse-checkout.sh
git submodule init
git submodule sync
git submodule update --depth=1
git config --file .gitmodules --get-regexp .*path | sed 's/[^ ]* //' | xargs -I _ --max-procs 64 git submodule update --depth=1 --single-branch _

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.10.1.1976"
ARG VERSION="23.10.3.5"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -126,6 +126,7 @@ fi
mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output
[ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
[ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output
mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
mv ./programs/*.dict ./programs/*.options ./programs/*_seed_corpus.zip /output ||: # libFuzzer oss-fuzz compatible infrastructure

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.10.1.1976"
ARG VERSION="23.10.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.10.1.1976"
ARG VERSION="23.10.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -15,10 +15,15 @@ CLICKHOUSE_CI_LOGS_USER=${CLICKHOUSE_CI_LOGS_USER:-ci}
# Pre-configured destination cluster, where to export the data
CLICKHOUSE_CI_LOGS_CLUSTER=${CLICKHOUSE_CI_LOGS_CLUSTER:-system_logs_export}
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime('UTC'), check_name String, instance_type String, instance_id String, "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"CAST(0 AS UInt32) AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type, '' AS instance_id"}
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime('UTC'), check_name LowCardinality(String), instance_type LowCardinality(String), instance_id String, INDEX ix_pr (pull_request_number) TYPE set(100), INDEX ix_commit (commit_sha) TYPE set(100), INDEX ix_check_time (check_start_time) TYPE minmax, "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"CAST(0 AS UInt32) AS pull_request_number, '' AS commit_sha, now() AS check_start_time, toLowCardinality('') AS check_name, toLowCardinality('') AS instance_type, '' AS instance_id"}
EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "}
# trace_log needs more columns for symbolization
EXTRA_COLUMNS_TRACE_LOG="${EXTRA_COLUMNS} symbols Array(LowCardinality(String)), lines Array(LowCardinality(String)), "
EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> toLowCardinality(demangle(addressToSymbol(x))), trace) AS symbols, arrayMap(x -> toLowCardinality(addressToLine(x)), trace) AS lines"
function __set_connection_args
{
# It's impossible to use generous $CONNECTION_ARGS string, it's unsafe from word splitting perspective.
@ -125,9 +130,18 @@ function setup_logs_replication
echo 'Create %_log tables'
clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
do
if [[ "$table" = "trace_log" ]]
then
EXTRA_COLUMNS_FOR_TABLE="${EXTRA_COLUMNS_TRACE_LOG}"
EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION_TRACE_LOG}"
else
EXTRA_COLUMNS_FOR_TABLE="${EXTRA_COLUMNS}"
EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION}"
fi
# Calculate hash of its structure. Note: 4 is the version of extra columns - increment it if extra columns are changed:
hash=$(clickhouse-client --query "
SELECT sipHash64(4, groupArray((name, type)))
SELECT sipHash64(9, groupArray((name, type)))
FROM (SELECT name, type FROM system.columns
WHERE database = 'system' AND table = '$table'
ORDER BY position)
@ -135,7 +149,7 @@ function setup_logs_replication
# Create the destination table with adapted name and structure:
statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e '
s/^\($/('"$EXTRA_COLUMNS"'/;
s/^\($/('"$EXTRA_COLUMNS_FOR_TABLE"'/;
s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/;
s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/;
/^TTL /d
@ -155,7 +169,7 @@ function setup_logs_replication
ENGINE = Distributed(${CLICKHOUSE_CI_LOGS_CLUSTER}, default, ${table}_${hash})
SETTINGS flush_on_detach=0
EMPTY AS
SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
SELECT ${EXTRA_COLUMNS_EXPRESSION_FOR_TABLE}, *
FROM system.${table}
" || continue
@ -163,7 +177,7 @@ function setup_logs_replication
clickhouse-client --query "
CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS
SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
SELECT ${EXTRA_COLUMNS_EXPRESSION_FOR_TABLE}, *
FROM system.${table}
" || continue
done

View File

@ -19,6 +19,11 @@ dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
# Check that the tools are available under short names
ch --query "SELECT 1" || exit 1
chl --query "SELECT 1" || exit 1
chc --version || exit 1
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# shellcheck disable=SC1091
@ -62,7 +67,7 @@ if [ "$NUM_TRIES" -gt "1" ]; then
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
mkdir -p /var/run/clickhouse-server
# simpliest way to forward env variables to server
# simplest way to forward env variables to server
sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid
else
sudo clickhouse start

View File

@ -53,31 +53,28 @@ function configure()
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
function randomize_config_boolean_value {
function randomize_keeper_config_boolean_value {
value=$(($RANDOM % 2))
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
sudo cat /etc/clickhouse-server/config.d/$2.xml \
| sed "s|<$1>[01]</$1>|<$1>$value</$1>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
> /etc/clickhouse-server/config.d/$2.xml.tmp
sudo mv /etc/clickhouse-server/config.d/$2.xml.tmp /etc/clickhouse-server/config.d/$2.xml
}
if [[ -n "$RANDOMIZE_KEEPER_FEATURE_FLAGS" ]] && [[ "$RANDOMIZE_KEEPER_FEATURE_FLAGS" -eq 1 ]]; then
# Randomize all Keeper feature flags
randomize_config_boolean_value filtered_list
randomize_config_boolean_value multi_read
randomize_config_boolean_value check_not_exists
randomize_config_boolean_value create_if_not_exists
randomize_config_boolean_value filtered_list keeper_port
randomize_config_boolean_value multi_read keeper_port
randomize_config_boolean_value check_not_exists keeper_port
randomize_config_boolean_value create_if_not_exists keeper_port
fi
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
#Randomize merge tree setting allow_experimental_block_number_column
value=$(($RANDOM % 2))
sudo cat /etc/clickhouse-server/config.d/merge_tree_settings.xml \
| sed "s|<allow_experimental_block_number_column>[01]</allow_experimental_block_number_column>|<allow_experimental_block_number_column>$value</allow_experimental_block_number_column>|" \
> /etc/clickhouse-server/config.d/merge_tree_settings.xml.tmp
sudo mv /etc/clickhouse-server/config.d/merge_tree_settings.xml.tmp /etc/clickhouse-server/config.d/merge_tree_settings.xml
randomize_config_boolean_value use_compression zookeeper
randomize_config_boolean_value allow_experimental_block_number_column merge_tree_settings
# for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment

View File

@ -0,0 +1,18 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.10.2.13-stable (65d8522bb1d) FIXME as compared to v23.10.1.1976-stable (13adae0e42f)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix inconsistency of "cast('0' as DateTime64(3))" and "cast('0' as Nullable(DateTime64(3)))" [#56286](https://github.com/ClickHouse/ClickHouse/pull/56286) ([李扬](https://github.com/taiyang-li)).
* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -0,0 +1,16 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.10.3.5-stable (b2ba7637a41) FIXME as compared to v23.10.2.13-stable (65d8522bb1d)
#### Improvement
* Backported in [#56513](https://github.com/ClickHouse/ClickHouse/issues/56513): Allow backup of materialized view with dropped inner table instead of failing the backup. [#56387](https://github.com/ClickHouse/ClickHouse/pull/56387) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### NO CL CATEGORY
* Backported in [#56605](https://github.com/ClickHouse/ClickHouse/issues/56605):. [#56598](https://github.com/ClickHouse/ClickHouse/pull/56598) ([Maksim Kita](https://github.com/kitaisreal)).

View File

@ -0,0 +1,14 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.3.16.7-lts (fb4125cc92a) FIXME as compared to v23.3.15.29-lts (218336662e4)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix: avoid using regex match, possibly containing alternation, as a key condition. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -0,0 +1,21 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.8.6.16-lts (077df679bed) FIXME as compared to v23.8.5.16-lts (e8a1af5fe2f)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix rare case of CHECKSUM_DOESNT_MATCH error [#54549](https://github.com/ClickHouse/ClickHouse/pull/54549) ([alesapin](https://github.com/alesapin)).
* Fix: avoid using regex match, possibly containing alternation, as a key condition. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Improve enrich image [#55793](https://github.com/ClickHouse/ClickHouse/pull/55793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,17 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.9.4.11-stable (74c1f49dd6a) FIXME as compared to v23.9.3.12-stable (b7230b06563)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix wrong query result when http_write_exception_in_output_format=1 [#56135](https://github.com/ClickHouse/ClickHouse/pull/56135) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix schema cache for fallback JSON->JSONEachRow with changed settings [#56172](https://github.com/ClickHouse/ClickHouse/pull/56172) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -67,22 +67,30 @@ Implementations of `ReadBuffer`/`WriteBuffer` are used for working with files an
Read/WriteBuffers only deal with bytes. There are functions from `ReadHelpers` and `WriteHelpers` header files to help with formatting input/output. For example, there are helpers to write a number in decimal format.
Lets look at what happens when you want to write a result set in `JSON` format to stdout. You have a result set ready to be fetched from `IBlockInputStream`. You create `WriteBufferFromFileDescriptor(STDOUT_FILENO)` to write bytes to stdout. You create `JSONRowOutputStream`, initialized with that `WriteBuffer`, to write rows in `JSON` to stdout. You create `BlockOutputStreamFromRowOutputStream` on top of it, to represent it as `IBlockOutputStream`. Then you call `copyData` to transfer data from `IBlockInputStream` to `IBlockOutputStream`, and everything works. Internally, `JSONRowOutputStream` will write various JSON delimiters and call the `IDataType::serializeTextJSON` method with a reference to `IColumn` and the row number as arguments. Consequently, `IDataType::serializeTextJSON` will call a method from `WriteHelpers.h`: for example, `writeText` for numeric types and `writeJSONString` for `DataTypeString`.
Let's examine what happens when you want to write a result set in `JSON` format to stdout.
You have a result set ready to be fetched from a pulling `QueryPipeline`.
First, you create a `WriteBufferFromFileDescriptor(STDOUT_FILENO)` to write bytes to stdout.
Next, you connect the result from the query pipeline to `JSONRowOutputFormat`, which is initialized with that `WriteBuffer`, to write rows in `JSON` format to stdout.
This can be done via the `complete` method, which turns a pulling `QueryPipeline` into a completed `QueryPipeline`.
Internally, `JSONRowOutputFormat` will write various JSON delimiters and call the `IDataType::serializeTextJSON` method with a reference to `IColumn` and the row number as arguments. Consequently, `IDataType::serializeTextJSON` will call a method from `WriteHelpers.h`: for example, `writeText` for numeric types and `writeJSONString` for `DataTypeString`.
## Tables {#tables}
The `IStorage` interface represents tables. Different implementations of that interface are different table engines. Examples are `StorageMergeTree`, `StorageMemory`, and so on. Instances of these classes are just tables.
The key `IStorage` methods are `read` and `write`. There are also `alter`, `rename`, `drop`, and so on. The `read` method accepts the following arguments: the set of columns to read from a table, the `AST` query to consider, and the desired number of streams to return. It returns one or multiple `IBlockInputStream` objects and information about the stage of data processing that was completed inside a table engine during query execution.
The key methods in `IStorage` are `read` and `write`, along with others such as `alter`, `rename`, and `drop`. The `read` method accepts the following arguments: a set of columns to read from a table, the `AST` query to consider, and the desired number of streams. It returns a `Pipe`.
In most cases, the read method is only responsible for reading the specified columns from a table, not for any further data processing. All further data processing is done by the query interpreter and is outside the responsibility of `IStorage`.
In most cases, the read method is responsible only for reading the specified columns from a table, not for any further data processing.
All subsequent data processing is handled by another part of the pipeline, which falls outside the responsibility of `IStorage`.
But there are notable exceptions:
- The AST query is passed to the `read` method, and the table engine can use it to derive index usage and to read fewer data from a table.
- Sometimes the table engine can process data itself to a specific stage. For example, `StorageDistributed` can send a query to remote servers, ask them to process data to a stage where data from different remote servers can be merged, and return that preprocessed data. The query interpreter then finishes processing the data.
The tables `read` method can return multiple `IBlockInputStream` objects to allow parallel data processing. These multiple block input streams can read from a table in parallel. Then you can wrap these streams with various transformations (such as expression evaluation or filtering) that can be calculated independently and create a `UnionBlockInputStream` on top of them, to read from multiple streams in parallel.
The tables `read` method can return a `Pipe` consisting of multiple `Processors`. These `Processors` can read from a table in parallel.
Then, you can connect these processors with various other transformations (such as expression evaluation or filtering), which can be calculated independently.
And then, create a `QueryPipeline` on top of them, and execute it via `PipelineExecutor`.
There are also `TableFunction`s. These are functions that return a temporary `IStorage` object to use in the `FROM` clause of a query.
@ -98,9 +106,19 @@ A hand-written recursive descent parser parses a query. For example, `ParserSele
## Interpreters {#interpreters}
Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the `INSERT` query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time.
Interpreters are responsible for creating the query execution pipeline from an AST. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, as well as the more sophisticated `InterpreterSelectQuery`.
`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted to separate classes to allow modular transformations of query.
The query execution pipeline is a combination of processors that can consume and produce chunks (sets of columns with specific types).
A processor communicates via ports and can have multiple input ports and multiple output ports.
A more detailed description can be found in [src/Processors/IProcessor.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/IProcessor.h).
For example, the result of interpreting the `SELECT` query is a "pulling" `QueryPipeline` which has a special output port to read the result set from.
The result of the `INSERT` query is a "pushing" `QueryPipeline` with an input port to write data for insertion.
And the result of interpreting the `INSERT SELECT` query is a "completed" `QueryPipeline` that has no inputs or outputs but copies data from `SELECT` to `INSERT` simultaneously.
`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are performed. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted into separate classes to allow for modular transformations of the query.
To address current problems that exist in interpreters, a new `InterpreterSelectQueryAnalyzer` is being developed. It is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional abstraction level between `AST` and `QueryPipeline` called `QueryTree`. It is not production-ready yet, but it can be tested with the `allow_experimental_analyzer` flag.
## Functions {#functions}

View File

@ -23,43 +23,34 @@ Create a fork of ClickHouse repository. To do that please click on the “fork
The development process consists of first committing the intended changes into your fork of ClickHouse and then creating a “pull request” for these changes to be accepted into the main repository (ClickHouse/ClickHouse).
To work with git repositories, please install `git`.
To do that in Ubuntu you would run in the command line terminal:
To work with Git repositories, please install `git`. To do that in Ubuntu you would run in the command line terminal:
sudo apt update
sudo apt install git
A brief manual on using Git can be found here: https://education.github.com/git-cheat-sheet-education.pdf.
For a detailed manual on Git see https://git-scm.com/book/en/v2.
A brief manual on using Git can be found [here](https://education.github.com/git-cheat-sheet-education.pdf).
For a detailed manual on Git see [here](https://git-scm.com/book/en/v2).
## Cloning a Repository to Your Development Machine {#cloning-a-repository-to-your-development-machine}
Next, you need to download the source files onto your working machine. This is called “to clone a repository” because it creates a local copy of the repository on your working machine.
In the command line terminal run:
Run in your terminal:
git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git
git clone git@github.com:your_github_username/ClickHouse.git # replace placeholder with your GitHub user name
cd ClickHouse
Or (if you'd like to use sparse checkout for submodules and avoid checking out unneeded files):
This command will create a directory `ClickHouse/` containing the source code of ClickHouse. If you specify a custom checkout directory (after the URL), it is important that this path does not contain whitespaces as it may lead to problems with the build system.
git clone git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
./contrib/update-submodules.sh
To make library dependencies available for the build, the ClickHouse repository uses Git submodules, i.e. references to external repositories. These are not checked out by default. To do so, you can either
Note: please, substitute *your_github_username* with what is appropriate!
- run `git clone` with option `--recurse-submodules`,
This command will create a directory `ClickHouse` containing the working copy of the project.
- if `git clone` did not check out submodules, run `git submodule update --init --jobs <N>` (e.g. `<N> = 12` to parallelize the checkout) to achieve the same as the previous alternative, or
It is important that the path to the working directory contains no whitespaces as it may lead to problems with running the build system.
- if `git clone` did not check out submodules and you like to use [sparse](https://github.blog/2020-01-17-bring-your-monorepo-down-to-size-with-sparse-checkout/) and [shallow](https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/) submodule checkout to omit unneeded files and history in submodules to save space (ca. 5 GB instead of ca. 15 GB), run `./contrib/update-submodules.sh`. Not really recommended as it generally makes working with submodules less convenient and slower.
Please note that ClickHouse repository uses `submodules`. That is what the references to additional repositories are called (i.e. external libraries on which the project depends). It means that when cloning the repository you need to specify the `--recursive` flag as in the example above. If the repository has been cloned without submodules, to download them you need to run the following:
git submodule init
git submodule update
You can check the status with the command: `git submodule status`.
You can check the Git status with the command: `git submodule status`.
If you get the following error message:
@ -83,36 +74,6 @@ You can also add original ClickHouse repo address to your local repository to pu
After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`.
### Working with Submodules {#working-with-submodules}
Working with submodules in git could be painful. Next commands will help to manage it:
# ! each command accepts
# Update remote URLs for submodules. Barely rare case
git submodule sync
# Add new submodules
git submodule init
# Update existing submodules to the current state
git submodule update
# Two last commands could be merged together
git submodule update --init
The next commands would help you to reset all submodules to the initial state (!WARNING! - any changes inside will be deleted):
# Synchronizes submodules' remote URL with .gitmodules
git submodule sync
# Update the registered submodules with initialize not yet initialized
git submodule update --init
# Reset all changes done after HEAD
git submodule foreach git reset --hard
# Clean files from .gitignore
git submodule foreach git clean -xfd
# Repeat last 4 commands for all submodule
git submodule foreach git submodule sync
git submodule foreach git submodule update --init
git submodule foreach git submodule foreach git reset --hard
git submodule foreach git submodule foreach git clean -xfd
## Build System {#build-system}
ClickHouse uses CMake and Ninja for building.

View File

@ -345,7 +345,7 @@ struct ExtractDomain
**7.** For abstract classes (interfaces) you can add the `I` prefix.
``` cpp
class IBlockInputStream
class IProcessor
```
**8.** If you use a variable locally, you can use the short name.

View File

@ -2,9 +2,10 @@
slug: /en/engines/table-engines/integrations/materialized-postgresql
sidebar_position: 130
sidebar_label: MaterializedPostgreSQL
title: MaterializedPostgreSQL
---
# [experimental] MaterializedPostgreSQL
Creates ClickHouse table with an initial data dump of PostgreSQL table and starts replication process, i.e. executes background job to apply new changes as they happen on PostgreSQL table in the remote PostgreSQL database.
If more than one table is required, it is highly recommended to use the [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) database engine instead of the table engine and use the `materialized_postgresql_tables_list` setting, which specifies the tables to be replicated (will also be possible to add database `schema`). It will be much better in terms of CPU, fewer connections and fewer replication slots inside the remote PostgreSQL database.

View File

@ -46,6 +46,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
`sharding_key` - (optionally) sharding key
Specifying the `sharding_key` is necessary for the following:
- For `INSERTs` into a distributed table (as the table engine needs the `sharding_key` to determine how to split the data). However, if `insert_distributed_one_random_shard` setting is enabled, then `INSERTs` do not need the sharding key
- For use with `optimize_skip_unused_shards` as the `sharding_key` is necessary to determine what shards should be queried
#### policy_name
`policy_name` - (optionally) policy name, it will be used to store temporary files for background send

View File

@ -1,5 +1,4 @@
---
slug: /en/getting-started/example-datasets/wikistat
sidebar_label: WikiStat
---
@ -41,7 +40,8 @@ CREATE TABLE wikistat
project LowCardinality(String),
subproject LowCardinality(String),
path String CODEC(ZSTD(3)),
hits UInt64 CODEC(ZSTD(3))
hits UInt64 CODEC(ZSTD(3)),
size UInt64 CODEC(ZSTD(3))
)
ENGINE = MergeTree
ORDER BY (path, time);

View File

@ -2156,7 +2156,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_local_file_min_bytes_for_seek](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_local_file_min_bytes_for_seek) - min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format. Default value - `8192`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`.
## ParquetMetadata {data-format-parquet-metadata}

View File

@ -438,7 +438,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Query-Id: 96fe0052-01e6-43ce-b12a-6b7370de6e8a
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
# HELP "Query" "Number of executing queries"
@ -603,7 +603,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@ -643,7 +643,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Connection: Keep-Alive
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@ -695,7 +695,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Absolute Path File</body></html>
@ -714,7 +714,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Relative Path File</body></html>

View File

@ -74,6 +74,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasnt don
### Elixir
- [clickhousex](https://github.com/appodeal/clickhousex/)
- [pillar](https://github.com/sofakingworld/pillar)
- [ecto_ch](https://github.com/plausible/ecto_ch)
### Nim
- [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse)
### Haskell

View File

@ -18,7 +18,15 @@ function, table engine, database, etc. In the examples below the parameter list
linked to for each type.
Parameters set in a named collection can be overridden in SQL, this is shown in the examples
below.
below. This ability can be limited using `[NOT] OVERRIDABLE` keywords and XML attributes
and/or the configuration option `allow_named_collection_override_by_default`.
:::warning
If override is allowed, it may be possible for users without administrative access to
figure out the credentials that you are trying to hide.
If you are using named collections with that purpose, you should disable
`allow_named_collection_override_by_default` (which is enabled by default).
:::
## Storing named collections in the system database
@ -26,11 +34,17 @@ below.
```sql
CREATE NAMED COLLECTION name AS
key_1 = 'value',
key_2 = 'value2',
key_1 = 'value' OVERRIDABLE,
key_2 = 'value2' NOT OVERRIDABLE,
url = 'https://connection.url/'
```
In the above example:
* `key_1` can always be overridden.
* `key_2` can never be overridden.
* `url` can be overridden or not depending on the value of `allow_named_collection_override_by_default`.
### Permissions to create named collections with DDL
To manage named collections with DDL a user must have the `named_control_collection` privilege. This can be assigned by adding a file to `/etc/clickhouse-server/users.d/`. The example gives the user `default` both the `access_management` and `named_collection_control` privileges:
@ -61,25 +75,37 @@ In the above example the `password_sha256_hex` value is the hexadecimal represen
<clickhouse>
<named_collections>
<name>
<key_1>value</key_1>
<key_2>value_2</key_2>
<key_1 overridable="true">value</key_1>
<key_2 overridable="false">value_2</key_2>
<url>https://connection.url/</url>
</name>
</named_collections>
</clickhouse>
```
In the above example:
* `key_1` can always be overridden.
* `key_2` can never be overridden.
* `url` can be overridden or not depending on the value of `allow_named_collection_override_by_default`.
## Modifying named collections
Named collections that are created with DDL queries can be altered or dropped with DDL. Named collections created with XML files can be managed by editing or deleting the corresponding XML.
### Alter a DDL named collection
Change or add the keys `key1` and `key3` of the collection `collection2`:
Change or add the keys `key1` and `key3` of the collection `collection2`
(this will not change the value of the `overridable` flag for those keys):
```sql
ALTER NAMED COLLECTION collection2 SET key1=4, key3='value3'
```
Change or add the key `key1` and allow it to be always overridden:
```sql
ALTER NAMED COLLECTION collection2 SET key1=4 OVERRIDABLE
```
Remove the key `key2` from `collection2`:
```sql
ALTER NAMED COLLECTION collection2 DELETE key2
@ -90,6 +116,13 @@ Change or add the key `key1` and delete the key `key3` of the collection `collec
ALTER NAMED COLLECTION collection2 SET key1=4, DELETE key3
```
To force a key to use the default settings for the `overridable` flag, you have to
remove and re-add the key.
```sql
ALTER NAMED COLLECTION collection2 DELETE key1;
ALTER NAMED COLLECTION collection2 SET key1=4;
```
### Drop the DDL named collection `collection2`:
```sql
DROP NAMED COLLECTION collection2

View File

@ -1,5 +1,4 @@
---
slug: /en/operations/optimizing-performance/profile-guided-optimization
sidebar_position: 54
sidebar_label: Profile Guided Optimization (PGO)
---

View File

@ -11,7 +11,8 @@ ClickHouse runs sampling profiler that allows analyzing query execution. Using p
Query profiler is automatically enabled in ClickHouse Cloud and you can run a sample query as follows
:::note If you are running the following query in ClickHouse Cloud, make sure to change `FROM system.trace_log` to `FROM clusterAllReplicas(default, system.trace_log)` to select from all nodes of the cluster :::
:::note If you are running the following query in ClickHouse Cloud, make sure to change `FROM system.trace_log` to `FROM clusterAllReplicas(default, system.trace_log)` to select from all nodes of the cluster
:::
``` sql
SELECT

View File

@ -214,7 +214,7 @@ Max consecutive resolving failures before dropping a host from ClickHouse DNS ca
Type: UInt32
Default: 1024
Default: 10
## index_mark_cache_policy
@ -2427,6 +2427,8 @@ This section contains the following parameters:
* hostname_levenshtein_distance - just like nearest_hostname, but it compares hostname in a levenshtein distance manner.
* first_or_random - selects the first ZooKeeper node, if it's not available then randomly selects one of remaining ZooKeeper nodes.
* round_robin - selects the first ZooKeeper node, if reconnection happens selects the next.
- `use_compression` — If set to true, enables compression in Keeper protocol.
**Example configuration**

View File

@ -897,6 +897,12 @@ Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF).
Disabled by default.
### input_format_csv_allow_cr_end_of_line {#input_format_csv_allow_cr_end_of_line}
If it is set true, CR(\\r) will be allowed at end of line not followed by LF(\\n)
Disabled by default.
### input_format_csv_enum_as_number {#input_format_csv_enum_as_number}
When enabled, always treat enum values as enum ids for CSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing.

View File

@ -3943,6 +3943,17 @@ Possible values:
Default value: `''`.
## preferred_optimize_projection_name {#preferred_optimize_projection_name}
If it is set to a non-empty string, ClickHouse will try to apply specified projection in query.
Possible values:
- string: name of preferred projection
Default value: `''`.
## alter_sync {#alter-sync}
Allows to set up waiting for actions to be executed on replicas by [ALTER](../../sql-reference/statements/alter/index.md), [OPTIMIZE](../../sql-reference/statements/optimize.md) or [TRUNCATE](../../sql-reference/statements/truncate.md) queries.

View File

@ -18,12 +18,14 @@ SHOW TABLES FROM information_schema;
│ KEY_COLUMN_USAGE │
│ REFERENTIAL_CONSTRAINTS │
│ SCHEMATA │
| STATISTICS |
│ TABLES │
│ VIEWS │
│ columns │
│ key_column_usage │
│ referential_constraints │
│ schemata │
| statistics |
│ tables │
│ views │
└─────────────────────────┘
@ -32,11 +34,12 @@ SHOW TABLES FROM information_schema;
`INFORMATION_SCHEMA` contains the following views:
- [COLUMNS](#columns)
- [SCHEMATA](#schemata)
- [TABLES](#tables)
- [VIEWS](#views)
- [KEY_COLUMN_USAGE](#key_column_usage)
- [REFERENTIAL_CONSTRAINTS](#referential_constraints)
- [SCHEMATA](#schemata)
- [STATISTICS](#statistics)
- [TABLES](#tables)
- [VIEWS](#views)
Case-insensitive equivalent views, e.g. `INFORMATION_SCHEMA.columns` are provided for reasons of compatibility with other databases. The same applies to all the columns in these views - both lowercase (for example, `table_name`) and uppercase (`TABLE_NAME`) variants are provided.
@ -372,3 +375,28 @@ Columns:
- `delete_rule` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
- `table_name` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
- `referenced_table_name` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
## STATISTICS {#statistics}
Provides information about table indexes. Currently returns an empty result (no rows) which is just enough to provide compatibility with 3rd party tools like Tableau Online.
Columns:
- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
- `table_schema` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
- `table_name` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
- `non_unique` ([Int32](../../sql-reference/data-types/int-uint.md)) — Currently unused.
- `index_schema` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
- `index_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused.
- `seq_in_index` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Currently unused.
- `column_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused.
- `collation` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused.
- `cardinality` ([Nullable](../../sql-reference/data-types/nullable.md)([Int64](../../sql-reference/data-types/int-uint.md))) — Currently unused.
- `sub_part` ([Nullable](../../sql-reference/data-types/nullable.md)([Int64](../../sql-reference/data-types/int-uint.md))) — Currently unused.
- `packed` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused.
- `nullable` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
- `index_type` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
- `comment` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
- `index_comment` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
- `is_visible` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
- `expression` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused.

View File

@ -35,27 +35,25 @@ WITH arrayMap(x -> demangle(addressToSymbol(x)), trace) AS all SELECT thread_nam
``` text
Row 1:
──────
thread_name: clickhouse-serv
thread_id: 686
query_id: 1a11f70b-626d-47c1-b948-f9c7b206395d
res: sigqueue
DB::StorageSystemStackTrace::fillData(std::__1::vector<COW<DB::IColumn>::mutable_ptr<DB::IColumn>, std::__1::allocator<COW<DB::IColumn>::mutable_ptr<DB::IColumn> > >&, DB::Context const&, DB::SelectQueryInfo const&) const
DB::IStorageSystemOneBlock<DB::StorageSystemStackTrace>::read(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > const&, DB::SelectQueryInfo const&, DB::Context const&, DB::QueryProcessingStage::Enum, unsigned long, unsigned int)
DB::InterpreterSelectQuery::executeFetchColumns(DB::QueryProcessingStage::Enum, DB::QueryPipeline&, std::__1::shared_ptr<DB::PrewhereInfo> const&, std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > const&)
DB::InterpreterSelectQuery::executeImpl(DB::QueryPipeline&, std::__1::shared_ptr<DB::IBlockInputStream> const&, std::__1::optional<DB::Pipe>)
DB::InterpreterSelectQuery::execute()
DB::InterpreterSelectWithUnionQuery::execute()
DB::executeQueryImpl(char const*, char const*, DB::Context&, bool, DB::QueryProcessingStage::Enum, bool, DB::ReadBuffer*)
DB::executeQuery(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, DB::Context&, bool, DB::QueryProcessingStage::Enum, bool)
DB::TCPHandler::runImpl()
DB::TCPHandler::run()
Poco::Net::TCPServerConnection::start()
Poco::Net::TCPServerDispatcher::run()
Poco::PooledThread::run()
Poco::ThreadImpl::runnableEntry(void*)
start_thread
__clone
thread_name: QueryPipelineEx
thread_id: 743490
query_id: dc55a564-febb-4e37-95bb-090ef182c6f1
res: memcpy
large_ralloc
arena_ralloc
do_rallocx
Allocator<true, true>::realloc(void*, unsigned long, unsigned long, unsigned long)
HashTable<unsigned long, HashMapCell<unsigned long, char*, HashCRC32<unsigned long>, HashTableNoState, PairNoInit<unsigned long, char*>>, HashCRC32<unsigned long>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>::resize(unsigned long, unsigned long)
void DB::Aggregator::executeImplBatch<false, false, true, DB::AggregationMethodOneNumber<unsigned long, HashMapTable<unsigned long, HashMapCell<unsigned long, char*, HashCRC32<unsigned long>, HashTableNoState, PairNoInit<unsigned long, char*>>, HashCRC32<unsigned long>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>, true, false>>(DB::AggregationMethodOneNumber<unsigned long, HashMapTable<unsigned long, HashMapCell<unsigned long, char*, HashCRC32<unsigned long>, HashTableNoState, PairNoInit<unsigned long, char*>>, HashCRC32<unsigned long>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>, true, false>&, DB::AggregationMethodOneNumber<unsigned long, HashMapTable<unsigned long, HashMapCell<unsigned long, char*, HashCRC32<unsigned long>, HashTableNoState, PairNoInit<unsigned long, char*>>, HashCRC32<unsigned long>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>, true, false>::State&, DB::Arena*, unsigned long, unsigned long, DB::Aggregator::AggregateFunctionInstruction*, bool, char*) const
DB::Aggregator::executeImpl(DB::AggregatedDataVariants&, unsigned long, unsigned long, std::__1::vector<DB::IColumn const*, std::__1::allocator<DB::IColumn const*>>&, DB::Aggregator::AggregateFunctionInstruction*, bool, bool, char*) const
DB::Aggregator::executeOnBlock(std::__1::vector<COW<DB::IColumn>::immutable_ptr<DB::IColumn>, std::__1::allocator<COW<DB::IColumn>::immutable_ptr<DB::IColumn>>>, unsigned long, unsigned long, DB::AggregatedDataVariants&, std::__1::vector<DB::IColumn const*, std::__1::allocator<DB::IColumn const*>>&, std::__1::vector<std::__1::vector<DB::IColumn const*, std::__1::allocator<DB::IColumn const*>>, std::__1::allocator<std::__1::vector<DB::IColumn const*, std::__1::allocator<DB::IColumn const*>>>>&, bool&) const
DB::AggregatingTransform::work()
DB::ExecutionThreadContext::executeTask()
DB::PipelineExecutor::executeStepImpl(unsigned long, std::__1::atomic<bool>*)
void std::__1::__function::__policy_invoker<void ()>::__call_impl<std::__1::__function::__default_alloc_func<DB::PipelineExecutor::spawnThreads()::$_0, void ()>>(std::__1::__function::__policy_storage const*)
ThreadPoolImpl<ThreadFromGlobalPoolImpl<false>>::worker(std::__1::__list_iterator<ThreadFromGlobalPoolImpl<false>, void*>)
void std::__1::__function::__policy_invoker<void ()>::__call_impl<std::__1::__function::__default_alloc_func<ThreadFromGlobalPoolImpl<false>::ThreadFromGlobalPoolImpl<void ThreadPoolImpl<ThreadFromGlobalPoolImpl<false>>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>(void&&)::'lambda'(), void ()>>(std::__1::__function::__policy_storage const*)
void* std::__1::__thread_proxy[abi:v15000]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>>(void*)
```
Getting filenames and line numbers in ClickHouse source code:

View File

@ -0,0 +1,35 @@
---
slug: /en/operations/system-tables/symbols
---
# symbols
Contains information for introspection of `clickhouse` binary. It requires the introspection privilege to access.
This table is only useful for C++ experts and ClickHouse engineers.
Columns:
- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol name in the binary. It is mangled. You can apply `demangle(symbol)` to obtain a readable name.
- `address_begin` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Start address of the symbol in the binary.
- `address_end` ([UInt64](../../sql-reference/data-types/int-uint.md)) — End address of the symbol in the binary.
- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `event`.
**Example**
``` sql
SELECT address_begin, address_end - address_begin AS size, demangle(symbol) FROM system.symbols ORDER BY size DESC LIMIT 10
```
``` text
┌─address_begin─┬─────size─┬─demangle(symbol)──────────────────────────────────────────────────────────────────┐
│ 25000976 │ 29466000 │ icudt70_dat │
│ 400605288 │ 2097272 │ arena_emap_global │
│ 18760592 │ 1048576 │ CLD2::kQuadChrome1015_2 │
│ 9807152 │ 884808 │ TopLevelDomainLookupHash::isValid(char const*, unsigned long)::wordlist │
│ 57442432 │ 850608 │ llvm::X86Insts │
│ 55682944 │ 681360 │ (anonymous namespace)::X86DAGToDAGISel::SelectCode(llvm::SDNode*)::MatcherTable │
│ 55130368 │ 502840 │ (anonymous namespace)::X86InstructionSelector::getMatchTable() const::MatchTable0 │
│ 402930616 │ 404032 │ qpl::ml::dispatcher::hw_dispatcher::get_instance()::instance │
│ 274131872 │ 356795 │ DB::SettingsTraits::Accessor::instance()::$_0::operator()() const │
│ 58293040 │ 249424 │ llvm::X86InstrNameData │
└───────────────┴──────────┴───────────────────────────────────────────────────────────────────────────────────┘
```

View File

@ -12,6 +12,7 @@ A client application to interact with clickhouse-keeper by its native protocol.
- `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
- `-p N`, `--port=N` — Server port. Default value: 9181
- `-c FILE_PATH`, `--config-file=FILE_PATH` — Set path of config file to get the connection string. Default value: `config.xml`.
- `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.

View File

@ -16,7 +16,7 @@ CREATE TABLE IF NOT EXISTS float_vs_decimal
my_decimal Decimal64(3)
)Engine=MergeTree ORDER BY tuple()
INSERT INTO float_vs_decimal SELECT round(canonicalRand(), 3) AS res, res FROM system.numbers LIMIT 1000000; # Generate 1 000 000 random number with 2 decimal places and store them as a float and as a decimal
INSERT INTO float_vs_decimal SELECT round(randCanonical(), 3) AS res, res FROM system.numbers LIMIT 1000000; # Generate 1 000 000 random number with 2 decimal places and store them as a float and as a decimal
SELECT sum(my_float), sum(my_decimal) FROM float_vs_decimal;
> 500279.56300000014 500279.563

View File

@ -1769,7 +1769,7 @@ Example of settings:
<password>qwerty123</password>
<keyspase>database_name</keyspase>
<column_family>table_name</column_family>
<allow_filering>1</allow_filering>
<allow_filtering>1</allow_filtering>
<partition_key_prefix>1</partition_key_prefix>
<consistency>One</consistency>
<where>"SomeColumn" = 42</where>
@ -1787,7 +1787,7 @@ Setting fields:
- `password` Password of the Cassandra user.
- `keyspace` Name of the keyspace (database).
- `column_family` Name of the column family (table).
- `allow_filering` Flag to allow or not potentially expensive conditions on clustering key columns. Default value is 1.
- `allow_filtering` Flag to allow or not potentially expensive conditions on clustering key columns. Default value is 1.
- `partition_key_prefix` Number of partition key columns in primary key of the Cassandra table. Required for compose key dictionaries. Order of key columns in the dictionary definition must be the same as in Cassandra. Default value is 1 (the first key column is a partition key and other key columns are clustering key).
- `consistency` Consistency level. Possible values: `One`, `Two`, `Three`, `All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default value is `One`.
- `where` Optional selection criteria.

View File

@ -2175,7 +2175,7 @@ Result:
## arrayRandomSample
Function `arrayRandomSample` returns a subset with `samples`-many random elements of an input array. If `samples` exceeds the size of the input array, the sample size is limited to the size of the array. In this case, all elements of the input array are returned, but the order is not guaranteed. The function can handle both flat arrays and nested arrays.
Function `arrayRandomSample` returns a subset with `samples`-many random elements of an input array. If `samples` exceeds the size of the input array, the sample size is limited to the size of the array, i.e. all array elements are returned but their order is not guaranteed. The function can handle both flat arrays and nested arrays.
**Syntax**
@ -2185,13 +2185,15 @@ arrayRandomSample(arr, samples)
**Arguments**
- `arr` — The input array from which to sample elements. This may be flat or nested arrays.
- `samples`An unsigned integer specifying the number of elements to include in the random sample.
- `arr` — The input array from which to sample elements. ([Array(T)](../data-types/array.md))
- `samples`The number of elements to include in the random sample ([UInt*](../data-types/int-uint.md))
**Returned Value**
- An array containing a random sample of elements from the input array.
Type: [Array](../data-types/array.md).
**Examples**
Query:
@ -2201,9 +2203,10 @@ SELECT arrayRandomSample(['apple', 'banana', 'cherry', 'date'], 2) as res;
```
Result:
```
┌─res────────────────┐
│ ['banana','apple'] │
│ ['cherry','apple'] │
└────────────────────┘
```
@ -2214,6 +2217,7 @@ SELECT arrayRandomSample([[1, 2], [3, 4], [5, 6]], 2) as res;
```
Result:
```
┌─res───────────┐
│ [[3,4],[5,6]] │
@ -2222,24 +2226,12 @@ Result:
Query:
```sql
SELECT arrayRandomSample([1, 2, 3, 4, 5], 0) as res;
```
Result:
```
┌─res─┐
│ [] │
└─────┘
```
Query:
```sql
SELECT arrayRandomSample([1, 2, 3], 5) as res;
```
Result:
```
┌─res─────┐
│ [3,1,2] │

View File

@ -2766,9 +2766,11 @@ Result:
## fromUnixTimestamp
Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type.
This function converts a Unix timestamp to a calendar date and a time of a day.
fromUnixTimestamp uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
It can be called in two ways:
When given a single argument of type [Integer](../../sql-reference/data-types/int-uint.md), it returns a value of type [DateTime](../../sql-reference/data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime).
Alias: `FROM_UNIXTIME`.
@ -2786,14 +2788,16 @@ Result:
└──────────────────────────────┘
```
When there are two or three arguments, the first an [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second a constant format string and the third an optional constant time zone string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type.
When given two or three arguments where the first argument is a value of type [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../../sql-reference/data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used.
For example:
**Example:**
```sql
SELECT fromUnixTimestamp(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
```
Result:
```text
┌─DateTime────────────┐
│ 2009-02-11 14:42:23 │
@ -2806,19 +2810,20 @@ SELECT fromUnixTimestamp(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
## fromUnixTimestampInJodaSyntax
Similar to fromUnixTimestamp, except that it formats time in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
Same as [fromUnixTimestamp](#fromUnixTimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style.
**Example:**
``` sql
SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC');
SELECT fromUnixTimestampInJodaSyntax(1234334543, 'yyyy-MM-dd HH:mm:ss', 'UTC') AS DateTime;
```
Result:
```
┌─fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC')────┐
│ 2022-11-30 10:41:12
└────────────────────────────────────────────────────────────────────────────
┌─DateTime────────────┐
│ 2009-02-11 06:42:23
└─────────────────────┘
```
## toModifiedJulianDay

View File

@ -2760,10 +2760,13 @@ message Root
Returns a formatted, possibly multi-line, version of the given SQL query.
Throws an exception if the query is not well-formed. To return `NULL` instead, function `formatQueryOrNull()` may be used.
**Syntax**
```sql
formatQuery(query)
formatQueryOrNull(query)
```
**Arguments**
@ -2796,10 +2799,13 @@ WHERE (a > 3) AND (b < 3) │
Like formatQuery() but the returned formatted string contains no line breaks.
Throws an exception if the query is not well-formed. To return `NULL` instead, function `formatQuerySingleLineOrNull()` may be used.
**Syntax**
```sql
formatQuerySingleLine(query)
formatQuerySingleLineOrNull(query)
```
**Arguments**

View File

@ -107,11 +107,7 @@ round(3.65, 1) = 3.6
Rounds a number to a specified decimal position.
- If the rounding number is halfway between two numbers, the function uses bankers rounding.
Banker's rounding is a method of rounding fractional numbers. When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position. For example: 3.5 rounds up to 4, 2.5 rounds down to 2.
It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest). The [round](#rounding_functions-round) function performs the same rounding for floating point numbers. The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`.
- If the rounding number is halfway between two numbers, the function uses bankers rounding. Banker's rounding is a method of rounding fractional numbers. When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position. For example: 3.5 rounds up to 4, 2.5 rounds down to 2. It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest). The [round](#rounding_functions-round) function performs the same rounding for floating point numbers. The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`.
- In other cases, the function rounds numbers to the nearest integer.

View File

@ -1371,6 +1371,86 @@ Result:
└──────────────────┘
```
## byteHammingDistance
Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.
**Syntax**
```sql
byteHammingDistance(string1, string2)
```
**Examples**
``` sql
SELECT byteHammingDistance('karolin', 'kathrin');
```
Result:
``` text
┌─byteHammingDistance('karolin', 'kathrin')─┐
│ 3 │
└───────────────────────────────────────────┘
```
Alias: mismatches
## stringJaccardIndex
Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.
**Syntax**
```sql
stringJaccardIndex(string1, string2)
```
**Examples**
``` sql
SELECT stringJaccardIndex('clickhouse', 'mouse');
```
Result:
``` text
┌─stringJaccardIndex('clickhouse', 'mouse')─┐
│ 0.4 │
└───────────────────────────────────────────┘
```
## stringJaccardIndexUTF8
Like [stringJaccardIndex](#stringJaccardIndex) but for UTF8-encoded strings.
## editDistance
Calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two byte strings.
**Syntax**
```sql
editDistance(string1, string2)
```
**Examples**
``` sql
SELECT editDistance('clickhouse', 'mouse');
```
Result:
``` text
┌─editDistance('clickhouse', 'mouse')─┐
│ 6 │
└─────────────────────────────────────┘
```
Alias: levenshteinDistance
## initcap
Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.

View File

@ -681,79 +681,3 @@ Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are U
## hasSubsequenceCaseInsensitiveUTF8
Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
## byteHammingDistance
Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.
**Syntax**
```sql
byteHammingDistance(string1, string2)
```
**Examples**
``` sql
SELECT byteHammingDistance('karolin', 'kathrin');
```
Result:
``` text
┌─byteHammingDistance('karolin', 'kathrin')─┐
│ 3 │
└───────────────────────────────────────────┘
```
Alias: mismatches
## stringJaccardIndex
Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.
**Syntax**
```sql
stringJaccardIndex(string1, string2)
```
**Examples**
``` sql
SELECT stringJaccardIndex('clickhouse', 'mouse');
```
Result:
``` text
┌─stringJaccardIndex('clickhouse', 'mouse')─┐
│ 0.4 │
└───────────────────────────────────────────┘
```
## editDistance
Calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two byte strings.
**Syntax**
```sql
editDistance(string1, string2)
```
**Examples**
``` sql
SELECT editDistance('clickhouse', 'mouse');
```
Result:
``` text
┌─editDistance('clickhouse', 'mouse')─┐
│ 6 │
└─────────────────────────────────────┘
```
Alias: levenshteinDistance

View File

@ -171,7 +171,8 @@ Result:
Can be used with [MinHash](../../sql-reference/functions/hash-functions.md#ngramminhash) functions for detection of semi-duplicate strings:
``` sql
SELECT tupleHammingDistance(wordShingleMinHash(string), wordShingleMinHashCaseInsensitive(string)) as HammingDistance FROM (SELECT 'ClickHouse is a column-oriented database management system for online analytical processing of queries.' AS string);
SELECT tupleHammingDistance(wordShingleMinHash(string), wordShingleMinHashCaseInsensitive(string)) AS HammingDistance
FROM (SELECT 'ClickHouse is a column-oriented database management system for online analytical processing of queries.' AS string);
```
Result:

View File

@ -1840,9 +1840,9 @@ Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and
**Syntax**
``` sql
fromUnixTimestamp64Milli(value [, timezone])
fromUnixTimestamp64Micro(value [, timezone])
fromUnixTimestamp64Nano(value [, timezone])
fromUnixTimestamp64Milli(value[, timezone])
fromUnixTimestamp64Micro(value[, timezone])
fromUnixTimestamp64Nano(value[, timezone])
```
**Arguments**

View File

@ -12,9 +12,9 @@ This query intends to modify already existing named collections.
```sql
ALTER NAMED COLLECTION [IF EXISTS] name [ON CLUSTER cluster]
[ SET
key_name1 = 'some value',
key_name2 = 'some value',
key_name3 = 'some value',
key_name1 = 'some value' [[NOT] OVERRIDABLE],
key_name2 = 'some value' [[NOT] OVERRIDABLE],
key_name3 = 'some value' [[NOT] OVERRIDABLE],
... ] |
[ DELETE key_name4, key_name5, ... ]
```
@ -22,9 +22,9 @@ key_name3 = 'some value',
**Example**
```sql
CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
CREATE NAMED COLLECTION foobar AS a = '1' NOT OVERRIDABLE, b = '2';
ALTER NAMED COLLECTION foobar SET a = '2', c = '3';
ALTER NAMED COLLECTION foobar SET a = '2' OVERRIDABLE, c = '3';
ALTER NAMED COLLECTION foobar DELETE b;
```

View File

@ -11,16 +11,16 @@ Creates a new named collection.
```sql
CREATE NAMED COLLECTION [IF NOT EXISTS] name [ON CLUSTER cluster] AS
key_name1 = 'some value',
key_name2 = 'some value',
key_name3 = 'some value',
key_name1 = 'some value' [[NOT] OVERRIDABLE],
key_name2 = 'some value' [[NOT] OVERRIDABLE],
key_name3 = 'some value' [[NOT] OVERRIDABLE],
...
```
**Example**
```sql
CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
CREATE NAMED COLLECTION foobar AS a = '1', b = '2' OVERRIDABLE;
```
**Related statements**

View File

@ -103,7 +103,7 @@ INSERT INTO holdings VALUES
('Bitcoin', 200),
('Ethereum', 250),
('Ethereum', 5000),
('DOGEFI', 10);
('DOGEFI', 10),
('Bitcoin Diamond', 5000);
```

View File

@ -7,7 +7,7 @@ keywords: [gcs, bucket]
# gcs Table Function
Provides a table-like interface to select/insert files in [Google Cloud Storage](https://cloud.google.com/storage/).
Provides a table-like interface to `SELECT` and `INSERT` data from [Google Cloud Storage](https://cloud.google.com/storage/). Requires the [`Storage Object User` IAM role](https://cloud.google.com/storage/docs/access-control/iam-roles).
**Syntax**

View File

@ -49,21 +49,9 @@ ClickHouse — полноценная столбцовая СУБД. Данны
Блоки создаются для всех обработанных фрагментов данных. Напоминаем, что одни и те же типы вычислений, имена столбцов и типы переиспользуются в разных блоках и только данные колонок изменяются. Лучше разделить данные и заголовок блока потому, что в блоках маленького размера мы имеем большой оверхэд по временным строкам при копировании умных указателей (`shared_ptrs`) и имен столбцов.
## Потоки блоков (Block Streams) {#block-streams}
## Процессоры
Потоки блоков обрабатывают данные. Мы используем потоки блоков для чтения данных, трансформации или записи данных куда-либо. `IBlockInputStream` предоставляет метод `read` для получения следующего блока, пока это возможно, и метод `write`, чтобы продвигать (push) блок куда-либо.
Потоки отвечают за:
1. Чтение и запись в таблицу. Таблица лишь возвращает поток для чтения или записи блоков.
2. Реализацию форматов данных. Например, при выводе данных в терминал в формате `Pretty`, вы создаете выходной поток блоков, который форматирует поступающие в него блоки.
3. Трансформацию данных. Допустим, у вас есть `IBlockInputStream` и вы хотите создать отфильтрованный поток. Вы создаете `FilterBlockInputStream` и инициализируете его вашим потоком. Затем вы тянете (pull) блоки из `FilterBlockInputStream`, а он тянет блоки исходного потока, фильтрует их и возвращает отфильтрованные блоки вам. Таким образом построены конвейеры выполнения запросов.
Имеются и более сложные трансформации. Например, когда вы тянете блоки из `AggregatingBlockInputStream`, он считывает все данные из своего источника, агрегирует их, и возвращает поток агрегированных данных вам. Другой пример: конструктор `UnionBlockInputStream` принимает множество источников входных данных и число потоков. Такой `Stream` работает в несколько потоков и читает данные источников параллельно.
> Потоки блоков используют «втягивающий» (pull) подход к управлению потоком выполнения: когда вы вытягиваете блок из первого потока, он, следовательно, вытягивает необходимые блоки из вложенных потоков, так и работает весь конвейер выполнения. Ни «pull» ни «push» не имеют явного преимущества, потому что поток управления неявный, и это ограничивает в реализации различных функций, таких как одновременное выполнение нескольких запросов (слияние нескольких конвейеров вместе). Это ограничение можно преодолеть с помощью сопрограмм (coroutines) или просто запуском дополнительных потоков, которые ждут друг друга. У нас может быть больше возможностей, если мы сделаем поток управления явным: если мы локализуем логику для передачи данных из одной расчетной единицы в другую вне этих расчетных единиц. Читайте эту [статью](http://journal.stuffwithstuff.com/2013/01/13/iteration-inside-and-out/) для углубленного изучения.
Следует отметить, что конвейер выполнения запроса создает временные данные на каждом шаге. Мы стараемся сохранить размер блока достаточно маленьким, чтобы временные данные помещались в кэш процессора. При таком допущении запись и чтение временных данных практически бесплатны по сравнению с другими расчетами. Мы могли бы рассмотреть альтернативу, которая заключается в том, чтобы объединить многие операции в конвейере вместе. Это может сделать конвейер как можно короче и удалить большую часть временных данных, что может быть преимуществом, но у такого подхода также есть недостатки. Например, разделенный конвейер позволяет легко реализовать кэширование промежуточных данных, использование промежуточных данных из аналогичных запросов, выполняемых одновременно, и объединение конвейеров для аналогичных запросов.
Смотрите описание в файле [src/Processors/IProcessor.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/IProcessor.h) исходного кода.
## Форматы {#formats}
@ -81,13 +69,16 @@ ClickHouse — полноценная столбцовая СУБД. Данны
Буферы чтения-записи имеют дело только с байтами. В заголовочных файлах `ReadHelpers` и `WriteHelpers` объявлены некоторые функции, чтобы помочь с форматированием ввода-вывода. Например, есть помощники для записи числа в десятичном формате.
Давайте посмотрим, что происходит, когда вы хотите вывести результат в `JSON` формате в стандартный вывод (stdout). У вас есть результирующий набор данных, готовый к извлечению из `IBlockInputStream`. Вы создаете `WriteBufferFromFileDescriptor(STDOUT_FILENO)` чтобы записать байты в stdout. Вы создаете `JSONRowOutputStream`, инициализируете с этим `WriteBuffer`'ом, чтобы записать строки `JSON` в stdout. Кроме того вы создаете `BlockOutputStreamFromRowOutputStream`, реализуя `IBlockOutputStream`. Затем вызывается `copyData` для передачи данных из `IBlockInputStream` в `IBlockOutputStream` и все работает. Внутренний `JSONRowOutputStream` будет писать в формате `JSON` различные разделители и вызвать `IDataType::serializeTextJSON` метод со ссылкой на `IColumn` и номер строки в качестве аргументов. Следовательно, `IDataType::serializeTextJSON` вызовет метод из `WriteHelpers.h`: например, `writeText` для числовых типов и `writeJSONString` для `DataTypeString`.
Давайте посмотрим, что происходит, когда вы хотите вывести результат в `JSON` формате в стандартный вывод (stdout). У вас есть результирующий набор данных, готовый к извлечению из `QueryPipeline`. Вы создаете `WriteBufferFromFileDescriptor(STDOUT_FILENO)` чтобы записать байты в stdout. Вы создаете `JSONRowOutputFormat`, инициализируете с этим `WriteBuffer`'ом, чтобы записать строки `JSON` в stdout.
Чтобы соеденить выход `QueryPipeline` с форматом, можно использовать метод `complete`, который превращает `QueryPipeline` в завершенный `QueryPipeline`.
Внутренний `JSONRowOutputStream` будет писать в формате `JSON` различные разделители и вызвать `IDataType::serializeTextJSON` метод со ссылкой на `IColumn` и номер строки в качестве аргументов. Следовательно, `IDataType::serializeTextJSON` вызовет метод из `WriteHelpers.h`: например, `writeText` для числовых типов и `writeJSONString` для `DataTypeString`.
## Таблицы {#tables}
Интерфейс `IStorage` служит для отображения таблицы. Различные движки таблиц являются реализациями этого интерфейса. Примеры `StorageMergeTree`, `StorageMemory` и так далее. Экземпляры этих классов являются просто таблицами.
Ключевые методы `IStorage` это `read` и `write`. Есть и другие варианты — `alter`, `rename`, `drop` и так далее. Метод `read` принимает следующие аргументы: набор столбцов для чтения из таблицы, `AST` запрос и желаемое количество потоков для вывода. Он возвращает один или несколько объектов `IBlockInputStream` и информацию о стадии обработки данных, которая была завершена внутри табличного движка во время выполнения запроса.
Ключевые методы `IStorage` это `read` и `write`. Есть и другие варианты — `alter`, `rename`, `drop` и так далее.
Метод `read` принимает следующие аргументы: набор столбцов для чтения из таблицы, `AST` запрос и желаемое количество потоков для вывода и возвращает `Pipe`.
В большинстве случаев метод read отвечает только за чтение указанных столбцов из таблицы, а не за дальнейшую обработку данных. Вся дальнейшая обработка данных осуществляется интерпретатором запросов и не входит в сферу ответственности `IStorage`.
@ -96,7 +87,9 @@ ClickHouse — полноценная столбцовая СУБД. Данны
- AST-запрос, передающийся в метод `read`, может использоваться движком таблицы для получения информации о возможности использования индекса и считывания меньшего количества данных из таблицы.
- Иногда движок таблиц может сам обрабатывать данные до определенного этапа. Например, `StorageDistributed` можно отправить запрос на удаленные серверы, попросить их обработать данные до этапа, когда данные с разных удаленных серверов могут быть объединены, и вернуть эти предварительно обработанные данные. Затем интерпретатор запросов завершает обработку данных.
Метод `read` может возвращать несколько объектов `IBlockInputStream`, позволяя осуществлять параллельную обработку данных. Эти несколько блочных входных потоков могут считываться из таблицы параллельно. Затем вы можете обернуть эти потоки различными преобразованиями (такими как вычисление выражений или фильтрация), которые могут быть вычислены независимо, и создать `UnionBlockInputStream` поверх них, чтобы читать из нескольких потоков параллельно.
Метод `read` может возвращать `Pipe`, состоящий из нескольких процессоров. Каждый их этих процессоров может читать данные параллельно.
Затем, вы можете соеденить эти просессоры с другими преобразованиями (такими как вычисление выражений или фильтрация), которые могут быть вычислены независимо.
Далее, создан `QueryPipeline` поверх них, можно выполнить пайплайн с помощью `PipelineExecutor`.
Есть и другие варианты. Например, `TableFunction` возвращает временный объект `IStorage`, который можно подставить во `FROM`.
@ -112,10 +105,18 @@ ClickHouse — полноценная столбцовая СУБД. Данны
## Интерпретаторы {#interpreters}
Интерпретаторы отвечают за создание конвейера выполнения запроса из `AST`. Есть простые интерпретаторы, такие как `InterpreterExistsQuery` и `InterpreterDropQuery` или более сложный `InterpreterSelectQuery`. Конвейер выполнения запроса представляет собой комбинацию входных и выходных потоков блоков. Например, результатом интерпретации `SELECT` запроса является `IBlockInputStream` для чтения результирующего набора данных; результат интерпретации `INSERT` запроса — это `IBlockOutputStream`, для записи данных, предназначенных для вставки; результат интерпретации `INSERT SELECT` запроса — это `IBlockInputStream`, который возвращает пустой результирующий набор при первом чтении, но копирует данные из `SELECT` к `INSERT`.
Интерпретаторы отвечают за создание конвейера выполнения запроса из `AST`. Есть простые интерпретаторы, такие как `InterpreterExistsQuery` и `InterpreterDropQuery` или более сложный `InterpreterSelectQuery`.
Конвейер выполнения запроса представляет собой комбинацию процессоров, которые могут принимать на вход и также возвращать чанки (набор колонок с их типами)
Процессоры обмениваются данными через порты и могут иметь несколько входных и выходных портов.
Более подробное описание можно найти в файле [src/Processors/IProcessor.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/IProcessor.h).
Например, результатом интерпретации `SELECT` запроса является `QueryPipeline`, который имеет специальный выходной порт для чтения результирующего набора данных. Результатом интерпретации `INSERT` запроса является `QueryPipeline` с входным портом для записи данных для вставки. Результатом интерпретации `INSERT SELECT` запроса является завершенный `QueryPipeline`, который не имеет входов или выходов, но копирует данные из `SELECT` в `INSERT` одновременно.
`InterpreterSelectQuery` использует `ExpressionAnalyzer` и `ExpressionActions` механизмы для анализа запросов и преобразований. Именно здесь выполняется большинство оптимизаций запросов на основе правил. `ExpressionAnalyzer` написан довольно грязно и должен быть переписан: различные преобразования запросов и оптимизации должны быть извлечены в отдельные классы, чтобы позволить модульные преобразования или запросы.
Для решения текущих проблем, существующих в интерпретаторах, разрабатывается новый `InterpreterSelectQueryAnalyzer`. Это новая версия `InterpreterSelectQuery`, которая не использует `ExpressionAnalyzer` и вводит дополнительный уровень абстракции между `AST` и `QueryPipeline`, называемый `QueryTree`. Он еще не готов к использованию в продакшене, но его можно протестировать с помощью флага `allow_experimental_analyzer`.
## Функции {#functions}
Существуют обычные функции и агрегатные функции. Агрегатные функции смотрите в следующем разделе.

View File

@ -345,7 +345,7 @@ struct ExtractDomain
**7.** Для абстрактных классов (интерфейсов) можно добавить в начало имени букву `I`.
``` cpp
class IBlockInputStream
class IProcessor
```
**8.** Если переменная используется достаточно локально, то можно использовать короткое имя.

View File

@ -366,7 +366,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Query-Id: 96fe0052-01e6-43ce-b12a-6b7370de6e8a
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
<
# HELP "Query" "Number of executing queries"
@ -529,7 +529,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@ -569,7 +569,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Connection: Keep-Alive
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@ -621,7 +621,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Absolute Path File</body></html>
@ -640,7 +640,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Relative Path File</body></html>

View File

@ -31,27 +31,25 @@ WITH arrayMap(x -> demangle(addressToSymbol(x)), trace) AS all SELECT thread_nam
``` text
Row 1:
──────
thread_name: clickhouse-serv
thread_id: 686
query_id: 1a11f70b-626d-47c1-b948-f9c7b206395d
res: sigqueue
DB::StorageSystemStackTrace::fillData(std::__1::vector<COW<DB::IColumn>::mutable_ptr<DB::IColumn>, std::__1::allocator<COW<DB::IColumn>::mutable_ptr<DB::IColumn> > >&, DB::Context const&, DB::SelectQueryInfo const&) const
DB::IStorageSystemOneBlock<DB::StorageSystemStackTrace>::read(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > const&, DB::SelectQueryInfo const&, DB::Context const&, DB::QueryProcessingStage::Enum, unsigned long, unsigned int)
DB::InterpreterSelectQuery::executeFetchColumns(DB::QueryProcessingStage::Enum, DB::QueryPipeline&, std::__1::shared_ptr<DB::PrewhereInfo> const&, std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > const&)
DB::InterpreterSelectQuery::executeImpl(DB::QueryPipeline&, std::__1::shared_ptr<DB::IBlockInputStream> const&, std::__1::optional<DB::Pipe>)
DB::InterpreterSelectQuery::execute()
DB::InterpreterSelectWithUnionQuery::execute()
DB::executeQueryImpl(char const*, char const*, DB::Context&, bool, DB::QueryProcessingStage::Enum, bool, DB::ReadBuffer*)
DB::executeQuery(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, DB::Context&, bool, DB::QueryProcessingStage::Enum, bool)
DB::TCPHandler::runImpl()
DB::TCPHandler::run()
Poco::Net::TCPServerConnection::start()
Poco::Net::TCPServerDispatcher::run()
Poco::PooledThread::run()
Poco::ThreadImpl::runnableEntry(void*)
start_thread
__clone
thread_name: QueryPipelineEx
thread_id: 743490
query_id: dc55a564-febb-4e37-95bb-090ef182c6f1
res: memcpy
large_ralloc
arena_ralloc
do_rallocx
Allocator<true, true>::realloc(void*, unsigned long, unsigned long, unsigned long)
HashTable<unsigned long, HashMapCell<unsigned long, char*, HashCRC32<unsigned long>, HashTableNoState, PairNoInit<unsigned long, char*>>, HashCRC32<unsigned long>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>::resize(unsigned long, unsigned long)
void DB::Aggregator::executeImplBatch<false, false, true, DB::AggregationMethodOneNumber<unsigned long, HashMapTable<unsigned long, HashMapCell<unsigned long, char*, HashCRC32<unsigned long>, HashTableNoState, PairNoInit<unsigned long, char*>>, HashCRC32<unsigned long>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>, true, false>>(DB::AggregationMethodOneNumber<unsigned long, HashMapTable<unsigned long, HashMapCell<unsigned long, char*, HashCRC32<unsigned long>, HashTableNoState, PairNoInit<unsigned long, char*>>, HashCRC32<unsigned long>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>, true, false>&, DB::AggregationMethodOneNumber<unsigned long, HashMapTable<unsigned long, HashMapCell<unsigned long, char*, HashCRC32<unsigned long>, HashTableNoState, PairNoInit<unsigned long, char*>>, HashCRC32<unsigned long>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>, true, false>::State&, DB::Arena*, unsigned long, unsigned long, DB::Aggregator::AggregateFunctionInstruction*, bool, char*) const
DB::Aggregator::executeImpl(DB::AggregatedDataVariants&, unsigned long, unsigned long, std::__1::vector<DB::IColumn const*, std::__1::allocator<DB::IColumn const*>>&, DB::Aggregator::AggregateFunctionInstruction*, bool, bool, char*) const
DB::Aggregator::executeOnBlock(std::__1::vector<COW<DB::IColumn>::immutable_ptr<DB::IColumn>, std::__1::allocator<COW<DB::IColumn>::immutable_ptr<DB::IColumn>>>, unsigned long, unsigned long, DB::AggregatedDataVariants&, std::__1::vector<DB::IColumn const*, std::__1::allocator<DB::IColumn const*>>&, std::__1::vector<std::__1::vector<DB::IColumn const*, std::__1::allocator<DB::IColumn const*>>, std::__1::allocator<std::__1::vector<DB::IColumn const*, std::__1::allocator<DB::IColumn const*>>>>&, bool&) const
DB::AggregatingTransform::work()
DB::ExecutionThreadContext::executeTask()
DB::PipelineExecutor::executeStepImpl(unsigned long, std::__1::atomic<bool>*)
void std::__1::__function::__policy_invoker<void ()>::__call_impl<std::__1::__function::__default_alloc_func<DB::PipelineExecutor::spawnThreads()::$_0, void ()>>(std::__1::__function::__policy_storage const*)
ThreadPoolImpl<ThreadFromGlobalPoolImpl<false>>::worker(std::__1::__list_iterator<ThreadFromGlobalPoolImpl<false>, void*>)
void std::__1::__function::__policy_invoker<void ()>::__call_impl<std::__1::__function::__default_alloc_func<ThreadFromGlobalPoolImpl<false>::ThreadFromGlobalPoolImpl<void ThreadPoolImpl<ThreadFromGlobalPoolImpl<false>>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>(void&&)::'lambda'(), void ()>>(std::__1::__function::__policy_storage const*)
void* std::__1::__thread_proxy[abi:v15000]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>>(void*)
```
Получение имен файлов и номеров строк в исходном коде ClickHouse:

View File

@ -723,7 +723,7 @@ SOURCE(REDIS(
<password>qwerty123</password>
<keyspase>database_name</keyspase>
<column_family>table_name</column_family>
<allow_filering>1</allow_filering>
<allow_filtering>1</allow_filtering>
<partition_key_prefix>1</partition_key_prefix>
<consistency>One</consistency>
<where>"SomeColumn" = 42</where>
@ -741,7 +741,7 @@ SOURCE(REDIS(
- `password` пароль для соединения с Cassandra.
- `keyspace` имя keyspace (база данных).
- `column_family` имя семейства столбцов (таблица).
- `allow_filering` флаг, разрешающий или не разрешающий потенциально дорогостоящие условия на кластеризации ключевых столбцов. Значение по умолчанию: 1.
- `allow_filtering` флаг, разрешающий или не разрешающий потенциально дорогостоящие условия на кластеризации ключевых столбцов. Значение по умолчанию: 1.
- `partition_key_prefix` количество партиций ключевых столбцов в первичном ключе таблицы Cassandra.
Необходимо для составления ключей словаря. Порядок ключевых столбцов в определении словаря должен быть таким же, как в Cassandra.
Значение по умолчанию: 1 (первый ключевой столбец - это ключ партицирования, остальные ключевые столбцы - ключи кластеризации).

View File

@ -362,7 +362,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Query-Id: 96fe0052-01e6-43ce-b12a-6b7370de6e8a
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
# HELP "Query" "Number of executing queries"
@ -520,7 +520,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@ -560,7 +560,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Connection: Keep-Alive
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@ -612,7 +612,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Absolute Path File</body></html>
@ -631,7 +631,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Relative Path File</body></html>

View File

@ -439,6 +439,13 @@ else()
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
# A target to get stripped binary.
# Note: this is different to the above (extract debug symbols to a separate place)
add_custom_target(clickhouse-stripped ALL
COMMAND "${STRIP_PATH}" -o "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-stripped" --strip-debug --remove-section=.comment --remove-section=.note "${CMAKE_CURRENT_BINARY_DIR}/clickhouse"
DEPENDS clickhouse
COMMENT "Stripping clickhouse binary" VERBATIM)
if (ENABLE_TESTS)
set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms)
add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS})

View File

@ -425,7 +425,7 @@ void Client::connect()
if (hosts_and_ports.empty())
{
String host = config().getString("host", "localhost");
UInt16 port = ConnectionParameters::getPortFromConfig(config());
UInt16 port = ConnectionParameters::getPortFromConfig(config(), host);
hosts_and_ports.emplace_back(HostAndPort{host, port});
}

View File

@ -5,6 +5,7 @@
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/randomSeed.h>
#include <Common/setThreadName.h>
#include <Common/CurrentMetrics.h>
#include <Interpreters/InterpreterInsertQuery.h>
@ -59,7 +60,7 @@ void ClusterCopier::init()
getContext()->setClustersConfig(task_cluster_current_config, false, task_cluster->clusters_prefix);
/// Set up shards and their priority
task_cluster->random_engine.seed(task_cluster->random_device());
task_cluster->random_engine.seed(randomSeed());
for (auto & task_table : task_cluster->table_tasks)
{
task_table.cluster_pull = getContext()->getCluster(task_table.cluster_pull_name);

View File

@ -7,7 +7,7 @@
#include <Poco/Util/AbstractConfiguration.h>
#include <random>
#include <pcg_random.hpp>
namespace DB
{
@ -45,7 +45,6 @@ struct TaskCluster
/// Subtasks
TasksTable table_tasks;
std::random_device random_device;
pcg64 random_engine;
};

View File

@ -420,7 +420,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
/// Create symlinks.
std::initializer_list<const char *> tools
std::initializer_list<std::string_view> tools
{
"clickhouse-server",
"clickhouse-client",
@ -435,6 +435,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
"clickhouse-keeper",
"clickhouse-keeper-converter",
"clickhouse-disks",
"ch",
"chl",
"chc",
};
for (const auto & tool : tools)
@ -444,29 +447,39 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (fs::exists(symlink_path))
{
bool is_symlink = FS::isSymlink(symlink_path);
fs::path points_to;
if (is_symlink)
points_to = fs::weakly_canonical(FS::readSymlink(symlink_path));
if (is_symlink && (points_to == main_bin_path || (options.count("link") && points_to == binary_self_canonical_path)))
/// Do not replace short named symlinks if they are already present in the system
/// to avoid collision with other tools.
if (!tool.starts_with("clickhouse"))
{
fmt::print("Symlink {} already exists. Will keep it.\n", symlink_path.string());
need_to_create = false;
}
else
{
if (!is_symlink)
bool is_symlink = FS::isSymlink(symlink_path);
fs::path points_to;
if (is_symlink)
points_to = fs::weakly_canonical(FS::readSymlink(symlink_path));
if (is_symlink && (points_to == main_bin_path || (options.count("link") && points_to == binary_self_canonical_path)))
{
fs::path rename_path = symlink_path.replace_extension(".old");
fmt::print("File {} already exists but it's not a symlink. Will rename to {}.\n",
symlink_path.string(), rename_path.string());
fs::rename(symlink_path, rename_path);
need_to_create = false;
}
else if (points_to != main_bin_path)
else
{
fmt::print("Symlink {} already exists but it points to {}. Will replace the old symlink to {}.\n",
symlink_path.string(), points_to.string(), main_bin_path.string());
fs::remove(symlink_path);
if (!is_symlink)
{
fs::path rename_path = symlink_path.replace_extension(".old");
fmt::print("File {} already exists but it's not a symlink. Will rename to {}.\n",
symlink_path.string(), rename_path.string());
fs::rename(symlink_path, rename_path);
}
else if (points_to != main_bin_path)
{
fmt::print("Symlink {} already exists but it points to {}. Will replace the old symlink to {}.\n",
symlink_path.string(), points_to.string(), main_bin_path.string());
fs::remove(symlink_path);
}
}
}
}

View File

@ -424,7 +424,7 @@ void LocalServer::setupUsers()
void LocalServer::connect()
{
connection_parameters = ConnectionParameters(config());
connection_parameters = ConnectionParameters(config(), "localhost");
connection = LocalConnection::createConnection(
connection_parameters, global_context, need_render_progress, need_render_profile_events, server_display_name);
}

View File

@ -2,15 +2,12 @@
#include <csetjmp>
#include <unistd.h>
#ifdef OS_LINUX
#include <sys/mman.h>
#endif
#include <new>
#include <iostream>
#include <vector>
#include <string>
#include <tuple>
#include <string_view>
#include <utility> /// pair
#include <fmt/format.h>
@ -22,7 +19,6 @@
#include <Common/IO.h>
#include <base/phdr_cache.h>
#include <base/scope_guard.h>
/// Universal executable for various clickhouse applications
@ -98,7 +94,7 @@ using MainFunc = int (*)(int, char**);
#if !defined(FUZZING_MODE)
/// Add an item here to register new application
std::pair<const char *, MainFunc> clickhouse_applications[] =
std::pair<std::string_view, MainFunc> clickhouse_applications[] =
{
#if ENABLE_CLICKHOUSE_LOCAL
{"local", mainEntryClickHouseLocal},
@ -158,6 +154,18 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
#endif
};
/// Add an item here to register a new short name
std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
{
#if ENABLE_CLICKHOUSE_LOCAL
{"ch", "local"},
{"chl", "local"},
#endif
#if ENABLE_CLICKHOUSE_CLIENT
{"chc", "client"},
#endif
};
int printHelp(int, char **)
{
std::cerr << "Use one of the following commands:" << std::endl;
@ -387,15 +395,21 @@ void checkHarmfulEnvironmentVariables(char ** argv)
}
bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv)
bool isClickhouseApp(std::string_view app_suffix, std::vector<char *> & argv)
{
for (const auto & [alias, name] : clickhouse_short_names)
if (app_suffix == name
&& !argv.empty() && (alias == argv[0] || endsWith(argv[0], "/" + std::string(alias))))
return true;
/// Use app if the first arg 'app' is passed (the arg should be quietly removed)
if (argv.size() >= 2)
{
auto first_arg = argv.begin() + 1;
/// 'clickhouse --client ...' and 'clickhouse client ...' are Ok
if (*first_arg == "--" + app_suffix || *first_arg == app_suffix)
if (*first_arg == app_suffix
|| (std::string_view(*first_arg).starts_with("--") && std::string_view(*first_arg).substr(2) == app_suffix))
{
argv.erase(first_arg);
return true;
@ -403,7 +417,7 @@ bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv)
}
/// Use app if clickhouse binary is run through symbolic link with name clickhouse-app
std::string app_name = "clickhouse-" + app_suffix;
std::string app_name = "clickhouse-" + std::string(app_suffix);
return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name));
}

View File

@ -1106,7 +1106,7 @@ public:
{
if (isInteger(data_type))
{
if (isUnsignedInteger(data_type))
if (isUInt(data_type))
return std::make_unique<UnsignedIntegerModel>(seed);
else
return std::make_unique<SignedIntegerModel>(seed);

View File

@ -11,8 +11,8 @@ else ()
endif ()
add_custom_target (self-extracting ALL
${CMAKE_COMMAND} -E remove clickhouse
${CMAKE_COMMAND} -E remove clickhouse clickhouse-stripped
COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse ../clickhouse
DEPENDS clickhouse compressor
COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-stripped ../clickhouse-stripped
DEPENDS clickhouse clickhouse-stripped compressor
)

View File

@ -104,15 +104,14 @@
</url_scheme_mappers>
<!-- Add headers to response in options request. OPTIONS method is used in CORS preflight requests. -->
<!-- It is off by default. Next headers are obligate for CORS.-->
<!-- http_options_response>
<http_options_response>
<header>
<name>Access-Control-Allow-Origin</name>
<value>*</value>
</header>
<header>
<name>Access-Control-Allow-Headers</name>
<value>origin, x-requested-with</value>
<value>origin, x-requested-with, x-clickhouse-format, x-clickhouse-user, x-clickhouse-key, Authorization</value>
</header>
<header>
<name>Access-Control-Allow-Methods</name>
@ -122,7 +121,7 @@
<name>Access-Control-Max-Age</name>
<value>86400</value>
</header>
</http_options_response -->
</http_options_response>
<!-- It is the name that will be shown in the clickhouse-client.
By default, anything with "production" will be highlighted in red in query prompt.
@ -245,7 +244,7 @@
<max_connections>4096</max_connections>
<!-- For 'Connection: keep-alive' in HTTP 1.1 -->
<keep_alive_timeout>3</keep_alive_timeout>
<keep_alive_timeout>10</keep_alive_timeout>
<!-- gRPC protocol (see src/Server/grpc_protos/clickhouse_grpc.proto for the API) -->
<!-- <grpc_port>9100</grpc_port> -->
@ -326,7 +325,7 @@
Query can upscale to desired number of threads during execution if more threads become available.
-->
<concurrent_threads_soft_limit_num>0</concurrent_threads_soft_limit_num>
<concurrent_threads_soft_limit_ratio_to_cores>0</concurrent_threads_soft_limit_ratio_to_cores>
<concurrent_threads_soft_limit_ratio_to_cores>2</concurrent_threads_soft_limit_ratio_to_cores>
<!-- Maximum number of concurrent queries. -->
<max_concurrent_queries>1000</max_concurrent_queries>

View File

@ -1,7 +0,0 @@
<clickhouse>
<profiles>
<default>
<allow_introspection_functions>1</allow_introspection_functions>
</default>
</profiles>
</clickhouse>

View File

@ -0,0 +1 @@
../../../tests/config/users.d/allow_introspection_functions.yaml

View File

@ -86,6 +86,13 @@
<!-- User can create other users and grant rights to them. -->
<!-- <access_management>1</access_management> -->
<!-- User permissions can be granted here -->
<!--
<grants>
<query>GRANT ALL ON *.*</query>
</grants>
-->
</default>
</users>

View File

@ -91,6 +91,10 @@ users:
# User can create other users and grant rights to them.
# access_management: 1
# SQL expressions for grants available for that user - https://clickhouse.com/docs/en/sql-reference/statements/grant
# grants:
# - query: GRANT ALL ON *.*
# Quotas.
quotas:
# Name of quota.

View File

@ -12,7 +12,7 @@
#include <Common/Config/ConfigReloader.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <Common/TransformEndianness.hpp>
#include <Common/transformEndianness.h>
#include <Core/Settings.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>

View File

@ -1,7 +1,18 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionAnalysisOfVariance.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <IO/VarInt.h>
#include <array>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <Columns/ColumnNullable.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/Moments.h>
#include <Common/NaNUtils.h>
#include <Common/assert_cast.h>
namespace DB
{
@ -13,6 +24,82 @@ namespace ErrorCodes
namespace
{
using AggregateFunctionAnalysisOfVarianceData = AnalysisOfVarianceMoments<Float64>;
/// One way analysis of variance
/// Provides a statistical test of whether two or more population means are equal (null hypothesis)
/// Has an assumption that subjects from group i have normal distribution.
/// Accepts two arguments - a value and a group number which this value belongs to.
/// Groups are enumerated starting from 0 and there should be at least two groups to perform a test
/// Moreover there should be at least one group with the number of observations greater than one.
class AggregateFunctionAnalysisOfVariance final : public IAggregateFunctionDataHelper<AggregateFunctionAnalysisOfVarianceData, AggregateFunctionAnalysisOfVariance>
{
public:
explicit AggregateFunctionAnalysisOfVariance(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper(arguments, params, createResultType())
{}
DataTypePtr createResultType() const
{
DataTypes types {std::make_shared<DataTypeNumber<Float64>>(), std::make_shared<DataTypeNumber<Float64>>() };
Strings names {"f_statistic", "p_value"};
return std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
);
}
String getName() const override { return "analysisOfVariance"; }
bool allocatesMemoryInArena() const override { return false; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
data(place).add(columns[0]->getFloat64(row_num), columns[1]->getUInt(row_num));
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
data(place).merge(data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
data(place).write(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
data(place).read(buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto f_stat = data(place).getFStatistic();
auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
if (unlikely(!std::isfinite(f_stat) || f_stat < 0))
{
column_stat.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
column_value.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
return;
}
auto p_value = data(place).getPValue(f_stat);
/// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value));
column_stat.getData().push_back(f_stat);
column_value.getData().push_back(p_value);
}
};
AggregateFunctionPtr createAggregateFunctionAnalysisOfVariance(const std::string & name, const DataTypes & arguments, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);

View File

@ -1,97 +0,0 @@
#pragma once
#include <IO/VarInt.h>
#include <IO/WriteHelpers.h>
#include <array>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnsCommon.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/Moments.h>
#include "Common/NaNUtils.h"
#include <Common/assert_cast.h>
#include <Core/Types.h>
namespace DB
{
using AggregateFunctionAnalysisOfVarianceData = AnalysisOfVarianceMoments<Float64>;
/// One way analysis of variance
/// Provides a statistical test of whether two or more population means are equal (null hypothesis)
/// Has an assumption that subjects from group i have normal distribution.
/// Accepts two arguments - a value and a group number which this value belongs to.
/// Groups are enumerated starting from 0 and there should be at least two groups to perform a test
/// Moreover there should be at least one group with the number of observations greater than one.
class AggregateFunctionAnalysisOfVariance final : public IAggregateFunctionDataHelper<AggregateFunctionAnalysisOfVarianceData, AggregateFunctionAnalysisOfVariance>
{
public:
explicit AggregateFunctionAnalysisOfVariance(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper(arguments, params, createResultType())
{}
DataTypePtr createResultType() const
{
DataTypes types {std::make_shared<DataTypeNumber<Float64>>(), std::make_shared<DataTypeNumber<Float64>>() };
Strings names {"f_statistic", "p_value"};
return std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
);
}
String getName() const override { return "analysisOfVariance"; }
bool allocatesMemoryInArena() const override { return false; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
data(place).add(columns[0]->getFloat64(row_num), columns[1]->getUInt(row_num));
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
data(place).merge(data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
data(place).write(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
data(place).read(buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto f_stat = data(place).getFStatistic();
auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
if (unlikely(!std::isfinite(f_stat) || f_stat < 0))
{
column_stat.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
column_value.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
return;
}
auto p_value = data(place).getPValue(f_stat);
/// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value));
column_stat.getData().push_back(f_stat);
column_value.getData().push_back(p_value);
}
};
}

View File

@ -1,12 +1,14 @@
#include <memory>
#include <type_traits>
#include <AggregateFunctions/AggregateFunctionAvg.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionAvgWeighted.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
@ -16,13 +18,93 @@ namespace ErrorCodes
namespace
{
template <typename T>
using AvgWeightedFieldType = std::conditional_t<DecimalOrExtendedInt<T>,
Float64, // no way to do UInt128 * UInt128, better cast to Float64
NearestFieldType<T>>;
template <typename T, typename U>
using MaxFieldType = std::conditional_t<(sizeof(AvgWeightedFieldType<T>) > sizeof(AvgWeightedFieldType<U>)),
AvgWeightedFieldType<T>, AvgWeightedFieldType<U>>;
template <typename Value, typename Weight>
class AggregateFunctionAvgWeighted final :
public AggregateFunctionAvgBase<
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>
{
public:
using Base = AggregateFunctionAvgBase<
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>;
using Base::Base;
using Numerator = typename Base::Numerator;
using Denominator = typename Base::Denominator;
using Fraction = typename Base::Fraction;
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto & weights = static_cast<const ColumnVector<Weight> &>(*columns[1]);
this->data(place).numerator += static_cast<Numerator>(
static_cast<const ColumnVector<Value> &>(*columns[0]).getData()[row_num])
* static_cast<Numerator>(weights.getData()[row_num]);
this->data(place).denominator += static_cast<Denominator>(weights.getData()[row_num]);
}
String getName() const override { return "avgWeighted"; }
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
bool can_be_compiled = Base::isCompilable();
can_be_compiled &= canBeNativeType<Weight>();
return can_be_compiled;
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * numerator_type = toNativeType<Numerator>(b);
auto * numerator_ptr = aggregate_data_ptr;
auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr);
auto numerator_data_type = toNativeDataType<Numerator>();
auto * argument = nativeCast(b, arguments[0], numerator_data_type);
auto * weight = nativeCast(b, arguments[1], numerator_data_type);
llvm::Value * value_weight_multiplication = argument->getType()->isIntegerTy() ? b.CreateMul(argument, weight) : b.CreateFMul(argument, weight);
auto * numerator_result_value = numerator_type->isIntegerTy() ? b.CreateAdd(numerator_value, value_weight_multiplication) : b.CreateFAdd(numerator_value, value_weight_multiplication);
b.CreateStore(numerator_result_value, numerator_ptr);
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_ptr = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, denominator_offset);
auto * weight_cast_to_denominator = nativeCast(b, arguments[1], toNativeDataType<Denominator>());
auto * denominator_value = b.CreateLoad(denominator_type, denominator_ptr);
auto * denominator_value_updated = denominator_type->isIntegerTy() ? b.CreateAdd(denominator_value, weight_cast_to_denominator) : b.CreateFAdd(denominator_value, weight_cast_to_denominator);
b.CreateStore(denominator_value_updated, denominator_ptr);
}
#endif
};
bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
{
const WhichDataType l_dt(left), r_dt(right);
constexpr auto allow = [](WhichDataType t)
{
return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
return t.isInt() || t.isUInt() || t.isFloat();
};
return allow(l_dt) && allow(r_dt);
@ -33,7 +115,6 @@ bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
{ \
LINE(Int8); LINE(Int16); LINE(Int32); LINE(Int64); LINE(Int128); LINE(Int256); \
LINE(UInt8); LINE(UInt16); LINE(UInt32); LINE(UInt64); LINE(UInt128); LINE(UInt256); \
LINE(Decimal32); LINE(Decimal64); LINE(Decimal128); LINE(Decimal256); \
LINE(Float32); LINE(Float64); \
default: return nullptr; \
}
@ -75,31 +156,14 @@ createAggregateFunctionAvgWeighted(const std::string & name, const DataTypes & a
"Types {} and {} are non-conforming as arguments for aggregate function {}",
data_type->getName(), data_type_weight->getName(), name);
AggregateFunctionPtr ptr;
const bool left_decimal = isDecimal(data_type);
const bool right_decimal = isDecimal(data_type_weight);
/// We multiply value by weight, so actual scale of numerator is <scale of value> + <scale of weight>
if (left_decimal && right_decimal)
ptr.reset(create(*data_type, *data_type_weight,
argument_types,
getDecimalScale(*data_type) + getDecimalScale(*data_type_weight), getDecimalScale(*data_type_weight)));
else if (left_decimal)
ptr.reset(create(*data_type, *data_type_weight, argument_types,
getDecimalScale(*data_type)));
else if (right_decimal)
ptr.reset(create(*data_type, *data_type_weight, argument_types,
getDecimalScale(*data_type_weight), getDecimalScale(*data_type_weight)));
else
ptr.reset(create(*data_type, *data_type_weight, argument_types));
return ptr;
return AggregateFunctionPtr(create(*data_type, *data_type_weight, argument_types));
}
}
void registerAggregateFunctionAvgWeighted(AggregateFunctionFactory & factory)
{
factory.registerFunction("avgWeighted", createAggregateFunctionAvgWeighted, AggregateFunctionFactory::CaseSensitive);
factory.registerFunction("avgWeighted", createAggregateFunctionAvgWeighted);
}
}

View File

@ -1,90 +0,0 @@
#pragma once
#include <type_traits>
#include <AggregateFunctions/AggregateFunctionAvg.h>
namespace DB
{
struct Settings;
template <typename T>
using AvgWeightedFieldType = std::conditional_t<is_decimal<T>,
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
std::conditional_t<DecimalOrExtendedInt<T>,
Float64, // no way to do UInt128 * UInt128, better cast to Float64
NearestFieldType<T>>>;
template <typename T, typename U>
using MaxFieldType = std::conditional_t<(sizeof(AvgWeightedFieldType<T>) > sizeof(AvgWeightedFieldType<U>)),
AvgWeightedFieldType<T>, AvgWeightedFieldType<U>>;
template <typename Value, typename Weight>
class AggregateFunctionAvgWeighted final :
public AggregateFunctionAvgBase<
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>
{
public:
using Base = AggregateFunctionAvgBase<
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>;
using Base::Base;
using Numerator = typename Base::Numerator;
using Denominator = typename Base::Denominator;
using Fraction = typename Base::Fraction;
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto& weights = static_cast<const ColumnVectorOrDecimal<Weight> &>(*columns[1]);
this->data(place).numerator += static_cast<Numerator>(
static_cast<const ColumnVectorOrDecimal<Value> &>(*columns[0]).getData()[row_num]) *
static_cast<Numerator>(weights.getData()[row_num]);
this->data(place).denominator += static_cast<Denominator>(weights.getData()[row_num]);
}
String getName() const override { return "avgWeighted"; }
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
bool can_be_compiled = Base::isCompilable();
can_be_compiled &= canBeNativeType<Weight>();
return can_be_compiled;
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * numerator_type = toNativeType<Numerator>(b);
auto * numerator_ptr = aggregate_data_ptr;
auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr);
auto numerator_data_type = toNativeDataType<Numerator>();
auto * argument = nativeCast(b, arguments[0], numerator_data_type);
auto * weight = nativeCast(b, arguments[1], numerator_data_type);
llvm::Value * value_weight_multiplication = argument->getType()->isIntegerTy() ? b.CreateMul(argument, weight) : b.CreateFMul(argument, weight);
auto * numerator_result_value = numerator_type->isIntegerTy() ? b.CreateAdd(numerator_value, value_weight_multiplication) : b.CreateFAdd(numerator_value, value_weight_multiplication);
b.CreateStore(numerator_result_value, numerator_ptr);
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_ptr = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, denominator_offset);
auto * weight_cast_to_denominator = nativeCast(b, arguments[1], toNativeDataType<Denominator>());
auto * denominator_value = b.CreateLoad(denominator_type, denominator_ptr);
auto * denominator_value_updated = denominator_type->isIntegerTy() ? b.CreateAdd(denominator_value, weight_cast_to_denominator) : b.CreateFAdd(denominator_value, weight_cast_to_denominator);
b.CreateStore(denominator_value_updated, denominator_ptr);
}
#endif
};
}

View File

@ -1,11 +1,27 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionBitwise.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include "config.h"
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
struct Settings;
namespace ErrorCodes
@ -16,6 +32,179 @@ namespace ErrorCodes
namespace
{
template <typename T>
struct AggregateFunctionGroupBitOrData
{
T value = 0;
static const char * name() { return "groupBitOr"; }
void update(T x) { value |= x; }
#if USE_EMBEDDED_COMPILER
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
{
auto type = toNativeType<T>(builder);
builder.CreateStore(llvm::Constant::getNullValue(type), value_ptr);
}
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
{
return builder.CreateOr(lhs, rhs);
}
#endif
};
template <typename T>
struct AggregateFunctionGroupBitAndData
{
T value = -1; /// Two's complement arithmetic, sign extension.
static const char * name() { return "groupBitAnd"; }
void update(T x) { value &= x; }
#if USE_EMBEDDED_COMPILER
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
{
auto type = toNativeType<T>(builder);
builder.CreateStore(llvm::ConstantInt::get(type, -1), value_ptr);
}
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
{
return builder.CreateAnd(lhs, rhs);
}
#endif
};
template <typename T>
struct AggregateFunctionGroupBitXorData
{
T value = 0;
static const char * name() { return "groupBitXor"; }
void update(T x) { value ^= x; }
#if USE_EMBEDDED_COMPILER
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
{
auto type = toNativeType<T>(builder);
builder.CreateStore(llvm::Constant::getNullValue(type), value_ptr);
}
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
{
return builder.CreateXor(lhs, rhs);
}
#endif
};
/// Counts bitwise operation on numbers.
template <typename T, typename Data>
class AggregateFunctionBitwise final : public IAggregateFunctionDataHelper<Data, AggregateFunctionBitwise<T, Data>>
{
public:
explicit AggregateFunctionBitwise(const DataTypePtr & type)
: IAggregateFunctionDataHelper<Data, AggregateFunctionBitwise<T, Data>>({type}, {}, createResultType())
{}
String getName() const override { return Data::name(); }
static DataTypePtr createResultType()
{
return std::make_shared<DataTypeNumber<T>>();
}
bool allocatesMemoryInArena() const override { return false; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
this->data(place).update(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).update(this->data(rhs).value);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
writeBinary(this->data(place).value, buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
readBinary(this->data(place).value, buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).value);
}
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
auto return_type = this->getResultType();
return canBeNativeType(*return_type);
}
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
auto * value_ptr = aggregate_data_ptr;
Data::compileCreate(builder, value_ptr);
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, this->getResultType());
auto * value_ptr = aggregate_data_ptr;
auto * value = b.CreateLoad(return_type, value_ptr);
auto * result_value = Data::compileUpdate(builder, value, arguments[0].value);
b.CreateStore(result_value, value_ptr);
}
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, this->getResultType());
auto * value_dst_ptr = aggregate_data_dst_ptr;
auto * value_dst = b.CreateLoad(return_type, value_dst_ptr);
auto * value_src_ptr = aggregate_data_src_ptr;
auto * value_src = b.CreateLoad(return_type, value_src_ptr);
auto * result_value = Data::compileUpdate(builder, value_dst, value_src);
b.CreateStore(result_value, value_dst_ptr);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, this->getResultType());
auto * value_ptr = aggregate_data_ptr;
return b.CreateLoad(return_type, value_ptr);
}
#endif
};
template <template <typename> class Data>
AggregateFunctionPtr createAggregateFunctionBitwise(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{

View File

@ -1,197 +0,0 @@
#pragma once
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include "config.h"
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
struct Settings;
template <typename T>
struct AggregateFunctionGroupBitOrData
{
T value = 0;
static const char * name() { return "groupBitOr"; }
void update(T x) { value |= x; }
#if USE_EMBEDDED_COMPILER
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
{
auto type = toNativeType<T>(builder);
builder.CreateStore(llvm::Constant::getNullValue(type), value_ptr);
}
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
{
return builder.CreateOr(lhs, rhs);
}
#endif
};
template <typename T>
struct AggregateFunctionGroupBitAndData
{
T value = -1; /// Two's complement arithmetic, sign extension.
static const char * name() { return "groupBitAnd"; }
void update(T x) { value &= x; }
#if USE_EMBEDDED_COMPILER
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
{
auto type = toNativeType<T>(builder);
builder.CreateStore(llvm::ConstantInt::get(type, -1), value_ptr);
}
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
{
return builder.CreateAnd(lhs, rhs);
}
#endif
};
template <typename T>
struct AggregateFunctionGroupBitXorData
{
T value = 0;
static const char * name() { return "groupBitXor"; }
void update(T x) { value ^= x; }
#if USE_EMBEDDED_COMPILER
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
{
auto type = toNativeType<T>(builder);
builder.CreateStore(llvm::Constant::getNullValue(type), value_ptr);
}
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
{
return builder.CreateXor(lhs, rhs);
}
#endif
};
/// Counts bitwise operation on numbers.
template <typename T, typename Data>
class AggregateFunctionBitwise final : public IAggregateFunctionDataHelper<Data, AggregateFunctionBitwise<T, Data>>
{
public:
explicit AggregateFunctionBitwise(const DataTypePtr & type)
: IAggregateFunctionDataHelper<Data, AggregateFunctionBitwise<T, Data>>({type}, {}, createResultType())
{}
String getName() const override { return Data::name(); }
static DataTypePtr createResultType()
{
return std::make_shared<DataTypeNumber<T>>();
}
bool allocatesMemoryInArena() const override { return false; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
this->data(place).update(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).update(this->data(rhs).value);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
writeBinary(this->data(place).value, buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
readBinary(this->data(place).value, buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).value);
}
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
auto return_type = this->getResultType();
return canBeNativeType(*return_type);
}
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
auto * value_ptr = aggregate_data_ptr;
Data::compileCreate(builder, value_ptr);
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, this->getResultType());
auto * value_ptr = aggregate_data_ptr;
auto * value = b.CreateLoad(return_type, value_ptr);
auto * result_value = Data::compileUpdate(builder, value, arguments[0].value);
b.CreateStore(result_value, value_ptr);
}
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, this->getResultType());
auto * value_dst_ptr = aggregate_data_dst_ptr;
auto * value_dst = b.CreateLoad(return_type, value_dst_ptr);
auto * value_src_ptr = aggregate_data_src_ptr;
auto * value_src = b.CreateLoad(return_type, value_src_ptr);
auto * result_value = Data::compileUpdate(builder, value_dst, value_src);
b.CreateStore(result_value, value_dst_ptr);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, this->getResultType());
auto * value_ptr = aggregate_data_ptr;
return b.CreateLoad(return_type, value_ptr);
}
#endif
};
}

View File

@ -1,7 +1,14 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionBoundingRatio.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/assert_cast.h>
#include <Common/transformEndianness.h>
namespace DB
{
@ -10,11 +17,169 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
}
namespace
{
/** Tracks the leftmost and rightmost (x, y) data points.
*/
struct AggregateFunctionBoundingRatioData
{
struct Point
{
Float64 x;
Float64 y;
};
bool empty = true;
Point left;
Point right;
void add(Float64 x, Float64 y)
{
Point point{x, y};
if (empty)
{
left = point;
right = point;
empty = false;
}
else if (point.x < left.x)
{
left = point;
}
else if (point.x > right.x)
{
right = point;
}
}
void merge(const AggregateFunctionBoundingRatioData & other)
{
if (empty)
{
*this = other;
}
else
{
if (other.left.x < left.x)
left = other.left;
if (other.right.x > right.x)
right = other.right;
}
}
void serialize(WriteBuffer & buf) const;
void deserialize(ReadBuffer & buf);
};
template <std::endian endian>
inline void transformEndianness(AggregateFunctionBoundingRatioData::Point & p)
{
DB::transformEndianness<endian>(p.x);
DB::transformEndianness<endian>(p.y);
}
void AggregateFunctionBoundingRatioData::serialize(WriteBuffer & buf) const
{
writeBinaryLittleEndian(empty, buf);
if (!empty)
{
writeBinaryLittleEndian(left, buf);
writeBinaryLittleEndian(right, buf);
}
}
void AggregateFunctionBoundingRatioData::deserialize(ReadBuffer & buf)
{
readBinaryLittleEndian(empty, buf);
if (!empty)
{
readBinaryLittleEndian(left, buf);
readBinaryLittleEndian(right, buf);
}
}
inline void writeBinary(const AggregateFunctionBoundingRatioData::Point & p, WriteBuffer & buf)
{
writePODBinary(p, buf);
}
inline void readBinary(AggregateFunctionBoundingRatioData::Point & p, ReadBuffer & buf)
{
readPODBinary(p, buf);
}
class AggregateFunctionBoundingRatio final : public IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>
{
private:
/** Calculates the slope of a line between leftmost and rightmost data points.
* (y2 - y1) / (x2 - x1)
*/
static Float64 NO_SANITIZE_UNDEFINED getBoundingRatio(const AggregateFunctionBoundingRatioData & data)
{
if (data.empty)
return std::numeric_limits<Float64>::quiet_NaN();
return (data.right.y - data.left.y) / (data.right.x - data.left.x);
}
public:
String getName() const override
{
return "boundingRatio";
}
explicit AggregateFunctionBoundingRatio(const DataTypes & arguments)
: IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>(arguments, {}, std::make_shared<DataTypeFloat64>())
{
const auto * x_arg = arguments.at(0).get();
const auto * y_arg = arguments.at(1).get();
if (!x_arg->isValueRepresentedByNumber() || !y_arg->isValueRepresentedByNumber())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Illegal types of arguments of aggregate function {}, must have number representation.",
getName());
}
bool allocatesMemoryInArena() const override { return false; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override
{
/// NOTE Slightly inefficient.
const auto x = columns[0]->getFloat64(row_num);
const auto y = columns[1]->getFloat64(row_num);
data(place).add(x, y);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
data(place).merge(data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
data(place).serialize(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
data(place).deserialize(buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
assert_cast<ColumnFloat64 &>(to).getData().push_back(getBoundingRatio(data(place)));
}
};
AggregateFunctionPtr createAggregateFunctionRate(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);

View File

@ -1,177 +0,0 @@
#pragma once
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/assert_cast.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
/** Tracks the leftmost and rightmost (x, y) data points.
*/
struct AggregateFunctionBoundingRatioData
{
struct Point
{
Float64 x;
Float64 y;
};
bool empty = true;
Point left;
Point right;
void add(Float64 x, Float64 y)
{
Point point{x, y};
if (empty)
{
left = point;
right = point;
empty = false;
}
else if (point.x < left.x)
{
left = point;
}
else if (point.x > right.x)
{
right = point;
}
}
void merge(const AggregateFunctionBoundingRatioData & other)
{
if (empty)
{
*this = other;
}
else
{
if (other.left.x < left.x)
left = other.left;
if (other.right.x > right.x)
right = other.right;
}
}
void serialize(WriteBuffer & buf) const;
void deserialize(ReadBuffer & buf);
};
template <std::endian endian>
inline void transformEndianness(AggregateFunctionBoundingRatioData::Point & p)
{
transformEndianness<endian>(p.x);
transformEndianness<endian>(p.y);
}
void AggregateFunctionBoundingRatioData::serialize(WriteBuffer & buf) const
{
writeBinaryLittleEndian(empty, buf);
if (!empty)
{
writeBinaryLittleEndian(left, buf);
writeBinaryLittleEndian(right, buf);
}
}
void AggregateFunctionBoundingRatioData::deserialize(ReadBuffer & buf)
{
readBinaryLittleEndian(empty, buf);
if (!empty)
{
readBinaryLittleEndian(left, buf);
readBinaryLittleEndian(right, buf);
}
}
inline void writeBinary(const AggregateFunctionBoundingRatioData::Point & p, WriteBuffer & buf)
{
writePODBinary(p, buf);
}
inline void readBinary(AggregateFunctionBoundingRatioData::Point & p, ReadBuffer & buf)
{
readPODBinary(p, buf);
}
class AggregateFunctionBoundingRatio final : public IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>
{
private:
/** Calculates the slope of a line between leftmost and rightmost data points.
* (y2 - y1) / (x2 - x1)
*/
static Float64 NO_SANITIZE_UNDEFINED getBoundingRatio(const AggregateFunctionBoundingRatioData & data)
{
if (data.empty)
return std::numeric_limits<Float64>::quiet_NaN();
return (data.right.y - data.left.y) / (data.right.x - data.left.x);
}
public:
String getName() const override
{
return "boundingRatio";
}
explicit AggregateFunctionBoundingRatio(const DataTypes & arguments)
: IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>(arguments, {}, std::make_shared<DataTypeFloat64>())
{
const auto * x_arg = arguments.at(0).get();
const auto * y_arg = arguments.at(1).get();
if (!x_arg->isValueRepresentedByNumber() || !y_arg->isValueRepresentedByNumber())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Illegal types of arguments of aggregate function {}, must have number representation.",
getName());
}
bool allocatesMemoryInArena() const override { return false; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override
{
/// NOTE Slightly inefficient.
const auto x = columns[0]->getFloat64(row_num);
const auto y = columns[1]->getFloat64(row_num);
data(place).add(x, y);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
data(place).merge(data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
data(place).serialize(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
data(place).deserialize(buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
assert_cast<ColumnFloat64 &>(to).getData().push_back(getBoundingRatio(data(place)));
}
};
}

View File

@ -1,9 +1,15 @@
#include <AggregateFunctions/AggregateFunctionDeltaSum.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypesNumber.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
@ -18,6 +24,113 @@ namespace ErrorCodes
namespace
{
template <typename T>
struct AggregationFunctionDeltaSumData
{
T sum = 0;
T last = 0;
T first = 0;
bool seen = false;
};
template <typename T>
class AggregationFunctionDeltaSum final
: public IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>
{
public:
AggregationFunctionDeltaSum(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{arguments, params, createResultType()}
{}
AggregationFunctionDeltaSum()
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{}
{}
String getName() const override { return "deltaSum"; }
static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<T>>(); }
bool allocatesMemoryInArena() const override { return false; }
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
auto value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
if ((this->data(place).last < value) && this->data(place).seen)
{
this->data(place).sum += (value - this->data(place).last);
}
this->data(place).last = value;
if (!this->data(place).seen)
{
this->data(place).first = value;
this->data(place).seen = true;
}
}
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
auto place_data = &this->data(place);
auto rhs_data = &this->data(rhs);
if ((place_data->last < rhs_data->first) && place_data->seen && rhs_data->seen)
{
// If the lhs last number seen is less than the first number the rhs saw, the lhs is before
// the rhs, for example [0, 2] [4, 7]. So we want to add the deltasums, but also add the
// difference between lhs last number and rhs first number (the 2 and 4). Then we want to
// take last value from the rhs, so first and last become 0 and 7.
place_data->sum += rhs_data->sum + (rhs_data->first - place_data->last);
place_data->last = rhs_data->last;
}
else if ((rhs_data->first < place_data->last && rhs_data->seen && place_data->seen))
{
// In the opposite scenario, the lhs comes after the rhs, e.g. [4, 6] [1, 2]. Since we
// assume the input interval states are sorted by time, we assume this is a counter
// reset, and therefore do *not* add the difference between our first value and the
// rhs last value.
place_data->sum += rhs_data->sum;
place_data->last = rhs_data->last;
}
else if (rhs_data->seen && !place_data->seen)
{
// If we're here then the lhs is an empty state and the rhs does have some state, so
// we'll just take that state.
place_data->first = rhs_data->first;
place_data->last = rhs_data->last;
place_data->sum = rhs_data->sum;
place_data->seen = rhs_data->seen;
}
// Otherwise lhs either has data or is uninitialized, so we don't need to modify its values.
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
writeBinaryLittleEndian(this->data(place).sum, buf);
writeBinaryLittleEndian(this->data(place).first, buf);
writeBinaryLittleEndian(this->data(place).last, buf);
writeBinaryLittleEndian(this->data(place).seen, buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
readBinaryLittleEndian(this->data(place).sum, buf);
readBinaryLittleEndian(this->data(place).first, buf);
readBinaryLittleEndian(this->data(place).last, buf);
readBinaryLittleEndian(this->data(place).seen, buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).sum);
}
};
AggregateFunctionPtr createAggregateFunctionDeltaSum(
const String & name,
const DataTypes & arguments,

View File

@ -1,126 +0,0 @@
#pragma once
#include <type_traits>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
struct Settings;
template <typename T>
struct AggregationFunctionDeltaSumData
{
T sum = 0;
T last = 0;
T first = 0;
bool seen = false;
};
template <typename T>
class AggregationFunctionDeltaSum final
: public IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>
{
public:
AggregationFunctionDeltaSum(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{arguments, params, createResultType()}
{}
AggregationFunctionDeltaSum()
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{}
{}
String getName() const override { return "deltaSum"; }
static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<T>>(); }
bool allocatesMemoryInArena() const override { return false; }
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
auto value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
if ((this->data(place).last < value) && this->data(place).seen)
{
this->data(place).sum += (value - this->data(place).last);
}
this->data(place).last = value;
if (!this->data(place).seen)
{
this->data(place).first = value;
this->data(place).seen = true;
}
}
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
auto place_data = &this->data(place);
auto rhs_data = &this->data(rhs);
if ((place_data->last < rhs_data->first) && place_data->seen && rhs_data->seen)
{
// If the lhs last number seen is less than the first number the rhs saw, the lhs is before
// the rhs, for example [0, 2] [4, 7]. So we want to add the deltasums, but also add the
// difference between lhs last number and rhs first number (the 2 and 4). Then we want to
// take last value from the rhs, so first and last become 0 and 7.
place_data->sum += rhs_data->sum + (rhs_data->first - place_data->last);
place_data->last = rhs_data->last;
}
else if ((rhs_data->first < place_data->last && rhs_data->seen && place_data->seen))
{
// In the opposite scenario, the lhs comes after the rhs, e.g. [4, 6] [1, 2]. Since we
// assume the input interval states are sorted by time, we assume this is a counter
// reset, and therefore do *not* add the difference between our first value and the
// rhs last value.
place_data->sum += rhs_data->sum;
place_data->last = rhs_data->last;
}
else if (rhs_data->seen && !place_data->seen)
{
// If we're here then the lhs is an empty state and the rhs does have some state, so
// we'll just take that state.
place_data->first = rhs_data->first;
place_data->last = rhs_data->last;
place_data->sum = rhs_data->sum;
place_data->seen = rhs_data->seen;
}
// Otherwise lhs either has data or is uninitialized, so we don't need to modify its values.
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
writeBinaryLittleEndian(this->data(place).sum, buf);
writeBinaryLittleEndian(this->data(place).first, buf);
writeBinaryLittleEndian(this->data(place).last, buf);
writeBinaryLittleEndian(this->data(place).seen, buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
readBinaryLittleEndian(this->data(place).sum, buf);
readBinaryLittleEndian(this->data(place).first, buf);
readBinaryLittleEndian(this->data(place).last, buf);
readBinaryLittleEndian(this->data(place).seen, buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).sum);
}
};
}

View File

@ -1,22 +1,181 @@
#include <AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypesNumber.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
template <typename ValueType, typename TimestampType>
struct AggregationFunctionDeltaSumTimestampData
{
ValueType sum = 0;
ValueType first = 0;
ValueType last = 0;
TimestampType first_ts = 0;
TimestampType last_ts = 0;
bool seen = false;
};
template <typename ValueType, typename TimestampType>
class AggregationFunctionDeltaSumTimestamp final
: public IAggregateFunctionDataHelper<
AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
>
{
public:
AggregationFunctionDeltaSumTimestamp(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<
AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
>{arguments, params, createResultType()}
{}
AggregationFunctionDeltaSumTimestamp()
: IAggregateFunctionDataHelper<
AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
>{}
{}
bool allocatesMemoryInArena() const override { return false; }
String getName() const override { return "deltaSumTimestamp"; }
static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<ValueType>>(); }
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
auto value = assert_cast<const ColumnVector<ValueType> &>(*columns[0]).getData()[row_num];
auto ts = assert_cast<const ColumnVector<TimestampType> &>(*columns[1]).getData()[row_num];
auto & data = this->data(place);
if ((data.last < value) && data.seen)
{
data.sum += (value - data.last);
}
data.last = value;
data.last_ts = ts;
if (!data.seen)
{
data.first = value;
data.seen = true;
data.first_ts = ts;
}
}
// before returns true if lhs is before rhs or false if it is not or can't be determined
bool ALWAYS_INLINE before(
const AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType> & lhs,
const AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType> & rhs) const
{
if (lhs.last_ts < rhs.first_ts)
return true;
if (lhs.last_ts == rhs.first_ts && (lhs.last_ts < rhs.last_ts || lhs.first_ts < rhs.first_ts))
return true;
return false;
}
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
auto & place_data = this->data(place);
auto & rhs_data = this->data(rhs);
if (!place_data.seen && rhs_data.seen)
{
place_data.sum = rhs_data.sum;
place_data.seen = true;
place_data.first = rhs_data.first;
place_data.first_ts = rhs_data.first_ts;
place_data.last = rhs_data.last;
place_data.last_ts = rhs_data.last_ts;
}
else if (place_data.seen && !rhs_data.seen)
{
return;
}
else if (before(place_data, rhs_data))
{
// This state came before the rhs state
if (rhs_data.first > place_data.last)
place_data.sum += (rhs_data.first - place_data.last);
place_data.sum += rhs_data.sum;
place_data.last = rhs_data.last;
place_data.last_ts = rhs_data.last_ts;
}
else if (before(rhs_data, place_data))
{
// This state came after the rhs state
if (place_data.first > rhs_data.last)
place_data.sum += (place_data.first - rhs_data.last);
place_data.sum += rhs_data.sum;
place_data.first = rhs_data.first;
place_data.first_ts = rhs_data.first_ts;
}
else
{
// If none of those conditions matched, it means both states we are merging have all
// same timestamps. We have to pick either the smaller or larger value so that the
// result is deterministic.
if (place_data.first < rhs_data.first)
{
place_data.first = rhs_data.first;
place_data.last = rhs_data.last;
}
}
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
const auto & data = this->data(place);
writeBinaryLittleEndian(data.sum, buf);
writeBinaryLittleEndian(data.first, buf);
writeBinaryLittleEndian(data.first_ts, buf);
writeBinaryLittleEndian(data.last, buf);
writeBinaryLittleEndian(data.last_ts, buf);
writeBinaryLittleEndian(data.seen, buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
auto & data = this->data(place);
readBinaryLittleEndian(data.sum, buf);
readBinaryLittleEndian(data.first, buf);
readBinaryLittleEndian(data.first_ts, buf);
readBinaryLittleEndian(data.last, buf);
readBinaryLittleEndian(data.last_ts, buf);
readBinaryLittleEndian(data.seen, buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
assert_cast<ColumnVector<ValueType> &>(to).getData().push_back(this->data(place).sum);
}
};
AggregateFunctionPtr createAggregateFunctionDeltaSumTimestamp(
const String & name,
const DataTypes & arguments,
@ -24,10 +183,7 @@ AggregateFunctionPtr createAggregateFunctionDeltaSumTimestamp(
const Settings *)
{
assertNoParameters(name, params);
if (arguments.size() != 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Incorrect number of arguments for aggregate function {}", name);
assertBinary(name, arguments);
if (!isInteger(arguments[0]) && !isFloat(arguments[0]) && !isDate(arguments[0]) && !isDateTime(arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}, "

View File

@ -1,171 +0,0 @@
#pragma once
#include <type_traits>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
template <typename ValueType, typename TimestampType>
struct AggregationFunctionDeltaSumTimestampData
{
ValueType sum = 0;
ValueType first = 0;
ValueType last = 0;
TimestampType first_ts = 0;
TimestampType last_ts = 0;
bool seen = false;
};
template <typename ValueType, typename TimestampType>
class AggregationFunctionDeltaSumTimestamp final
: public IAggregateFunctionDataHelper<
AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
>
{
public:
AggregationFunctionDeltaSumTimestamp(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<
AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
>{arguments, params, createResultType()}
{}
AggregationFunctionDeltaSumTimestamp()
: IAggregateFunctionDataHelper<
AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
>{}
{}
bool allocatesMemoryInArena() const override { return false; }
String getName() const override { return "deltaSumTimestamp"; }
static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<ValueType>>(); }
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
auto value = assert_cast<const ColumnVector<ValueType> &>(*columns[0]).getData()[row_num];
auto ts = assert_cast<const ColumnVector<TimestampType> &>(*columns[1]).getData()[row_num];
if ((this->data(place).last < value) && this->data(place).seen)
{
this->data(place).sum += (value - this->data(place).last);
}
this->data(place).last = value;
this->data(place).last_ts = ts;
if (!this->data(place).seen)
{
this->data(place).first = value;
this->data(place).seen = true;
this->data(place).first_ts = ts;
}
}
// before returns true if lhs is before rhs or false if it is not or can't be determined
bool ALWAYS_INLINE before (
const AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType> * lhs,
const AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType> * rhs
) const
{
if (lhs->last_ts < rhs->first_ts)
{
return true;
}
if (lhs->last_ts == rhs->first_ts && (lhs->last_ts < rhs->last_ts || lhs->first_ts < rhs->first_ts))
{
return true;
}
return false;
}
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
auto place_data = &this->data(place);
auto rhs_data = &this->data(rhs);
if (!place_data->seen && rhs_data->seen)
{
place_data->sum = rhs_data->sum;
place_data->seen = true;
place_data->first = rhs_data->first;
place_data->first_ts = rhs_data->first_ts;
place_data->last = rhs_data->last;
place_data->last_ts = rhs_data->last_ts;
}
else if (place_data->seen && !rhs_data->seen)
return;
else if (before(place_data, rhs_data))
{
// This state came before the rhs state
if (rhs_data->first > place_data->last)
place_data->sum += (rhs_data->first - place_data->last);
place_data->sum += rhs_data->sum;
place_data->last = rhs_data->last;
place_data->last_ts = rhs_data->last_ts;
}
else if (before(rhs_data, place_data))
{
// This state came after the rhs state
if (place_data->first > rhs_data->last)
place_data->sum += (place_data->first - rhs_data->last);
place_data->sum += rhs_data->sum;
place_data->first = rhs_data->first;
place_data->first_ts = rhs_data->first_ts;
}
else
{
// If none of those conditions matched, it means both states we are merging have all
// same timestamps. We have to pick either the smaller or larger value so that the
// result is deterministic.
if (place_data->first < rhs_data->first)
{
place_data->first = rhs_data->first;
place_data->last = rhs_data->last;
}
}
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
writeBinaryLittleEndian(this->data(place).sum, buf);
writeBinaryLittleEndian(this->data(place).first, buf);
writeBinaryLittleEndian(this->data(place).first_ts, buf);
writeBinaryLittleEndian(this->data(place).last, buf);
writeBinaryLittleEndian(this->data(place).last_ts, buf);
writeBinaryLittleEndian(this->data(place).seen, buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
readBinaryLittleEndian(this->data(place).sum, buf);
readBinaryLittleEndian(this->data(place).first, buf);
readBinaryLittleEndian(this->data(place).first_ts, buf);
readBinaryLittleEndian(this->data(place).last, buf);
readBinaryLittleEndian(this->data(place).last_ts, buf);
readBinaryLittleEndian(this->data(place).seen, buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
assert_cast<ColumnVector<ValueType> &>(to).getData().push_back(this->data(place).sum);
}
};
}

View File

@ -1,8 +1,18 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionEntropy.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <Common/HashTable/HashMap.h>
#include <Common/NaNUtils.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/UniqVariadicHash.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
#include <cmath>
namespace DB
{
@ -16,6 +26,133 @@ namespace ErrorCodes
namespace
{
/** Calculates Shannon Entropy, using HashMap and computing empirical distribution function.
* Entropy is measured in bits (base-2 logarithm is used).
*/
template <typename Value>
struct EntropyData
{
using Weight = UInt64;
using HashingMap = HashMapWithStackMemory<Value, Weight, HashCRC32<Value>, 4>;
/// For the case of pre-hashed values.
using TrivialMap = HashMapWithStackMemory<Value, Weight, UInt128TrivialHash, 4>;
using Map = std::conditional_t<std::is_same_v<UInt128, Value>, TrivialMap, HashingMap>;
Map map;
void add(const Value & x)
{
if (!isNaN(x))
++map[x];
}
void add(const Value & x, const Weight & weight)
{
if (!isNaN(x))
map[x] += weight;
}
void merge(const EntropyData & rhs)
{
for (const auto & pair : rhs.map)
map[pair.getKey()] += pair.getMapped();
}
void serialize(WriteBuffer & buf) const
{
map.write(buf);
}
void deserialize(ReadBuffer & buf)
{
typename Map::Reader reader(buf);
while (reader.next())
{
const auto & pair = reader.get();
map[pair.first] = pair.second;
}
}
Float64 get() const
{
UInt64 total_value = 0;
for (const auto & pair : map)
total_value += pair.getMapped();
Float64 shannon_entropy = 0;
for (const auto & pair : map)
{
Float64 frequency = Float64(pair.getMapped()) / total_value;
shannon_entropy -= frequency * log2(frequency);
}
return shannon_entropy;
}
};
template <typename Value>
class AggregateFunctionEntropy final : public IAggregateFunctionDataHelper<EntropyData<Value>, AggregateFunctionEntropy<Value>>
{
private:
size_t num_args;
public:
explicit AggregateFunctionEntropy(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<EntropyData<Value>, AggregateFunctionEntropy<Value>>(argument_types_, {}, createResultType())
, num_args(argument_types_.size())
{
}
String getName() const override { return "entropy"; }
static DataTypePtr createResultType()
{
return std::make_shared<DataTypeNumber<Float64>>();
}
bool allocatesMemoryInArena() const override { return false; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
if constexpr (!std::is_same_v<UInt128, Value>)
{
/// Here we manage only with numerical types
const auto & column = assert_cast<const ColumnVector <Value> &>(*columns[0]);
this->data(place).add(column.getData()[row_num]);
}
else
{
this->data(place).add(UniqVariadicHash<true, false>::apply(num_args, columns, row_num));
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(const_cast<AggregateDataPtr>(place)).serialize(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
this->data(place).deserialize(buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & column = assert_cast<ColumnVector<Float64> &>(to);
column.getData().push_back(this->data(place).get());
}
};
AggregateFunctionPtr createAggregateFunctionEntropy(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{

View File

@ -1,145 +0,0 @@
#pragma once
#include <Common/HashTable/HashMap.h>
#include <Common/NaNUtils.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/UniqVariadicHash.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
#include <cmath>
namespace DB
{
struct Settings;
/** Calculates Shannon Entropy, using HashMap and computing empirical distribution function.
* Entropy is measured in bits (base-2 logarithm is used).
*/
template <typename Value>
struct EntropyData
{
using Weight = UInt64;
using HashingMap = HashMapWithStackMemory<Value, Weight, HashCRC32<Value>, 4>;
/// For the case of pre-hashed values.
using TrivialMap = HashMapWithStackMemory<Value, Weight, UInt128TrivialHash, 4>;
using Map = std::conditional_t<std::is_same_v<UInt128, Value>, TrivialMap, HashingMap>;
Map map;
void add(const Value & x)
{
if (!isNaN(x))
++map[x];
}
void add(const Value & x, const Weight & weight)
{
if (!isNaN(x))
map[x] += weight;
}
void merge(const EntropyData & rhs)
{
for (const auto & pair : rhs.map)
map[pair.getKey()] += pair.getMapped();
}
void serialize(WriteBuffer & buf) const
{
map.write(buf);
}
void deserialize(ReadBuffer & buf)
{
typename Map::Reader reader(buf);
while (reader.next())
{
const auto & pair = reader.get();
map[pair.first] = pair.second;
}
}
Float64 get() const
{
UInt64 total_value = 0;
for (const auto & pair : map)
total_value += pair.getMapped();
Float64 shannon_entropy = 0;
for (const auto & pair : map)
{
Float64 frequency = Float64(pair.getMapped()) / total_value;
shannon_entropy -= frequency * log2(frequency);
}
return shannon_entropy;
}
};
template <typename Value>
class AggregateFunctionEntropy final : public IAggregateFunctionDataHelper<EntropyData<Value>, AggregateFunctionEntropy<Value>>
{
private:
size_t num_args;
public:
explicit AggregateFunctionEntropy(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<EntropyData<Value>, AggregateFunctionEntropy<Value>>(argument_types_, {}, createResultType())
, num_args(argument_types_.size())
{
}
String getName() const override { return "entropy"; }
static DataTypePtr createResultType()
{
return std::make_shared<DataTypeNumber<Float64>>();
}
bool allocatesMemoryInArena() const override { return false; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
if constexpr (!std::is_same_v<UInt128, Value>)
{
/// Here we manage only with numerical types
const auto & column = assert_cast<const ColumnVector <Value> &>(*columns[0]);
this->data(place).add(column.getData()[row_num]);
}
else
{
this->data(place).add(UniqVariadicHash<true, false>::apply(num_args, columns, row_num));
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(const_cast<AggregateDataPtr>(place)).serialize(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
this->data(place).deserialize(buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & column = assert_cast<ColumnVector<Float64> &>(to);
column.getData().push_back(this->data(place).get());
}
};
}

View File

@ -1,12 +1,32 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArray.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Interpreters/Context.h>
#include <Core/ServerSettings.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <type_traits>
#define AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE 0xFFFFFF
namespace DB
{
@ -16,11 +36,670 @@ namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
extern const int TOO_LARGE_ARRAY_SIZE;
}
namespace
{
enum class Sampler
{
NONE,
RNG,
};
template <bool Thas_limit, bool Tlast, Sampler Tsampler>
struct GroupArrayTrait
{
static constexpr bool has_limit = Thas_limit;
static constexpr bool last = Tlast;
static constexpr Sampler sampler = Tsampler;
};
template <typename Trait>
constexpr const char * getNameByTrait()
{
if (Trait::last)
return "groupArrayLast";
if (Trait::sampler == Sampler::NONE)
return "groupArray";
else if (Trait::sampler == Sampler::RNG)
return "groupArraySample";
UNREACHABLE();
}
template <typename T>
struct GroupArraySamplerData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
Array value;
size_t total_values = 0;
pcg32_fast rng;
UInt64 genRandom(size_t lim)
{
chassert(lim != 0);
/// With a large number of values, we will generate random numbers several times slower.
if (lim <= static_cast<UInt64>(rng.max()))
return rng() % lim;
else
return (static_cast<UInt64>(rng()) * (static_cast<UInt64>(rng.max()) + 1ULL) + static_cast<UInt64>(rng())) % lim;
}
void randomShuffle()
{
size_t size = value.size();
chassert(size < std::numeric_limits<size_t>::max());
for (size_t i = 1; i < size; ++i)
{
size_t j = genRandom(i + 1);
std::swap(value[i], value[j]);
}
}
};
/// A particular case is an implementation for numeric types.
template <typename T, bool has_sampler>
struct GroupArrayNumericData;
template <typename T>
struct GroupArrayNumericData<T, false>
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
// For groupArrayLast()
size_t total_values = 0;
Array value;
};
template <typename T>
struct GroupArrayNumericData<T, true> : public GroupArraySamplerData<T>
{
};
template <typename T, typename Trait>
class GroupArrayNumericImpl final
: public IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>
{
using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>;
static constexpr bool limit_num_elems = Trait::has_limit;
UInt64 max_elems;
std::optional<UInt64> seed;
public:
explicit GroupArrayNumericImpl(
const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_, std::optional<UInt64> seed_)
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
, max_elems(max_elems_)
, seed(seed_)
{
}
String getName() const override { return getNameByTrait<Trait>(); }
void insertWithSampler(Data & a, const T & v, Arena * arena) const
{
++a.total_values;
if (a.value.size() < max_elems)
a.value.push_back(v, arena);
else
{
UInt64 rnd = a.genRandom(a.total_values);
if (rnd < max_elems)
a.value[rnd] = v;
}
}
void create(AggregateDataPtr __restrict place) const override /// NOLINT
{
[[maybe_unused]] auto a = new (place) Data;
if constexpr (Trait::sampler == Sampler::RNG)
a->rng.seed(seed.value_or(thread_local_rng()));
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
const auto & row_value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
auto & cur_elems = this->data(place);
++cur_elems.total_values;
if constexpr (Trait::sampler == Sampler::NONE)
{
if (limit_num_elems && cur_elems.value.size() >= max_elems)
{
if constexpr (Trait::last)
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value;
return;
}
cur_elems.value.push_back(row_value, arena);
}
if constexpr (Trait::sampler == Sampler::RNG)
{
if (cur_elems.value.size() < max_elems)
cur_elems.value.push_back(row_value, arena);
else
{
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
if (rnd < max_elems)
cur_elems.value[rnd] = row_value;
}
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_elems = this->data(place);
auto & rhs_elems = this->data(rhs);
if (rhs_elems.value.empty())
return;
if constexpr (Trait::last)
mergeNoSamplerLast(cur_elems, rhs_elems, arena);
else if constexpr (Trait::sampler == Sampler::NONE)
mergeNoSampler(cur_elems, rhs_elems, arena);
else if constexpr (Trait::sampler == Sampler::RNG)
mergeWithRNGSampler(cur_elems, rhs_elems, arena);
}
void mergeNoSamplerLast(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
UInt64 new_elements = std::min(static_cast<size_t>(max_elems), cur_elems.value.size() + rhs_elems.value.size());
cur_elems.value.resize_exact(new_elements, arena);
for (auto & value : rhs_elems.value)
{
cur_elems.value[cur_elems.total_values % max_elems] = value;
++cur_elems.total_values;
}
chassert(rhs_elems.total_values >= rhs_elems.value.size());
cur_elems.total_values += rhs_elems.total_values - rhs_elems.value.size();
}
void mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
if (!limit_num_elems)
{
if (rhs_elems.value.size())
cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);
}
else
{
UInt64 elems_to_insert = std::min(static_cast<size_t>(max_elems) - cur_elems.value.size(), rhs_elems.value.size());
if (elems_to_insert)
cur_elems.value.insertByOffsets(rhs_elems.value, 0, elems_to_insert, arena);
}
}
void mergeWithRNGSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
if (rhs_elems.total_values <= max_elems)
{
for (size_t i = 0; i < rhs_elems.value.size(); ++i)
insertWithSampler(cur_elems, rhs_elems.value[i], arena);
}
else if (cur_elems.total_values <= max_elems)
{
decltype(cur_elems.value) from;
from.swap(cur_elems.value, arena);
cur_elems.value.assign(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
cur_elems.total_values = rhs_elems.total_values;
for (size_t i = 0; i < from.size(); ++i)
insertWithSampler(cur_elems, from[i], arena);
}
else
{
cur_elems.randomShuffle();
cur_elems.total_values += rhs_elems.total_values;
for (size_t i = 0; i < max_elems; ++i)
{
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
if (rnd < rhs_elems.total_values)
cur_elems.value[i] = rhs_elems.value[i];
}
}
}
static void checkArraySize(size_t elems, size_t max_elems)
{
if (unlikely(elems > max_elems))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size {} (maximum: {})", elems, max_elems);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
const auto & value = this->data(place).value;
const UInt64 size = value.size();
checkArraySize(size, max_elems);
writeVarUInt(size, buf);
for (const auto & element : value)
writeBinaryLittleEndian(element, buf);
if constexpr (Trait::last)
writeBinaryLittleEndian(this->data(place).total_values, buf);
if constexpr (Trait::sampler == Sampler::RNG)
{
writeBinaryLittleEndian(this->data(place).total_values, buf);
WriteBufferFromOwnString rng_buf;
rng_buf << this->data(place).rng;
writeStringBinary(rng_buf.str(), buf);
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
size_t size = 0;
readVarUInt(size, buf);
checkArraySize(size, max_elems);
auto & value = this->data(place).value;
value.resize_exact(size, arena);
for (auto & element : value)
readBinaryLittleEndian(element, buf);
if constexpr (Trait::last)
readBinaryLittleEndian(this->data(place).total_values, buf);
if constexpr (Trait::sampler == Sampler::RNG)
{
readBinaryLittleEndian(this->data(place).total_values, buf);
std::string rng_string;
readStringBinary(rng_string, buf);
ReadBufferFromString rng_buf(rng_string);
rng_buf >> this->data(place).rng;
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
const auto & value = this->data(place).value;
size_t size = value.size();
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
offsets_to.push_back(offsets_to.back() + size);
if (size)
{
typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
data_to.insert(this->data(place).value.begin(), this->data(place).value.end());
}
}
bool allocatesMemoryInArena() const override { return true; }
};
/// General case
/// Nodes used to implement a linked list for storage of groupArray states
template <typename Node>
struct GroupArrayNodeBase
{
UInt64 size; // size of payload
/// Returns pointer to actual payload
char * data() { return reinterpret_cast<char *>(this) + sizeof(Node); }
const char * data() const { return reinterpret_cast<const char *>(this) + sizeof(Node); }
/// Clones existing node (does not modify next field)
Node * clone(Arena * arena) const
{
return reinterpret_cast<Node *>(
const_cast<char *>(arena->alignedInsert(reinterpret_cast<const char *>(this), sizeof(Node) + size, alignof(Node))));
}
static void checkElementSize(size_t size, size_t max_size)
{
if (unlikely(size > max_size))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array element size {} (maximum: {})", size, max_size);
}
/// Write node to buffer
void write(WriteBuffer & buf) const
{
checkElementSize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
writeVarUInt(size, buf);
buf.write(data(), size);
}
/// Reads and allocates node from ReadBuffer's data (doesn't set next)
static Node * read(ReadBuffer & buf, Arena * arena)
{
UInt64 size;
readVarUInt(size, buf);
checkElementSize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
Node * node = reinterpret_cast<Node *>(arena->alignedAlloc(sizeof(Node) + size, alignof(Node)));
node->size = size;
buf.readStrict(node->data(), size);
return node;
}
};
struct GroupArrayNodeString : public GroupArrayNodeBase<GroupArrayNodeString>
{
using Node = GroupArrayNodeString;
/// Create node from string
static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
{
StringRef string = assert_cast<const ColumnString &>(column).getDataAt(row_num);
Node * node = reinterpret_cast<Node *>(arena->alignedAlloc(sizeof(Node) + string.size, alignof(Node)));
node->size = string.size;
memcpy(node->data(), string.data, string.size);
return node;
}
void insertInto(IColumn & column)
{
assert_cast<ColumnString &>(column).insertData(data(), size);
}
};
struct GroupArrayNodeGeneral : public GroupArrayNodeBase<GroupArrayNodeGeneral>
{
using Node = GroupArrayNodeGeneral;
static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
{
const char * begin = arena->alignedAlloc(sizeof(Node), alignof(Node));
StringRef value = column.serializeValueIntoArena(row_num, *arena, begin);
Node * node = reinterpret_cast<Node *>(const_cast<char *>(begin));
node->size = value.size;
return node;
}
void insertInto(IColumn & column) { column.deserializeAndInsertFromArena(data()); }
};
template <typename Node, bool has_sampler>
struct GroupArrayGeneralData;
template <typename Node>
struct GroupArrayGeneralData<Node, false>
{
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(Node *), 4096>;
using Array = PODArray<Node *, 32, Allocator>;
// For groupArrayLast()
size_t total_values = 0;
Array value;
};
template <typename Node>
struct GroupArrayGeneralData<Node, true> : public GroupArraySamplerData<Node *>
{
};
/// Implementation of groupArray for String or any ComplexObject via Array
template <typename Node, typename Trait>
class GroupArrayGeneralImpl final
: public IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>
{
static constexpr bool limit_num_elems = Trait::has_limit;
using Data = GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>;
static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast<Data *>(place); }
static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast<const Data *>(place); }
DataTypePtr & data_type;
UInt64 max_elems;
std::optional<UInt64> seed;
public:
GroupArrayGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_, std::optional<UInt64> seed_)
: IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>(
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
, data_type(this->argument_types[0])
, max_elems(max_elems_)
, seed(seed_)
{
}
String getName() const override { return getNameByTrait<Trait>(); }
void insertWithSampler(Data & a, const Node * v, Arena * arena) const
{
++a.total_values;
if (a.value.size() < max_elems)
a.value.push_back(v->clone(arena), arena);
else
{
UInt64 rnd = a.genRandom(a.total_values);
if (rnd < max_elems)
a.value[rnd] = v->clone(arena);
}
}
void create(AggregateDataPtr __restrict place) const override /// NOLINT
{
[[maybe_unused]] auto a = new (place) Data;
if constexpr (Trait::sampler == Sampler::RNG)
a->rng.seed(seed.value_or(thread_local_rng()));
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto & cur_elems = data(place);
++cur_elems.total_values;
if constexpr (Trait::sampler == Sampler::NONE)
{
if (limit_num_elems && cur_elems.value.size() >= max_elems)
{
if (Trait::last)
{
Node * node = Node::allocate(*columns[0], row_num, arena);
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = node;
}
return;
}
Node * node = Node::allocate(*columns[0], row_num, arena);
cur_elems.value.push_back(node, arena);
}
if constexpr (Trait::sampler == Sampler::RNG)
{
if (cur_elems.value.size() < max_elems)
cur_elems.value.push_back(Node::allocate(*columns[0], row_num, arena), arena);
else
{
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
if (rnd < max_elems)
cur_elems.value[rnd] = Node::allocate(*columns[0], row_num, arena);
}
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_elems = data(place);
auto & rhs_elems = data(rhs);
if (rhs_elems.value.empty())
return;
if constexpr (Trait::last)
mergeNoSamplerLast(cur_elems, rhs_elems, arena);
else if constexpr (Trait::sampler == Sampler::NONE)
mergeNoSampler(cur_elems, rhs_elems, arena);
else if constexpr (Trait::sampler == Sampler::RNG)
mergeWithRNGSampler(cur_elems, rhs_elems, arena);
}
void ALWAYS_INLINE mergeNoSamplerLast(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
UInt64 new_elements = std::min(static_cast<size_t>(max_elems), cur_elems.value.size() + rhs_elems.value.size());
cur_elems.value.resize_exact(new_elements, arena);
for (auto & value : rhs_elems.value)
{
cur_elems.value[cur_elems.total_values % max_elems] = value->clone(arena);
++cur_elems.total_values;
}
chassert(rhs_elems.total_values >= rhs_elems.value.size());
cur_elems.total_values += rhs_elems.total_values - rhs_elems.value.size();
}
void ALWAYS_INLINE mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
UInt64 new_elems;
if (limit_num_elems)
{
if (cur_elems.value.size() >= max_elems)
return;
new_elems = std::min(rhs_elems.value.size(), static_cast<size_t>(max_elems) - cur_elems.value.size());
}
else
new_elems = rhs_elems.value.size();
for (UInt64 i = 0; i < new_elems; ++i)
cur_elems.value.push_back(rhs_elems.value[i]->clone(arena), arena);
}
void ALWAYS_INLINE mergeWithRNGSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
if (rhs_elems.total_values <= max_elems)
{
for (size_t i = 0; i < rhs_elems.value.size(); ++i)
insertWithSampler(cur_elems, rhs_elems.value[i], arena);
}
else if (cur_elems.total_values <= max_elems)
{
decltype(cur_elems.value) from;
from.swap(cur_elems.value, arena);
for (auto & node : rhs_elems.value)
cur_elems.value.push_back(node->clone(arena), arena);
cur_elems.total_values = rhs_elems.total_values;
for (size_t i = 0; i < from.size(); ++i)
insertWithSampler(cur_elems, from[i], arena);
}
else
{
cur_elems.randomShuffle();
cur_elems.total_values += rhs_elems.total_values;
for (size_t i = 0; i < max_elems; ++i)
{
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
if (rnd < rhs_elems.total_values)
cur_elems.value[i] = rhs_elems.value[i]->clone(arena);
}
}
}
static void checkArraySize(size_t elems, size_t max_elems)
{
if (unlikely(elems > max_elems))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size {} (maximum: {})", elems, max_elems);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
UInt64 elems = data(place).value.size();
checkArraySize(elems, max_elems);
writeVarUInt(elems, buf);
auto & value = data(place).value;
for (auto & node : value)
node->write(buf);
if constexpr (Trait::last)
writeBinaryLittleEndian(data(place).total_values, buf);
if constexpr (Trait::sampler == Sampler::RNG)
{
writeBinaryLittleEndian(data(place).total_values, buf);
WriteBufferFromOwnString rng_buf;
rng_buf << data(place).rng;
writeStringBinary(rng_buf.str(), buf);
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
UInt64 elems;
readVarUInt(elems, buf);
if (unlikely(elems == 0))
return;
checkArraySize(elems, max_elems);
auto & value = data(place).value;
value.resize_exact(elems, arena);
for (UInt64 i = 0; i < elems; ++i)
value[i] = Node::read(buf, arena);
if constexpr (Trait::last)
readBinaryLittleEndian(data(place).total_values, buf);
if constexpr (Trait::sampler == Sampler::RNG)
{
readBinaryLittleEndian(data(place).total_values, buf);
std::string rng_string;
readStringBinary(rng_string, buf);
ReadBufferFromString rng_buf(rng_string);
rng_buf >> data(place).rng;
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & column_array = assert_cast<ColumnArray &>(to);
auto & offsets = column_array.getOffsets();
offsets.push_back(offsets.back() + data(place).value.size());
auto & column_data = column_array.getData();
if (std::is_same_v<Node, GroupArrayNodeString>)
{
auto & string_offsets = assert_cast<ColumnString &>(column_data).getOffsets();
string_offsets.reserve(string_offsets.size() + data(place).value.size());
}
auto & value = data(place).value;
for (auto & node : value)
node->insertInto(column_data);
}
bool allocatesMemoryInArena() const override { return true; }
};
template <template <typename, typename> class AggregateFunctionTemplate, typename Data, typename ... TArgs>
IAggregateFunction * createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
{
@ -87,10 +766,10 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
{
if (Tlast)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "groupArrayLast make sense only with max_elems (groupArrayLast(max_elems)())");
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ false, Tlast, /* Tsampler= */ Sampler::NONE>>(argument_types[0], parameters, max_elems);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ false, Tlast, /* Tsampler= */ Sampler::NONE>>(argument_types[0], parameters, max_elems, std::nullopt);
}
else
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ true, Tlast, /* Tsampler= */ Sampler::NONE>>(argument_types[0], parameters, max_elems);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ true, Tlast, /* Tsampler= */ Sampler::NONE>>(argument_types[0], parameters, max_elems, std::nullopt);
}
AggregateFunctionPtr createAggregateFunctionGroupArraySample(
@ -117,11 +796,9 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(
UInt64 max_elems = get_parameter(0);
UInt64 seed;
std::optional<UInt64> seed;
if (parameters.size() >= 2)
seed = get_parameter(1);
else
seed = thread_local_rng();
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ true, /* Tlast= */ false, /* Tsampler= */ Sampler::RNG>>(argument_types[0], parameters, max_elems, seed);
}

View File

@ -1,690 +0,0 @@
#pragma once
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <type_traits>
#define AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE 0xFFFFFF
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int TOO_LARGE_ARRAY_SIZE;
}
enum class Sampler
{
NONE,
RNG,
};
template <bool Thas_limit, bool Tlast, Sampler Tsampler>
struct GroupArrayTrait
{
static constexpr bool has_limit = Thas_limit;
static constexpr bool last = Tlast;
static constexpr Sampler sampler = Tsampler;
};
template <typename Trait>
static constexpr const char * getNameByTrait()
{
if (Trait::last)
return "groupArrayLast";
if (Trait::sampler == Sampler::NONE)
return "groupArray";
else if (Trait::sampler == Sampler::RNG)
return "groupArraySample";
UNREACHABLE();
}
template <typename T>
struct GroupArraySamplerData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
Array value;
size_t total_values = 0;
pcg32_fast rng;
UInt64 genRandom(size_t lim)
{
/// With a large number of values, we will generate random numbers several times slower.
if (lim <= static_cast<UInt64>(rng.max()))
return static_cast<UInt32>(rng()) % static_cast<UInt32>(lim);
else
return (static_cast<UInt64>(rng()) * (static_cast<UInt64>(rng.max()) + 1ULL) + static_cast<UInt64>(rng())) % lim;
}
void randomShuffle()
{
for (size_t i = 1; i < value.size(); ++i)
{
size_t j = genRandom(i + 1);
std::swap(value[i], value[j]);
}
}
};
/// A particular case is an implementation for numeric types.
template <typename T, bool has_sampler>
struct GroupArrayNumericData;
template <typename T>
struct GroupArrayNumericData<T, false>
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
// For groupArrayLast()
size_t total_values = 0;
Array value;
};
template <typename T>
struct GroupArrayNumericData<T, true> : public GroupArraySamplerData<T>
{
};
template <typename T, typename Trait>
class GroupArrayNumericImpl final
: public IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>
{
using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>;
static constexpr bool limit_num_elems = Trait::has_limit;
UInt64 max_elems;
UInt64 seed;
public:
explicit GroupArrayNumericImpl(
const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_, UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
, max_elems(max_elems_)
, seed(seed_)
{
}
String getName() const override { return getNameByTrait<Trait>(); }
void insertWithSampler(Data & a, const T & v, Arena * arena) const
{
++a.total_values;
if (a.value.size() < max_elems)
a.value.push_back(v, arena);
else
{
UInt64 rnd = a.genRandom(a.total_values);
if (rnd < max_elems)
a.value[rnd] = v;
}
}
void create(AggregateDataPtr __restrict place) const override /// NOLINT
{
[[maybe_unused]] auto a = new (place) Data;
if constexpr (Trait::sampler == Sampler::RNG)
a->rng.seed(seed);
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
const auto & row_value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
auto & cur_elems = this->data(place);
++cur_elems.total_values;
if constexpr (Trait::sampler == Sampler::NONE)
{
if (limit_num_elems && cur_elems.value.size() >= max_elems)
{
if constexpr (Trait::last)
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value;
return;
}
cur_elems.value.push_back(row_value, arena);
}
if constexpr (Trait::sampler == Sampler::RNG)
{
if (cur_elems.value.size() < max_elems)
cur_elems.value.push_back(row_value, arena);
else
{
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
if (rnd < max_elems)
cur_elems.value[rnd] = row_value;
}
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_elems = this->data(place);
auto & rhs_elems = this->data(rhs);
if (rhs_elems.value.empty())
return;
if constexpr (Trait::last)
mergeNoSamplerLast(cur_elems, rhs_elems, arena);
else if constexpr (Trait::sampler == Sampler::NONE)
mergeNoSampler(cur_elems, rhs_elems, arena);
else if constexpr (Trait::sampler == Sampler::RNG)
mergeWithRNGSampler(cur_elems, rhs_elems, arena);
}
void mergeNoSamplerLast(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
UInt64 new_elements = std::min(static_cast<size_t>(max_elems), cur_elems.value.size() + rhs_elems.value.size());
cur_elems.value.resize_exact(new_elements, arena);
for (auto & value : rhs_elems.value)
{
cur_elems.value[cur_elems.total_values % max_elems] = value;
++cur_elems.total_values;
}
assert(rhs_elems.total_values >= rhs_elems.value.size());
cur_elems.total_values += rhs_elems.total_values - rhs_elems.value.size();
}
void mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
if (!limit_num_elems)
{
if (rhs_elems.value.size())
cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);
}
else
{
UInt64 elems_to_insert = std::min(static_cast<size_t>(max_elems) - cur_elems.value.size(), rhs_elems.value.size());
if (elems_to_insert)
cur_elems.value.insertByOffsets(rhs_elems.value, 0, elems_to_insert, arena);
}
}
void mergeWithRNGSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
if (rhs_elems.total_values <= max_elems)
{
for (size_t i = 0; i < rhs_elems.value.size(); ++i)
insertWithSampler(cur_elems, rhs_elems.value[i], arena);
}
else if (cur_elems.total_values <= max_elems)
{
decltype(cur_elems.value) from;
from.swap(cur_elems.value, arena);
cur_elems.value.assign(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
cur_elems.total_values = rhs_elems.total_values;
for (size_t i = 0; i < from.size(); ++i)
insertWithSampler(cur_elems, from[i], arena);
}
else
{
cur_elems.randomShuffle();
cur_elems.total_values += rhs_elems.total_values;
for (size_t i = 0; i < max_elems; ++i)
{
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
if (rnd < rhs_elems.total_values)
cur_elems.value[i] = rhs_elems.value[i];
}
}
}
static void checkArraySize(size_t elems, size_t max_elems)
{
if (unlikely(elems > max_elems))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size {} (maximum: {})", elems, max_elems);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
const auto & value = this->data(place).value;
const UInt64 size = value.size();
checkArraySize(size, max_elems);
writeVarUInt(size, buf);
for (const auto & element : value)
writeBinaryLittleEndian(element, buf);
if constexpr (Trait::last)
writeBinaryLittleEndian(this->data(place).total_values, buf);
if constexpr (Trait::sampler == Sampler::RNG)
{
writeBinaryLittleEndian(this->data(place).total_values, buf);
WriteBufferFromOwnString rng_buf;
rng_buf << this->data(place).rng;
writeStringBinary(rng_buf.str(), buf);
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
size_t size = 0;
readVarUInt(size, buf);
checkArraySize(size, max_elems);
auto & value = this->data(place).value;
value.resize_exact(size, arena);
for (auto & element : value)
readBinaryLittleEndian(element, buf);
if constexpr (Trait::last)
readBinaryLittleEndian(this->data(place).total_values, buf);
if constexpr (Trait::sampler == Sampler::RNG)
{
readBinaryLittleEndian(this->data(place).total_values, buf);
std::string rng_string;
readStringBinary(rng_string, buf);
ReadBufferFromString rng_buf(rng_string);
rng_buf >> this->data(place).rng;
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
const auto & value = this->data(place).value;
size_t size = value.size();
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
offsets_to.push_back(offsets_to.back() + size);
if (size)
{
typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
data_to.insert(this->data(place).value.begin(), this->data(place).value.end());
}
}
bool allocatesMemoryInArena() const override { return true; }
};
/// General case
/// Nodes used to implement a linked list for storage of groupArray states
template <typename Node>
struct GroupArrayNodeBase
{
UInt64 size; // size of payload
/// Returns pointer to actual payload
char * data() { return reinterpret_cast<char *>(this) + sizeof(Node); }
const char * data() const { return reinterpret_cast<const char *>(this) + sizeof(Node); }
/// Clones existing node (does not modify next field)
Node * clone(Arena * arena) const
{
return reinterpret_cast<Node *>(
const_cast<char *>(arena->alignedInsert(reinterpret_cast<const char *>(this), sizeof(Node) + size, alignof(Node))));
}
static void checkElementSize(size_t size, size_t max_size)
{
if (unlikely(size > max_size))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array element size {} (maximum: {})", size, max_size);
}
/// Write node to buffer
void write(WriteBuffer & buf) const
{
checkElementSize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
writeVarUInt(size, buf);
buf.write(data(), size);
}
/// Reads and allocates node from ReadBuffer's data (doesn't set next)
static Node * read(ReadBuffer & buf, Arena * arena)
{
UInt64 size;
readVarUInt(size, buf);
checkElementSize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
Node * node = reinterpret_cast<Node *>(arena->alignedAlloc(sizeof(Node) + size, alignof(Node)));
node->size = size;
buf.readStrict(node->data(), size);
return node;
}
};
struct GroupArrayNodeString : public GroupArrayNodeBase<GroupArrayNodeString>
{
using Node = GroupArrayNodeString;
/// Create node from string
static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
{
StringRef string = assert_cast<const ColumnString &>(column).getDataAt(row_num);
Node * node = reinterpret_cast<Node *>(arena->alignedAlloc(sizeof(Node) + string.size, alignof(Node)));
node->size = string.size;
memcpy(node->data(), string.data, string.size);
return node;
}
void insertInto(IColumn & column)
{
assert_cast<ColumnString &>(column).insertData(data(), size);
}
};
struct GroupArrayNodeGeneral : public GroupArrayNodeBase<GroupArrayNodeGeneral>
{
using Node = GroupArrayNodeGeneral;
static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
{
const char * begin = arena->alignedAlloc(sizeof(Node), alignof(Node));
StringRef value = column.serializeValueIntoArena(row_num, *arena, begin);
Node * node = reinterpret_cast<Node *>(const_cast<char *>(begin));
node->size = value.size;
return node;
}
void insertInto(IColumn & column) { column.deserializeAndInsertFromArena(data()); }
};
template <typename Node, bool has_sampler>
struct GroupArrayGeneralData;
template <typename Node>
struct GroupArrayGeneralData<Node, false>
{
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(Node *), 4096>;
using Array = PODArray<Node *, 32, Allocator>;
// For groupArrayLast()
size_t total_values = 0;
Array value;
};
template <typename Node>
struct GroupArrayGeneralData<Node, true> : public GroupArraySamplerData<Node *>
{
};
/// Implementation of groupArray for String or any ComplexObject via Array
template <typename Node, typename Trait>
class GroupArrayGeneralImpl final
: public IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>
{
static constexpr bool limit_num_elems = Trait::has_limit;
using Data = GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>;
static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast<Data *>(place); }
static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast<const Data *>(place); }
DataTypePtr & data_type;
UInt64 max_elems;
UInt64 seed;
public:
GroupArrayGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_, UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>(
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
, data_type(this->argument_types[0])
, max_elems(max_elems_)
, seed(seed_)
{
}
String getName() const override { return getNameByTrait<Trait>(); }
void insertWithSampler(Data & a, const Node * v, Arena * arena) const
{
++a.total_values;
if (a.value.size() < max_elems)
a.value.push_back(v->clone(arena), arena);
else
{
UInt64 rnd = a.genRandom(a.total_values);
if (rnd < max_elems)
a.value[rnd] = v->clone(arena);
}
}
void create(AggregateDataPtr __restrict place) const override /// NOLINT
{
[[maybe_unused]] auto a = new (place) Data;
if constexpr (Trait::sampler == Sampler::RNG)
a->rng.seed(seed);
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto & cur_elems = data(place);
++cur_elems.total_values;
if constexpr (Trait::sampler == Sampler::NONE)
{
if (limit_num_elems && cur_elems.value.size() >= max_elems)
{
if (Trait::last)
{
Node * node = Node::allocate(*columns[0], row_num, arena);
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = node;
}
return;
}
Node * node = Node::allocate(*columns[0], row_num, arena);
cur_elems.value.push_back(node, arena);
}
if constexpr (Trait::sampler == Sampler::RNG)
{
if (cur_elems.value.size() < max_elems)
cur_elems.value.push_back(Node::allocate(*columns[0], row_num, arena), arena);
else
{
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
if (rnd < max_elems)
cur_elems.value[rnd] = Node::allocate(*columns[0], row_num, arena);
}
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_elems = data(place);
auto & rhs_elems = data(rhs);
if (rhs_elems.value.empty())
return;
if constexpr (Trait::last)
mergeNoSamplerLast(cur_elems, rhs_elems, arena);
else if constexpr (Trait::sampler == Sampler::NONE)
mergeNoSampler(cur_elems, rhs_elems, arena);
else if constexpr (Trait::sampler == Sampler::RNG)
mergeWithRNGSampler(cur_elems, rhs_elems, arena);
}
void ALWAYS_INLINE mergeNoSamplerLast(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
UInt64 new_elements = std::min(static_cast<size_t>(max_elems), cur_elems.value.size() + rhs_elems.value.size());
cur_elems.value.resize_exact(new_elements, arena);
for (auto & value : rhs_elems.value)
{
cur_elems.value[cur_elems.total_values % max_elems] = value->clone(arena);
++cur_elems.total_values;
}
assert(rhs_elems.total_values >= rhs_elems.value.size());
cur_elems.total_values += rhs_elems.total_values - rhs_elems.value.size();
}
void ALWAYS_INLINE mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
UInt64 new_elems;
if (limit_num_elems)
{
if (cur_elems.value.size() >= max_elems)
return;
new_elems = std::min(rhs_elems.value.size(), static_cast<size_t>(max_elems) - cur_elems.value.size());
}
else
new_elems = rhs_elems.value.size();
for (UInt64 i = 0; i < new_elems; ++i)
cur_elems.value.push_back(rhs_elems.value[i]->clone(arena), arena);
}
void ALWAYS_INLINE mergeWithRNGSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
{
if (rhs_elems.total_values <= max_elems)
{
for (size_t i = 0; i < rhs_elems.value.size(); ++i)
insertWithSampler(cur_elems, rhs_elems.value[i], arena);
}
else if (cur_elems.total_values <= max_elems)
{
decltype(cur_elems.value) from;
from.swap(cur_elems.value, arena);
for (auto & node : rhs_elems.value)
cur_elems.value.push_back(node->clone(arena), arena);
cur_elems.total_values = rhs_elems.total_values;
for (size_t i = 0; i < from.size(); ++i)
insertWithSampler(cur_elems, from[i], arena);
}
else
{
cur_elems.randomShuffle();
cur_elems.total_values += rhs_elems.total_values;
for (size_t i = 0; i < max_elems; ++i)
{
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
if (rnd < rhs_elems.total_values)
cur_elems.value[i] = rhs_elems.value[i]->clone(arena);
}
}
}
static void checkArraySize(size_t elems, size_t max_elems)
{
if (unlikely(elems > max_elems))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size {} (maximum: {})", elems, max_elems);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
UInt64 elems = data(place).value.size();
checkArraySize(elems, max_elems);
writeVarUInt(elems, buf);
auto & value = data(place).value;
for (auto & node : value)
node->write(buf);
if constexpr (Trait::last)
writeBinaryLittleEndian(data(place).total_values, buf);
if constexpr (Trait::sampler == Sampler::RNG)
{
writeBinaryLittleEndian(data(place).total_values, buf);
WriteBufferFromOwnString rng_buf;
rng_buf << data(place).rng;
writeStringBinary(rng_buf.str(), buf);
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
UInt64 elems;
readVarUInt(elems, buf);
if (unlikely(elems == 0))
return;
checkArraySize(elems, max_elems);
auto & value = data(place).value;
value.resize_exact(elems, arena);
for (UInt64 i = 0; i < elems; ++i)
value[i] = Node::read(buf, arena);
if constexpr (Trait::last)
readBinaryLittleEndian(data(place).total_values, buf);
if constexpr (Trait::sampler == Sampler::RNG)
{
readBinaryLittleEndian(data(place).total_values, buf);
std::string rng_string;
readStringBinary(rng_string, buf);
ReadBufferFromString rng_buf(rng_string);
rng_buf >> data(place).rng;
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & column_array = assert_cast<ColumnArray &>(to);
auto & offsets = column_array.getOffsets();
offsets.push_back(offsets.back() + data(place).value.size());
auto & column_data = column_array.getData();
if (std::is_same_v<Node, GroupArrayNodeString>)
{
auto & string_offsets = assert_cast<ColumnString &>(column_data).getOffsets();
string_offsets.reserve(string_offsets.size() + data(place).value.size());
}
auto & value = data(place).value;
for (auto & node : value)
node->insertInto(column_data);
}
bool allocatesMemoryInArena() const override { return true; }
};
#undef AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE
}

Some files were not shown because too many files have changed in this diff Show More