Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-17 21:24:28 +00:00

Commit 59480205d4: Merge branch 'master' into keeper-map-backup-restore
.github/actions/clean/action.yml (vendored, new file, 11 lines)

@@ -0,0 +1,11 @@
name: Clean runner
description: Clean the runner's temp path on ending
runs:
  using: "composite"
  steps:
    - name: Clean
      shell: bash
      run: |
        docker ps --quiet | xargs --no-run-if-empty docker kill ||:
        docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
        sudo rm -fr "${{runner.temp}}"
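For context, a minimal sketch (not part of this commit) of how a job on a self-hosted runner could call this composite action as its final step; the job name below is hypothetical.

  jobs:
    SomeBuild:
      runs-on: [self-hosted, builder]
      steps:
        - name: Check out repository code
          uses: ClickHouse/checkout@v1
        # ... build steps ...
        - name: Clean the runner
          if: always()                    # run even if earlier steps failed
          uses: ./.github/actions/clean   # the composite action defined above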
.github/actions/common_setup/action.yml (vendored, new file, 33 lines)

@@ -0,0 +1,33 @@
name: Common setup
description: Setup necessary environments
inputs:
  job_type:
    description: the name to use in the TEMP_PATH and REPO_COPY
    default: common
    type: string
  nested_job:
    description: the fuse for unintended use inside of the reusable callable jobs
    default: true
    type: boolean
runs:
  using: "composite"
  steps:
    - name: Setup and check ENV
      shell: bash
      run: |
        echo "Setup the common ENV variables"
        cat >> "$GITHUB_ENV" << 'EOF'
        TEMP_PATH=${{runner.temp}}/${{inputs.job_type}}
        REPO_COPY=${{runner.temp}}/${{inputs.job_type}}/git-repo-copy
        EOF
        if [ -z "${{env.GITHUB_JOB_OVERRIDDEN}}" ] && [ "true" == "${{inputs.nested_job}}" ]; then
          echo "The GITHUB_JOB_OVERRIDDEN ENV is unset, and must be set for the nested jobs"
          exit 1
        fi
    - name: Setup $TEMP_PATH
      shell: bash
      run: |
        # to remove every leftovers
        sudo rm -fr "$TEMP_PATH"
        mkdir -p "$REPO_COPY"
        cp -a "$GITHUB_WORKSPACE"/. "$REPO_COPY"/
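Likewise not part of this commit: a sketch of how a job inside a callable workflow might consume common_setup. The job name and job_type value are hypothetical; GITHUB_JOB_OVERRIDDEN must be set on the caller side, otherwise the nested_job fuse above fails the step.

  jobs:
    Build:
      runs-on: [self-hosted, builder]
      env:
        GITHUB_JOB_OVERRIDDEN: Build      # disarms the nested_job fuse
      steps:
        - name: Check out repository code
          uses: ClickHouse/checkout@v1
        - name: Common setup
          uses: ./.github/actions/common_setup
          with:
            job_type: build_check         # TEMP_PATH becomes ${{ runner.temp }}/build_check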
.github/workflows/backport_branches.yml (vendored, 328 lines changed)

@@ -1,3 +1,4 @@
+# yamllint disable rule:comments-indentation
 name: BackportPR

 env:
@@ -169,320 +170,43 @@ jobs:
   #########################################################################################
   BuilderDebRelease:
     needs: [DockerHubPush]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=package_release
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-          fetch-depth: 0 # For a proper version and performance artifacts
-          filter: tree:0
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_release
+      checkout_depth: 0
   BuilderDebAarch64:
     needs: [DockerHubPush]
-    ... (the same inline Set envs / Download changed images / Check out / Build / Upload / Cleanup steps as in BuilderDebRelease, with BUILD_NAME=package_aarch64) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_aarch64
+      checkout_depth: 0
   BuilderDebAsan:
     needs: [DockerHubPush]
-    ... (the same inline steps, but with a shallow checkout without fetch-depth/filter; BUILD_NAME=package_asan) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_asan
   BuilderDebTsan:
     needs: [DockerHubPush]
-    ... (the same inline steps as BuilderDebAsan, with BUILD_NAME=package_tsan) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_tsan
   BuilderDebDebug:
     needs: [DockerHubPush]
-    ... (the same inline steps, shallow checkout, plus an "Apply sparse checkout for contrib" step that removes contrib, restores it with git checkout and runs contrib/update-submodules.sh; BUILD_NAME=package_debug) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_debug
   BuilderBinDarwin:
     needs: [DockerHubPush]
-    ... (the same inline steps, with fetch-depth: 0 # otherwise we will have no info about contributors, filter: tree:0, the sparse-checkout-for-contrib step, and BUILD_NAME=binary_darwin) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_darwin
+      checkout_depth: 0
   BuilderBinDarwinAarch64:
     needs: [DockerHubPush]
-    ... (the same inline steps as BuilderBinDarwin, with BUILD_NAME=binary_darwin_aarch64) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_darwin_aarch64
+      checkout_depth: 0
   ############################################################################################
   ##################################### Docker images #######################################
   ############################################################################################
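The reusable workflow these jobs now call is not included in this diff. As a rough sketch only - assuming just the two inputs passed above and reusing the composite actions added in this commit; the real reusable_build.yml may differ - its workflow_call interface could look like this:

  # Hypothetical sketch of .github/workflows/reusable_build.yml (not the actual file)
  name: BuildRelease
  on:
    workflow_call:
      inputs:
        build_name:
          description: build flavour passed to tests/ci/build_check.py
          required: true
          type: string
        checkout_depth:
          description: 0 means full history (needed for version and performance artifacts)
          default: 1
          type: number
  jobs:
    Build:
      runs-on: [self-hosted, builder]
      env:
        GITHUB_JOB_OVERRIDDEN: Build
      steps:
        - name: Check out repository code
          uses: ClickHouse/checkout@v1
          with:
            submodules: true
            fetch-depth: ${{ inputs.checkout_depth }}
        - name: Common setup
          uses: ./.github/actions/common_setup
          with:
            job_type: build_check
        - name: Build
          run: cd "$REPO_COPY/tests/ci" && python3 build_check.py "${{ inputs.build_name }}"
        - name: Clean
          if: always()
          uses: ./.github/actions/clean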
.github/workflows/master.yml (vendored, 813 lines changed)

@@ -1,3 +1,4 @@
+# yamllint disable rule:comments-indentation
 name: MasterCI

 env:
@@ -184,789 +185,109 @@ jobs:
   #########################################################################################
   BuilderDebRelease:
     needs: [DockerHubPush]
-    ... (the same inline Set envs / Download changed images / Check out / Build / Upload / Cleanup steps as in backport_branches.yml, with BUILD_NAME=package_release, fetch-depth: 0 # For a proper version and performance artifacts, filter: tree:0, and a cleanup that removes only "$TEMP_PATH") ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      checkout_depth: 0
+      build_name: package_release
   BuilderDebAarch64:
     needs: [DockerHubPush]
-    ... (same inline steps, full-history checkout; BUILD_NAME=package_aarch64; the download and upload paths are spelled as ${{ runner.temp }}/images_path and ${{ runner.temp }}/build_check/... instead of the env shortcuts, and the upload step has no if: condition) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      checkout_depth: 0
+      build_name: package_aarch64
   BuilderBinRelease:
     needs: [DockerHubPush]
-    ... (same inline steps, with fetch-depth: 0 # otherwise we will have no info about contributors, filter: tree:0; BUILD_NAME=binary_release) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      checkout_depth: 0
+      build_name: binary_release
   BuilderDebAsan:
     needs: [DockerHubPush]
-    ... (same inline steps, shallow checkout; BUILD_NAME=package_asan) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_asan
   BuilderDebUBsan:
     needs: [DockerHubPush]
-    ... (same inline steps, shallow checkout; BUILD_NAME=package_ubsan) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_ubsan
   BuilderDebTsan:
     needs: [DockerHubPush]
-    ... (same inline steps, shallow checkout; BUILD_NAME=package_tsan) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_tsan
   BuilderDebMsan:
     needs: [DockerHubPush]
-    ... (same inline steps, shallow checkout; BUILD_NAME=package_msan) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_msan
   BuilderDebDebug:
     needs: [DockerHubPush]
-    ... (same inline steps, shallow checkout, plus the "Apply sparse checkout for contrib" step; BUILD_NAME=package_debug) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: package_debug
   ##########################################################################################
   ##################################### SPECIAL BUILDS #####################################
   ##########################################################################################
   BuilderBinClangTidy:
     needs: [DockerHubPush]
-    ... (same inline steps, shallow checkout; BUILD_NAME=binary_tidy) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_tidy
   BuilderBinDarwin:
     needs: [DockerHubPush]
-    ... (same inline steps, full-history checkout, plus the sparse-checkout-for-contrib step; BUILD_NAME=binary_darwin) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_darwin
+      checkout_depth: 0
   BuilderBinAarch64:
     needs: [DockerHubPush]
-    ... (same inline steps, full-history checkout; BUILD_NAME=binary_aarch64) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_aarch64
+      checkout_depth: 0
   BuilderBinFreeBSD:
     needs: [DockerHubPush]
-    ... (same inline steps, full-history checkout; BUILD_NAME=binary_freebsd) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_freebsd
+      checkout_depth: 0
   BuilderBinDarwinAarch64:
     needs: [DockerHubPush]
-    ... (same inline steps, full-history checkout, plus the sparse-checkout-for-contrib step; BUILD_NAME=binary_darwin_aarch64) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_darwin_aarch64
+      checkout_depth: 0
   BuilderBinPPC64:
     needs: [DockerHubPush]
-    ... (same inline steps, full-history checkout; BUILD_NAME=binary_ppc64le) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_ppc64le
+      checkout_depth: 0
   BuilderBinAmd64Compat:
     needs: [DockerHubPush]
-    ... (same inline steps, full-history checkout; BUILD_NAME=binary_amd64_compat) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_amd64_compat
+      checkout_depth: 0
   BuilderBinAarch64V80Compat:
     needs: [DockerHubPush]
-    ... (same inline steps, full-history checkout; BUILD_NAME=binary_aarch64_v80compat) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_aarch64_v80compat
+      checkout_depth: 0
   BuilderBinRISCV64:
     needs: [DockerHubPush]
-    ... (same inline steps, full-history checkout; BUILD_NAME=binary_riscv64) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_riscv64
+      checkout_depth: 0
   BuilderBinS390X:
     needs: [DockerHubPush]
-    ... (same inline steps, full-history checkout; BUILD_NAME=binary_s390x) ...
+    uses: ./.github/workflows/reusable_build.yml
+    with:
+      build_name: binary_s390x
+      checkout_depth: 0
   ############################################################################################
   ##################################### Docker images #######################################
   ############################################################################################
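After this hunk, every build flavour in master.yml reduces to the same short caller; for reference, one converted caller in full. The commented secrets line is not part of this diff and would only apply if the called workflow declared secrets.

  BuilderBinS390X:
    needs: [DockerHubPush]
    uses: ./.github/workflows/reusable_build.yml
    with:
      build_name: binary_s390x
      checkout_depth: 0
    # secrets: inherit   # only if reusable_build.yml declares secrets (not shown in this diff)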
.github/workflows/pull_request.yml (vendored, 822 lines changed)

@@ -1,3 +1,4 @@
+# yamllint disable rule:comments-indentation
 name: PullRequestCI

 env:
@ -246,771 +247,100 @@ jobs:
|
||||
#################################### ORDINARY BUILDS ####################################
|
||||
#########################################################################################
|
||||
BuilderDebRelease:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_release
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
fetch-depth: 0 # for performance artifact
|
||||
filter: tree:0
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
BuilderBinRelease:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_release
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_release
|
||||
checkout_depth: 0
|
||||
BuilderDebAarch64:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_aarch64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ runner.temp }}/images_path
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
fetch-depth: 0 # for performance artifact
|
||||
filter: tree:0
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_aarch64
|
||||
checkout_depth: 0
|
||||
BuilderBinRelease:
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_release
|
||||
BuilderDebAsan:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_asan
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_asan
|
||||
BuilderDebUBsan:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_ubsan
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_ubsan
|
||||
BuilderDebTsan:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_tsan
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_tsan
|
||||
BuilderDebMsan:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_msan
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_msan
|
||||
BuilderDebDebug:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_debug
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
|
||||
run: |
|
||||
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
|
||||
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
|
||||
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
|
||||
du -hs "$GITHUB_WORKSPACE/contrib" ||:
|
||||
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_debug
|
||||
##########################################################################################
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderBinClangTidy:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_tidy
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_tidy
|
||||
BuilderBinDarwin:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_darwin
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
|
||||
run: |
|
||||
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
|
||||
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
|
||||
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
|
||||
du -hs "$GITHUB_WORKSPACE/contrib" ||:
|
||||
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_darwin
|
||||
BuilderBinAarch64:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_aarch64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_aarch64
|
||||
BuilderBinFreeBSD:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_freebsd
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_freebsd
|
||||
BuilderBinDarwinAarch64:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_darwin_aarch64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
|
||||
run: |
|
||||
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
|
||||
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
|
||||
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
|
||||
du -hs "$GITHUB_WORKSPACE/contrib" ||:
|
||||
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_darwin_aarch64
|
||||
BuilderBinPPC64:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_ppc64le
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_ppc64le
|
||||
BuilderBinAmd64Compat:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_amd64_compat
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_amd64_compat
|
||||
BuilderBinAarch64V80Compat:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_aarch64_v80compat
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_aarch64_v80compat
|
||||
BuilderBinRISCV64:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_riscv64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_riscv64
|
||||
BuilderBinS390X:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_s390x
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
needs: [FastTest, StyleCheck]
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_s390x
|
||||
############################################################################################
##################################### Docker images #######################################
############################################################################################
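Nearly every cleanup step removed above ends its commands with `||:`, shell shorthand for `|| :` (fall back to the no-op builtin), so a missing Docker daemon or an empty container list never marks the step as failed. A minimal sketch of the idiom, with illustrative commands:

```bash
# Without the fallback, any failing command would abort a `set -e` style step.
docker ps --quiet | xargs --no-run-if-empty docker kill ||:   # exit status forced to 0
false ||:                                                     # same idea with a guaranteed failure
echo "cleanup finished with status $?"                        # prints 0
```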
411
.github/workflows/release_branches.yml
vendored
@ -1,3 +1,4 @@
# yamllint disable rule:comments-indentation
|
||||
name: ReleaseBranchCI
|
||||
|
||||
env:
|
||||
@ -140,401 +141,53 @@ jobs:
|
||||
#########################################################################################
|
||||
BuilderDebRelease:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_release
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
filter: tree:0
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_release
|
||||
checkout_depth: 0
|
||||
BuilderDebAarch64:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_aarch64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ runner.temp }}/images_path
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
fetch-depth: 0 # For a proper version and performance artifacts
|
||||
filter: tree:0
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_aarch64
|
||||
checkout_depth: 0
|
||||
BuilderDebAsan:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_asan
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_asan
|
||||
BuilderDebUBsan:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_ubsan
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_ubsan
|
||||
BuilderDebTsan:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_tsan
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_tsan
|
||||
BuilderDebMsan:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_msan
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_msan
|
||||
BuilderDebDebug:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=package_debug
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
|
||||
run: |
|
||||
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
|
||||
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
|
||||
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
|
||||
du -hs "$GITHUB_WORKSPACE/contrib" ||:
|
||||
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: package_debug
|
||||
BuilderBinDarwin:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_darwin
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
filter: tree:0
|
||||
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
|
||||
run: |
|
||||
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
|
||||
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
|
||||
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
|
||||
du -hs "$GITHUB_WORKSPACE/contrib" ||:
|
||||
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_darwin
|
||||
checkout_depth: 0
|
||||
BuilderBinDarwinAarch64:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_darwin_aarch64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
filter: tree:0
|
||||
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
|
||||
run: |
|
||||
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
|
||||
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
|
||||
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
|
||||
du -hs "$GITHUB_WORKSPACE/contrib" ||:
|
||||
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
uses: ./.github/workflows/reusable_build.yml
|
||||
with:
|
||||
build_name: binary_darwin_aarch64
|
||||
checkout_depth: 0
|
||||
############################################################################################
##################################### Docker images #######################################
############################################################################################
74
.github/workflows/reusable_build.yml
vendored
Normal file
@ -0,0 +1,74 @@
### For the pure soul wishes to move it to another place
# https://github.com/orgs/community/discussions/9050

name: Build ClickHouse
'on':
  workflow_call:
    inputs:
      build_name:
        description: the value of build type from tests/ci/ci_config.py
        required: true
        type: string
      checkout_depth:
        description: the value of the git shallow checkout
        required: false
        type: number
        default: 1
      runner_type:
        description: the label of runner to use
        default: builder
        type: string
      additional_envs:
        description: additional ENV variables to setup the job
        type: string

jobs:
  Build:
    name: Build-${{inputs.build_name}}
    runs-on: [self-hosted, '${{inputs.runner_type}}']
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
          submodules: true
          fetch-depth: ${{inputs.checkout_depth}}
          filter: tree:0
      - name: Set build envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          IMAGES_PATH=${{runner.temp}}/images_path
          GITHUB_JOB_OVERRIDDEN=Build-${{inputs.build_name}}
          ${{inputs.additional_envs}}
          EOF
          python3 "$GITHUB_WORKSPACE"/tests/ci/ci_config.py --build-name "${{inputs.build_name}}" >> "$GITHUB_ENV"
      - name: Apply sparse checkout for contrib # in order to check that it doesn't break build
        # This step is done in GITHUB_WORKSPACE,
        # because it's broken in REPO_COPY for some reason
        if: ${{ env.BUILD_SPARSE_CHECKOUT == 'true' }}
        run: |
          rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
          git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
          "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
          du -hs "$GITHUB_WORKSPACE/contrib" ||:
          find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
      - name: Common setup
        uses: ./.github/actions/common_setup
        with:
          job_type: build_check
      - name: Download changed images
        uses: actions/download-artifact@v3
        with:
          name: changed_images
          path: ${{ env.IMAGES_PATH }}
      - name: Build
        run: |
          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
      - name: Upload build URLs to artifacts
        if: ${{ success() || failure() }}
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.BUILD_URLS }}
          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
      - name: Clean
        uses: ./.github/actions/clean
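The "Set build envs" step above relies on the standard `$GITHUB_ENV` mechanism: every `KEY=value` line appended to that file becomes an environment variable for the remaining steps of the job. A rough bash equivalent of what happens between steps (the temp file and the values are illustrative, not the runner's actual implementation):

```bash
# Stand-in for the per-job file that GitHub Actions exposes as $GITHUB_ENV.
export GITHUB_ENV=$(mktemp)

# What a step's run: script does: append plain KEY=value lines.
cat >> "$GITHUB_ENV" << 'EOF'
IMAGES_PATH=/tmp/images_path
BUILD_NAME=package_release
EOF

# Roughly what the runner does before the next step: re-read the file and export everything.
set -a
. "$GITHUB_ENV"
set +a

echo "$BUILD_NAME"   # -> package_release
```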
3
.gitmodules
vendored
@ -1,3 +1,6 @@
# Please do not use 'branch = ...' tags with submodule entries. Such tags make updating submodules a
# little bit more convenient but they do *not* specify the tracked submodule branch. Thus, they are
# more confusing than useful.
[submodule "contrib/zstd"]
path = contrib/zstd
url = https://github.com/facebook/zstd
20
README.md
@ -1,6 +1,17 @@
[<img alt="ClickHouse — open source distributed column-oriented DBMS" width="400px" src="https://clickhouse.com/images/ch_gh_logo_rounded.png" />](https://clickhouse.com?utm_source=github)
|
||||
<div align=center>
|
||||
|
||||
ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.
|
||||
[![Website](https://img.shields.io/website?up_message=AVAILABLE&down_message=DOWN&url=https%3A%2F%2Fclickhouse.com&style=for-the-badge)](https://clickhouse.com)
|
||||
[![Apache 2.0 License](https://img.shields.io/badge/license-Apache%202.0-blueviolet?style=for-the-badge)](https://www.apache.org/licenses/LICENSE-2.0)
|
||||
|
||||
<picture align=center>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/4ef9c104-2d3f-4646-b186-507358d2fe28">
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/b001dc7b-5a45-4dcd-9275-e03beb7f9177">
|
||||
<img alt="The ClickHouse company logo." src="https://github.com/ClickHouse/clickhouse-docs/assets/9611008/b001dc7b-5a45-4dcd-9275-e03beb7f9177">
|
||||
</picture>
|
||||
|
||||
<h4>ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.</h4>
|
||||
|
||||
</div>
|
||||
|
||||
## How To Install (Linux, macOS, FreeBSD)
|
||||
```
|
||||
@ -22,8 +33,7 @@ curl https://clickhouse.com/ | sh
|
||||
|
||||
## Upcoming Events
|
||||
|
||||
* [**ClickHouse Meetup in Beijing**](https://www.meetup.com/clickhouse-beijing-user-group/events/296334856/) - Nov 4
|
||||
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 8
|
||||
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 14
|
||||
* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/296334976/) - Nov 15
|
||||
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30
|
||||
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11
|
||||
@ -33,7 +43,7 @@ Also, keep an eye out for upcoming meetups around the world. Somewhere else you
|
||||
|
||||
## Recent Recordings
|
||||
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
|
||||
* **Recording available**: [**v23.6 Release Webinar**](https://www.youtube.com/watch?v=cuf_hYn7dqU) All the features of 23.6, one convenient video! Watch it now!
|
||||
* **Recording available**: [**v23.10 Release Webinar**](https://www.youtube.com/watch?v=PGQS6uPb970) All the features of 23.10, one convenient video! Watch it now!
|
||||
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)
|
||||
|
||||
|
||||
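For reference, the install one-liner in the README hunk above fetches a single self-contained binary; once downloaded it can be exercised directly, for example via the embedded local mode, with no server process involved (a minimal sketch):

```bash
curl https://clickhouse.com/ | sh                # downloads a self-extracting ./clickhouse binary
./clickhouse local --query "SELECT version()"    # run a query locally without starting a server
```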
|
@ -119,17 +119,16 @@
|
||||
#include <base/types.h>
|
||||
namespace DB
|
||||
{
|
||||
void abortOnFailedAssertion(const String & description);
|
||||
[[noreturn]] void abortOnFailedAssertion(const String & description);
|
||||
}
|
||||
#define chassert(x) static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x)
|
||||
#define chassert(x) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
|
||||
#define UNREACHABLE() abort()
|
||||
// clang-format off
|
||||
#else
|
||||
/// Here sizeof() trick is used to suppress unused warning for result,
|
||||
/// since simple "(void)x" will evaluate the expression, while
|
||||
/// "sizeof(!(x))" will not.
|
||||
#define NIL_EXPRESSION(x) (void)sizeof(!(x))
|
||||
#define chassert(x) NIL_EXPRESSION(x)
|
||||
#define chassert(x) (void)sizeof(!(x))
|
||||
#define UNREACHABLE() __builtin_unreachable()
|
||||
#endif
|
||||
#endif
|
||||
|
@ -1,3 +1,5 @@
|
||||
# Generates a separate file with debug symbols while stripping it from the main binary.
|
||||
# This is needed for Debian packages.
|
||||
macro(clickhouse_split_debug_symbols)
|
||||
set(oneValueArgs TARGET DESTINATION_DIR BINARY_PATH)
|
||||
|
||||
|
2
contrib/NuRaft
vendored
@ -1 +1 @@
Subproject commit eb1572129c71beb2156dcdaadc3fb136954aed96
Subproject commit b7ea89b817a18dc0eafc1f909d568869f02d2d04
@ -1 +0,0 @@
../../../thrift/build/cmake/config.h.in
2
contrib/grpc
vendored
@ -1 +1 @@
Subproject commit bef8212d1e01f99e406c282ceab3d42da08e09ce
Subproject commit b723ecae0991bb873fe87a595dfb187178733fde
@ -7,12 +7,6 @@ endif()
|
||||
|
||||
set(LIB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libssh")
|
||||
set(LIB_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/libssh")
|
||||
# Specify search path for CMake modules to be loaded by include()
|
||||
# and find_package()
|
||||
list(APPEND CMAKE_MODULE_PATH "${LIB_SOURCE_DIR}/cmake/Modules")
|
||||
|
||||
include(DefineCMakeDefaults)
|
||||
include(DefineCompilerFlags)
|
||||
|
||||
project(libssh VERSION 0.9.7 LANGUAGES C)
|
||||
|
||||
@ -29,12 +23,6 @@ set(APPLICATION_NAME ${PROJECT_NAME})
|
||||
set(LIBRARY_VERSION "4.8.7")
|
||||
set(LIBRARY_SOVERSION "4")
|
||||
|
||||
# where to look first for cmake modules, before ${CMAKE_ROOT}/Modules/ is checked
|
||||
|
||||
# add definitions
|
||||
|
||||
include(DefinePlatformDefaults)
|
||||
|
||||
# Copy library files to a lib sub-directory
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${LIB_BINARY_DIR}/lib")
|
||||
|
||||
|
@ -1,20 +1,8 @@
|
||||
set(LIBSSH_LINK_LIBRARIES
|
||||
${LIBSSH_REQUIRED_LIBRARIES}
|
||||
)
|
||||
|
||||
|
||||
set(LIBSSH_LINK_LIBRARIES
|
||||
${LIBSSH_LINK_LIBRARIES}
|
||||
OpenSSL::Crypto
|
||||
)
|
||||
|
||||
if (MINGW AND Threads_FOUND)
|
||||
set(LIBSSH_LINK_LIBRARIES
|
||||
${LIBSSH_LINK_LIBRARIES}
|
||||
Threads::Threads
|
||||
)
|
||||
endif()
|
||||
|
||||
set(libssh_SRCS
|
||||
${LIB_SOURCE_DIR}/src/agent.c
|
||||
${LIB_SOURCE_DIR}/src/auth.c
|
||||
@ -66,30 +54,11 @@ set(libssh_SRCS
|
||||
${LIB_SOURCE_DIR}/src/pki_ed25519_common.c
|
||||
)
|
||||
|
||||
if (DEFAULT_C_NO_DEPRECATION_FLAGS)
|
||||
set_source_files_properties(known_hosts.c
|
||||
PROPERTIES
|
||||
COMPILE_FLAGS ${DEFAULT_C_NO_DEPRECATION_FLAGS})
|
||||
endif()
|
||||
|
||||
if (CMAKE_USE_PTHREADS_INIT)
|
||||
set(libssh_SRCS
|
||||
${libssh_SRCS}
|
||||
${LIB_SOURCE_DIR}/src/threads/noop.c
|
||||
${LIB_SOURCE_DIR}/src/threads/pthread.c
|
||||
)
|
||||
elseif (CMAKE_USE_WIN32_THREADS_INIT)
|
||||
set(libssh_SRCS
|
||||
${libssh_SRCS}
|
||||
${LIB_SOURCE_DIR}/src/threads/noop.c
|
||||
${LIB_SOURCE_DIR}/src/threads/winlocks.c
|
||||
)
|
||||
else()
|
||||
set(libssh_SRCS
|
||||
${libssh_SRCS}
|
||||
${LIB_SOURCE_DIR}/src/threads/noop.c
|
||||
)
|
||||
endif()
|
||||
set(libssh_SRCS
|
||||
${libssh_SRCS}
|
||||
${LIB_SOURCE_DIR}/src/threads/noop.c
|
||||
${LIB_SOURCE_DIR}/src/threads/pthread.c
|
||||
)
|
||||
|
||||
# LIBCRYPT specific
|
||||
set(libssh_SRCS
|
||||
@ -127,14 +96,3 @@ target_compile_options(_ssh
|
||||
PRIVATE
|
||||
${DEFAULT_C_COMPILE_FLAGS}
|
||||
-D_GNU_SOURCE)
|
||||
|
||||
|
||||
set_target_properties(_ssh
|
||||
PROPERTIES
|
||||
VERSION
|
||||
${LIBRARY_VERSION}
|
||||
SOVERSION
|
||||
${LIBRARY_SOVERSION}
|
||||
DEFINE_SYMBOL
|
||||
LIBSSH_EXPORTS
|
||||
)
|
||||
|
@ -6,12 +6,13 @@ FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
|
||||
echo '/*' > $FILES_TO_CHECKOUT
|
||||
echo '!/test/*' >> $FILES_TO_CHECKOUT
|
||||
echo '/test/build/*' >> $FILES_TO_CHECKOUT
|
||||
echo '/test/core/tsi/alts/fake_handshaker/*' >> $FILES_TO_CHECKOUT
|
||||
echo '/test/core/event_engine/fuzzing_event_engine/*' >> $FILES_TO_CHECKOUT
|
||||
echo '!/tools/*' >> $FILES_TO_CHECKOUT
|
||||
echo '/tools/codegen/*' >> $FILES_TO_CHECKOUT
|
||||
echo '!/examples/*' >> $FILES_TO_CHECKOUT
|
||||
echo '!/doc/*' >> $FILES_TO_CHECKOUT
|
||||
# FIXME why do we need csharp?
|
||||
#echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT
|
||||
echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT
|
||||
echo '!/src/python/*' >> $FILES_TO_CHECKOUT
|
||||
echo '!/src/objective-c/*' >> $FILES_TO_CHECKOUT
|
||||
echo '!/src/php/*' >> $FILES_TO_CHECKOUT
|
||||
|
11
contrib/update-submodules.sh
vendored
@ -1,11 +1,12 @@
#!/bin/sh

set -e

WORKDIR=$(dirname "$0")
WORKDIR=$(readlink -f "${WORKDIR}")
SCRIPT_PATH=$(realpath "$0")
SCRIPT_DIR=$(dirname "${SCRIPT_PATH}")
GIT_DIR=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel)
cd $GIT_DIR

"$WORKDIR/sparse-checkout/setup-sparse-checkout.sh"
contrib/sparse-checkout/setup-sparse-checkout.sh
git submodule init
git submodule sync
git submodule update --depth=1
git config --file .gitmodules --get-regexp .*path | sed 's/[^ ]* //' | xargs -I _ --max-procs 64 git submodule update --depth=1 --single-branch _
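The last line of the updated script fans the per-submodule updates out in parallel: the submodule paths are read back from .gitmodules and handed to xargs, which runs up to 64 shallow, single-branch `git submodule update` processes at once. The same pipeline spelled out with comments (assumes it runs from the repository root):

```bash
# 1. List the recorded submodule paths (the value of every "*.path <path>" entry in .gitmodules).
# 2. Update each path as a shallow, single-branch checkout, up to 64 updates in parallel.
git config --file .gitmodules --get-regexp '.*path' \
    | sed 's/[^ ]* //' \
    | xargs -I _ --max-procs 64 git submodule update --depth=1 --single-branch _
```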
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="23.10.1.1976"
|
||||
ARG VERSION="23.10.3.5"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -126,6 +126,7 @@ fi
|
||||
|
||||
mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output
|
||||
[ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
|
||||
[ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output
|
||||
mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
|
||||
mv ./programs/*.dict ./programs/*.options ./programs/*_seed_corpus.zip /output ||: # libFuzzer oss-fuzz compatible infrastructure
|
||||
|
||||
|
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="23.10.1.1976"
|
||||
ARG VERSION="23.10.3.5"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="23.10.1.1976"
|
||||
ARG VERSION="23.10.3.5"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
|
@ -15,10 +15,15 @@ CLICKHOUSE_CI_LOGS_USER=${CLICKHOUSE_CI_LOGS_USER:-ci}
|
||||
# Pre-configured destination cluster, where to export the data
|
||||
CLICKHOUSE_CI_LOGS_CLUSTER=${CLICKHOUSE_CI_LOGS_CLUSTER:-system_logs_export}
|
||||
|
||||
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime('UTC'), check_name String, instance_type String, instance_id String, "}
|
||||
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"CAST(0 AS UInt32) AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type, '' AS instance_id"}
|
||||
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime('UTC'), check_name LowCardinality(String), instance_type LowCardinality(String), instance_id String, INDEX ix_pr (pull_request_number) TYPE set(100), INDEX ix_commit (commit_sha) TYPE set(100), INDEX ix_check_time (check_start_time) TYPE minmax, "}
|
||||
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"CAST(0 AS UInt32) AS pull_request_number, '' AS commit_sha, now() AS check_start_time, toLowCardinality('') AS check_name, toLowCardinality('') AS instance_type, '' AS instance_id"}
|
||||
EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "}
|
||||
|
||||
# trace_log needs more columns for symbolization
|
||||
EXTRA_COLUMNS_TRACE_LOG="${EXTRA_COLUMNS} symbols Array(LowCardinality(String)), lines Array(LowCardinality(String)), "
|
||||
EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> toLowCardinality(demangle(addressToSymbol(x))), trace) AS symbols, arrayMap(x -> toLowCardinality(addressToLine(x)), trace) AS lines"
|
||||
|
||||
|
||||
function __set_connection_args
|
||||
{
|
||||
# It's impossible to use generous $CONNECTION_ARGS string, it's unsafe from word splitting perspective.
|
||||
@ -125,9 +130,18 @@ function setup_logs_replication
|
||||
echo 'Create %_log tables'
|
||||
clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
|
||||
do
|
||||
if [[ "$table" = "trace_log" ]]
|
||||
then
|
||||
EXTRA_COLUMNS_FOR_TABLE="${EXTRA_COLUMNS_TRACE_LOG}"
|
||||
EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION_TRACE_LOG}"
|
||||
else
|
||||
EXTRA_COLUMNS_FOR_TABLE="${EXTRA_COLUMNS}"
|
||||
EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION}"
|
||||
fi
|
||||
|
||||
# Calculate hash of its structure. Note: 4 is the version of extra columns - increment it if extra columns are changed:
|
||||
hash=$(clickhouse-client --query "
|
||||
SELECT sipHash64(4, groupArray((name, type)))
|
||||
SELECT sipHash64(9, groupArray((name, type)))
|
||||
FROM (SELECT name, type FROM system.columns
|
||||
WHERE database = 'system' AND table = '$table'
|
||||
ORDER BY position)
|
||||
@ -135,7 +149,7 @@ function setup_logs_replication
|
||||
|
||||
# Create the destination table with adapted name and structure:
|
||||
statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e '
|
||||
s/^\($/('"$EXTRA_COLUMNS"'/;
|
||||
s/^\($/('"$EXTRA_COLUMNS_FOR_TABLE"'/;
|
||||
s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/;
|
||||
s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/;
|
||||
/^TTL /d
|
||||
@ -155,7 +169,7 @@ function setup_logs_replication
|
||||
ENGINE = Distributed(${CLICKHOUSE_CI_LOGS_CLUSTER}, default, ${table}_${hash})
|
||||
SETTINGS flush_on_detach=0
|
||||
EMPTY AS
|
||||
SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
|
||||
SELECT ${EXTRA_COLUMNS_EXPRESSION_FOR_TABLE}, *
|
||||
FROM system.${table}
|
||||
" || continue
|
||||
|
||||
@ -163,7 +177,7 @@ function setup_logs_replication
|
||||
|
||||
clickhouse-client --query "
|
||||
CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS
|
||||
SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
|
||||
SELECT ${EXTRA_COLUMNS_EXPRESSION_FOR_TABLE}, *
|
||||
FROM system.${table}
|
||||
" || continue
|
||||
done
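The structure hash computed above is baked into the name of the destination table, so any change to a log table's schema, or a bump of the version constant mentioned in the comment, makes the exporter create a fresh `<table>_<hash>` table instead of inserting into one with a mismatched layout. A condensed sketch of that naming scheme (the table name is illustrative):

```bash
table="query_log"

# Same query as in the loop above: hash the ordered (name, type) pairs, keyed by the extra-columns version.
hash=$(clickhouse-client --query "
    SELECT sipHash64(9, groupArray((name, type)))
    FROM (SELECT name, type FROM system.columns
          WHERE database = 'system' AND table = '$table'
          ORDER BY position)
")

echo "exporting system.${table} into destination table ${table}_${hash}"
```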
|
||||
|
@ -19,6 +19,11 @@ dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
|
||||
dpkg -i package_folder/clickhouse-server_*.deb
|
||||
dpkg -i package_folder/clickhouse-client_*.deb
|
||||
|
||||
# Check that the tools are available under short names
|
||||
ch --query "SELECT 1" || exit 1
|
||||
chl --query "SELECT 1" || exit 1
|
||||
chc --version || exit 1
|
||||
|
||||
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
@ -62,7 +67,7 @@ if [ "$NUM_TRIES" -gt "1" ]; then
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
|
||||
|
||||
mkdir -p /var/run/clickhouse-server
|
||||
# simpliest way to forward env variables to server
|
||||
# simplest way to forward env variables to server
|
||||
sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid
|
||||
else
|
||||
sudo clickhouse start
|
||||
|
@@ -53,31 +53,28 @@ function configure()
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml

function randomize_config_boolean_value {
function randomize_keeper_config_boolean_value {
value=$(($RANDOM % 2))
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
sudo cat /etc/clickhouse-server/config.d/$2.xml \
| sed "s|<$1>[01]</$1>|<$1>$value</$1>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
> /etc/clickhouse-server/config.d/$2.xml.tmp
sudo mv /etc/clickhouse-server/config.d/$2.xml.tmp /etc/clickhouse-server/config.d/$2.xml
}

if [[ -n "$RANDOMIZE_KEEPER_FEATURE_FLAGS" ]] && [[ "$RANDOMIZE_KEEPER_FEATURE_FLAGS" -eq 1 ]]; then
# Randomize all Keeper feature flags
randomize_config_boolean_value filtered_list
randomize_config_boolean_value multi_read
randomize_config_boolean_value check_not_exists
randomize_config_boolean_value create_if_not_exists
randomize_config_boolean_value filtered_list keeper_port
randomize_config_boolean_value multi_read keeper_port
randomize_config_boolean_value check_not_exists keeper_port
randomize_config_boolean_value create_if_not_exists keeper_port
fi

sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml

# Randomize merge tree setting allow_experimental_block_number_column
value=$(($RANDOM % 2))
sudo cat /etc/clickhouse-server/config.d/merge_tree_settings.xml \
| sed "s|<allow_experimental_block_number_column>[01]</allow_experimental_block_number_column>|<allow_experimental_block_number_column>$value</allow_experimental_block_number_column>|" \
> /etc/clickhouse-server/config.d/merge_tree_settings.xml.tmp
sudo mv /etc/clickhouse-server/config.d/merge_tree_settings.xml.tmp /etc/clickhouse-server/config.d/merge_tree_settings.xml
randomize_config_boolean_value use_compression zookeeper

randomize_config_boolean_value allow_experimental_block_number_column merge_tree_settings

# for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
18
docs/changelogs/v23.10.2.13-stable.md
Normal file
@@ -0,0 +1,18 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.10.2.13-stable (65d8522bb1d) FIXME as compared to v23.10.1.1976-stable (13adae0e42f)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix inconsistency of "cast('0' as DateTime64(3))" and "cast('0' as Nullable(DateTime64(3)))" [#56286](https://github.com/ClickHouse/ClickHouse/pull/56286) ([李扬](https://github.com/taiyang-li)).
* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
16
docs/changelogs/v23.10.3.5-stable.md
Normal file
@@ -0,0 +1,16 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.10.3.5-stable (b2ba7637a41) FIXME as compared to v23.10.2.13-stable (65d8522bb1d)

#### Improvement

* Backported in [#56513](https://github.com/ClickHouse/ClickHouse/issues/56513): Allow backup of materialized view with dropped inner table instead of failing the backup. [#56387](https://github.com/ClickHouse/ClickHouse/pull/56387) ([Kseniia Sumarokova](https://github.com/kssenii)).

#### NO CL CATEGORY

* Backported in [#56605](https://github.com/ClickHouse/ClickHouse/issues/56605):. [#56598](https://github.com/ClickHouse/ClickHouse/pull/56598) ([Maksim Kita](https://github.com/kitaisreal)).
14
docs/changelogs/v23.3.16.7-lts.md
Normal file
@@ -0,0 +1,14 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.3.16.7-lts (fb4125cc92a) FIXME as compared to v23.3.15.29-lts (218336662e4)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix: avoid using regex match, possibly containing alternation, as a key condition. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
21
docs/changelogs/v23.8.6.16-lts.md
Normal file
@@ -0,0 +1,21 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.8.6.16-lts (077df679bed) FIXME as compared to v23.8.5.16-lts (e8a1af5fe2f)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix rare case of CHECKSUM_DOESNT_MATCH error [#54549](https://github.com/ClickHouse/ClickHouse/pull/54549) ([alesapin](https://github.com/alesapin)).
* Fix: avoid using regex match, possibly containing alternation, as a key condition. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Improve enrich image [#55793](https://github.com/ClickHouse/ClickHouse/pull/55793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
17
docs/changelogs/v23.9.4.11-stable.md
Normal file
@@ -0,0 +1,17 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.9.4.11-stable (74c1f49dd6a) FIXME as compared to v23.9.3.12-stable (b7230b06563)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix wrong query result when http_write_exception_in_output_format=1 [#56135](https://github.com/ClickHouse/ClickHouse/pull/56135) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix schema cache for fallback JSON->JSONEachRow with changed settings [#56172](https://github.com/ClickHouse/ClickHouse/pull/56172) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
@@ -23,43 +23,34 @@ Create a fork of ClickHouse repository. To do that please click on the “fork

The development process consists of first committing the intended changes into your fork of ClickHouse and then creating a “pull request” for these changes to be accepted into the main repository (ClickHouse/ClickHouse).

To work with git repositories, please install `git`.

To do that in Ubuntu you would run in the command line terminal:
To work with Git repositories, please install `git`. To do that in Ubuntu you would run in the command line terminal:

sudo apt update
sudo apt install git

A brief manual on using Git can be found here: https://education.github.com/git-cheat-sheet-education.pdf.
For a detailed manual on Git see https://git-scm.com/book/en/v2.
A brief manual on using Git can be found [here](https://education.github.com/git-cheat-sheet-education.pdf).
For a detailed manual on Git see [here](https://git-scm.com/book/en/v2).

## Cloning a Repository to Your Development Machine {#cloning-a-repository-to-your-development-machine}

Next, you need to download the source files onto your working machine. This is called “to clone a repository” because it creates a local copy of the repository on your working machine.

In the command line terminal run:
Run in your terminal:

git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git
git clone git@github.com:your_github_username/ClickHouse.git # replace placeholder with your GitHub user name
cd ClickHouse

Or (if you'd like to use sparse checkout for submodules and avoid checking out unneeded files):
This command will create a directory `ClickHouse/` containing the source code of ClickHouse. If you specify a custom checkout directory (after the URL), it is important that this path does not contain whitespaces as it may lead to problems with the build system.

git clone git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
./contrib/update-submodules.sh
To make library dependencies available for the build, the ClickHouse repository uses Git submodules, i.e. references to external repositories. These are not checked out by default. To do so, you can either

Note: please, substitute *your_github_username* with what is appropriate!
- run `git clone` with option `--recurse-submodules`,

This command will create a directory `ClickHouse` containing the working copy of the project.
- if `git clone` did not check out submodules, run `git submodule update --init --jobs <N>` (e.g. `<N> = 12` to parallelize the checkout) to achieve the same as the previous alternative, or

It is important that the path to the working directory contains no whitespaces as it may lead to problems with running the build system.
- if `git clone` did not check out submodules and you like to use [sparse](https://github.blog/2020-01-17-bring-your-monorepo-down-to-size-with-sparse-checkout/) and [shallow](https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/) submodule checkout to omit unneeded files and history in submodules to save space (ca. 5 GB instead of ca. 15 GB), run `./contrib/update-submodules.sh`. Not really recommended as it generally makes working with submodules less convenient and slower.

Please note that ClickHouse repository uses `submodules`. That is what the references to additional repositories are called (i.e. external libraries on which the project depends). It means that when cloning the repository you need to specify the `--recursive` flag as in the example above. If the repository has been cloned without submodules, to download them you need to run the following:

git submodule init
git submodule update

You can check the status with the command: `git submodule status`.
You can check the Git status with the command: `git submodule status`.

If you get the following error message:

@@ -83,36 +74,6 @@ You can also add original ClickHouse repo address to your local repository to pu

After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`.

### Working with Submodules {#working-with-submodules}

Working with submodules in git could be painful. Next commands will help to manage it:

# ! each command accepts
# Update remote URLs for submodules. Barely rare case
git submodule sync
# Add new submodules
git submodule init
# Update existing submodules to the current state
git submodule update
# Two last commands could be merged together
git submodule update --init

The next commands would help you to reset all submodules to the initial state (!WARNING! - any changes inside will be deleted):

# Synchronizes submodules' remote URL with .gitmodules
git submodule sync
# Update the registered submodules with initialize not yet initialized
git submodule update --init
# Reset all changes done after HEAD
git submodule foreach git reset --hard
# Clean files from .gitignore
git submodule foreach git clean -xfd
# Repeat last 4 commands for all submodule
git submodule foreach git submodule sync
git submodule foreach git submodule update --init
git submodule foreach git submodule foreach git reset --hard
git submodule foreach git submodule foreach git clean -xfd

## Build System {#build-system}

ClickHouse uses CMake and Ninja for building.
@@ -2,9 +2,10 @@
slug: /en/engines/table-engines/integrations/materialized-postgresql
sidebar_position: 130
sidebar_label: MaterializedPostgreSQL
title: MaterializedPostgreSQL
---

# [experimental] MaterializedPostgreSQL

Creates ClickHouse table with an initial data dump of PostgreSQL table and starts replication process, i.e. executes background job to apply new changes as they happen on PostgreSQL table in the remote PostgreSQL database.

If more than one table is required, it is highly recommended to use the [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) database engine instead of the table engine and use the `materialized_postgresql_tables_list` setting, which specifies the tables to be replicated (will also be possible to add database `schema`). It will be much better in terms of CPU, fewer connections and fewer replication slots inside the remote PostgreSQL database.
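For orientation, a minimal sketch of the recommended database-engine approach (connection details and table names below are hypothetical, not taken from this commit):

```sql
-- Hypothetical connection and table list: replicate only the listed tables through
-- the MaterializedPostgreSQL *database* engine rather than one table engine per table.
CREATE DATABASE pg_replica
ENGINE = MaterializedPostgreSQL('postgres-host:5432', 'source_db', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'orders,customers';
```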
@@ -46,6 +46,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2

`sharding_key` - (optionally) sharding key

Specifying the `sharding_key` is necessary for the following:

- For `INSERTs` into a distributed table (as the table engine needs the `sharding_key` to determine how to split the data). However, if `insert_distributed_one_random_shard` setting is enabled, then `INSERTs` do not need the sharding key
- For use with `optimize_skip_unused_shards` as the `sharding_key` is necessary to determine what shards should be queried
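A minimal sketch of a sharded setup that uses a sharding key (cluster, table, and column names are illustrative only, not taken from this change):

```sql
-- Local table on every shard, plus a Distributed table routing over it.
-- rand() as the sharding key spreads INSERTs evenly across shards.
CREATE TABLE hits_local ON CLUSTER my_cluster
(
    user_id UInt64,
    event_time DateTime
)
ENGINE = MergeTree
ORDER BY (user_id, event_time);

CREATE TABLE hits_all ON CLUSTER my_cluster AS hits_local
ENGINE = Distributed(my_cluster, currentDatabase(), hits_local, rand());

-- The INSERT goes through the Distributed table; the sharding key decides the target shard.
INSERT INTO hits_all VALUES (1, now());
```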
#### policy_name

`policy_name` - (optionally) policy name, it will be used to store temporary files for background send
@@ -1,5 +1,4 @@
---
slug: /en/getting-started/example-datasets/wikistat
sidebar_label: WikiStat
---

@@ -41,7 +40,8 @@ CREATE TABLE wikistat
project LowCardinality(String),
subproject LowCardinality(String),
path String CODEC(ZSTD(3)),
hits UInt64 CODEC(ZSTD(3))
hits UInt64 CODEC(ZSTD(3)),
size UInt64 CODEC(ZSTD(3))
)
ENGINE = MergeTree
ORDER BY (path, time);
@@ -2156,7 +2156,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t

- [input_format_parquet_local_file_min_bytes_for_seek](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_local_file_min_bytes_for_seek) - min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format. Default value - `8192`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`.

## ParquetMetadata {data-format-parquet-metadata}
@@ -438,7 +438,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Query-Id: 96fe0052-01e6-43ce-b12a-6b7370de6e8a
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
# HELP "Query" "Number of executing queries"
@@ -603,7 +603,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@@ -643,7 +643,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Connection: Keep-Alive
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@@ -695,7 +695,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Absolute Path File</body></html>
@@ -714,7 +714,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Relative Path File</body></html>
@@ -74,6 +74,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasn’t don
### Elixir
- [clickhousex](https://github.com/appodeal/clickhousex/)
- [pillar](https://github.com/sofakingworld/pillar)
- [ecto_ch](https://github.com/plausible/ecto_ch)
### Nim
- [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse)
### Haskell
@@ -18,7 +18,15 @@ function, table engine, database, etc. In the examples below the parameter list
linked to for each type.

Parameters set in a named collection can be overridden in SQL, this is shown in the examples
below.
below. This ability can be limited using `[NOT] OVERRIDABLE` keywords and XML attributes
and/or the configuration option `allow_named_collection_override_by_default`.

:::warning
If override is allowed, it may be possible for users without administrative access to
figure out the credentials that you are trying to hide.
If you are using named collections with that purpose, you should disable
`allow_named_collection_override_by_default` (which is enabled by default).
:::

## Storing named collections in the system database

@@ -26,11 +34,17 @@ below.

```sql
CREATE NAMED COLLECTION name AS
key_1 = 'value',
key_2 = 'value2',
key_1 = 'value' OVERRIDABLE,
key_2 = 'value2' NOT OVERRIDABLE,
url = 'https://connection.url/'
```

In the above example:

* `key_1` can always be overridden.
* `key_2` can never be overridden.
* `url` can be overridden or not depending on the value of `allow_named_collection_override_by_default`.
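As a rough, hedged illustration of what overriding looks like at query time (the collection `my_s3` and the bucket URL are hypothetical, not part of this change):

```sql
-- Assuming a named collection `my_s3` that stores url, access_key_id and secret_access_key,
-- an OVERRIDABLE key such as url can be replaced inline for a single query:
SELECT count()
FROM s3(my_s3, url = 'https://example-bucket.s3.amazonaws.com/other_prefix/*.parquet');
```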
### Permissions to create named collections with DDL

To manage named collections with DDL a user must have the `named_control_collection` privilege. This can be assigned by adding a file to `/etc/clickhouse-server/users.d/`. The example gives the user `default` both the `access_management` and `named_collection_control` privileges:

@@ -61,25 +75,37 @@ In the above example the `password_sha256_hex` value is the hexadecimal represen
<clickhouse>
  <named_collections>
    <name>
      <key_1>value</key_1>
      <key_2>value_2</key_2>
      <key_1 overridable="true">value</key_1>
      <key_2 overridable="false">value_2</key_2>
      <url>https://connection.url/</url>
    </name>
  </named_collections>
</clickhouse>
```

In the above example:

* `key_1` can always be overridden.
* `key_2` can never be overridden.
* `url` can be overridden or not depending on the value of `allow_named_collection_override_by_default`.

## Modifying named collections

Named collections that are created with DDL queries can be altered or dropped with DDL. Named collections created with XML files can be managed by editing or deleting the corresponding XML.

### Alter a DDL named collection

Change or add the keys `key1` and `key3` of the collection `collection2`:
Change or add the keys `key1` and `key3` of the collection `collection2`
(this will not change the value of the `overridable` flag for those keys):
```sql
ALTER NAMED COLLECTION collection2 SET key1=4, key3='value3'
```

Change or add the key `key1` and allow it to be always overridden:
```sql
ALTER NAMED COLLECTION collection2 SET key1=4 OVERRIDABLE
```

Remove the key `key2` from `collection2`:
```sql
ALTER NAMED COLLECTION collection2 DELETE key2
@@ -90,6 +116,13 @@ Change or add the key `key1` and delete the key `key3` of the collection `collec
ALTER NAMED COLLECTION collection2 SET key1=4, DELETE key3
```

To force a key to use the default settings for the `overridable` flag, you have to
remove and re-add the key.
```sql
ALTER NAMED COLLECTION collection2 DELETE key1;
ALTER NAMED COLLECTION collection2 SET key1=4;
```

### Drop the DDL named collection `collection2`:
```sql
DROP NAMED COLLECTION collection2
@@ -1,5 +1,4 @@
---
slug: /en/operations/optimizing-performance/profile-guided-optimization
sidebar_position: 54
sidebar_label: Profile Guided Optimization (PGO)
---
@@ -11,7 +11,8 @@ ClickHouse runs sampling profiler that allows analyzing query execution. Using p

Query profiler is automatically enabled in ClickHouse Cloud and you can run a sample query as follows

:::note If you are running the following query in ClickHouse Cloud, make sure to change `FROM system.trace_log` to `FROM clusterAllReplicas(default, system.trace_log)` to select from all nodes of the cluster :::
:::note If you are running the following query in ClickHouse Cloud, make sure to change `FROM system.trace_log` to `FROM clusterAllReplicas(default, system.trace_log)` to select from all nodes of the cluster
:::

``` sql
SELECT
@@ -214,7 +214,7 @@ Max consecutive resolving failures before dropping a host from ClickHouse DNS ca

Type: UInt32

Default: 1024
Default: 10

## index_mark_cache_policy
@@ -2427,6 +2427,8 @@ This section contains the following parameters:
* hostname_levenshtein_distance - just like nearest_hostname, but it compares hostname in a levenshtein distance manner.
* first_or_random - selects the first ZooKeeper node, if it's not available then randomly selects one of remaining ZooKeeper nodes.
* round_robin - selects the first ZooKeeper node, if reconnection happens selects the next.
- `use_compression` — If set to true, enables compression in Keeper protocol.

**Example configuration**
@@ -3943,6 +3943,17 @@ Possible values:

Default value: `''`.

## preferred_optimize_projection_name {#preferred_optimize_projection_name}

If it is set to a non-empty string, ClickHouse will try to apply specified projection in query.

Possible values:

- string: name of preferred projection

Default value: `''`.
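A brief hedged sketch of how this setting is typically applied (table and projection names are made up for illustration):

```sql
-- Ask the optimizer to prefer a specific projection for this query only.
SELECT user_id, count()
FROM events
GROUP BY user_id
SETTINGS preferred_optimize_projection_name = 'user_id_projection';
```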
## alter_sync {#alter-sync}

Allows to set up waiting for actions to be executed on replicas by [ALTER](../../sql-reference/statements/alter/index.md), [OPTIMIZE](../../sql-reference/statements/optimize.md) or [TRUNCATE](../../sql-reference/statements/truncate.md) queries.
35
docs/en/operations/system-tables/symbols.md
Normal file
@@ -0,0 +1,35 @@
---
slug: /en/operations/system-tables/symbols
---
# symbols

Contains information for introspection of `clickhouse` binary. It requires the introspection privilege to access.
This table is only useful for C++ experts and ClickHouse engineers.

Columns:

- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol name in the binary. It is mangled. You can apply `demangle(symbol)` to obtain a readable name.
- `address_begin` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Start address of the symbol in the binary.
- `address_end` ([UInt64](../../sql-reference/data-types/int-uint.md)) — End address of the symbol in the binary.
- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `event`.

**Example**

``` sql
SELECT address_begin, address_end - address_begin AS size, demangle(symbol) FROM system.symbols ORDER BY size DESC LIMIT 10
```

``` text
┌─address_begin─┬─────size─┬─demangle(symbol)──────────────────────────────────────────────────────────────────┐
│ 25000976 │ 29466000 │ icudt70_dat │
│ 400605288 │ 2097272 │ arena_emap_global │
│ 18760592 │ 1048576 │ CLD2::kQuadChrome1015_2 │
│ 9807152 │ 884808 │ TopLevelDomainLookupHash::isValid(char const*, unsigned long)::wordlist │
│ 57442432 │ 850608 │ llvm::X86Insts │
│ 55682944 │ 681360 │ (anonymous namespace)::X86DAGToDAGISel::SelectCode(llvm::SDNode*)::MatcherTable │
│ 55130368 │ 502840 │ (anonymous namespace)::X86InstructionSelector::getMatchTable() const::MatchTable0 │
│ 402930616 │ 404032 │ qpl::ml::dispatcher::hw_dispatcher::get_instance()::instance │
│ 274131872 │ 356795 │ DB::SettingsTraits::Accessor::instance()::$_0::operator()() const │
│ 58293040 │ 249424 │ llvm::X86InstrNameData │
└───────────────┴──────────┴───────────────────────────────────────────────────────────────────────────────────┘
```
@@ -16,7 +16,7 @@ CREATE TABLE IF NOT EXISTS float_vs_decimal
my_decimal Decimal64(3)
)Engine=MergeTree ORDER BY tuple()

INSERT INTO float_vs_decimal SELECT round(canonicalRand(), 3) AS res, res FROM system.numbers LIMIT 1000000; # Generate 1 000 000 random number with 2 decimal places and store them as a float and as a decimal
INSERT INTO float_vs_decimal SELECT round(randCanonical(), 3) AS res, res FROM system.numbers LIMIT 1000000; # Generate 1 000 000 random number with 2 decimal places and store them as a float and as a decimal

SELECT sum(my_float), sum(my_decimal) FROM float_vs_decimal;
> 500279.56300000014 500279.563
@@ -2766,9 +2766,11 @@ Result:

## fromUnixTimestamp

Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type.
This function converts a Unix timestamp to a calendar date and a time of a day.

fromUnixTimestamp uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
It can be called in two ways:

When given a single argument of type [Integer](../../sql-reference/data-types/int-uint.md), it returns a value of type [DateTime](../../sql-reference/data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime).

Alias: `FROM_UNIXTIME`.

@@ -2786,14 +2788,16 @@ Result:
└──────────────────────────────┘
```

When there are two or three arguments, the first an [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second a constant format string and the third an optional constant time zone string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type.
When given two or three arguments where the first argument is a value of type [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../../sql-reference/data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used.

For example:
**Example:**

```sql
SELECT fromUnixTimestamp(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
```

Result:

```text
┌─DateTime────────────┐
│ 2009-02-11 14:42:23 │
@@ -2806,19 +2810,20 @@ SELECT fromUnixTimestamp(1234334543, '%Y-%m-%d %R:%S') AS DateTime;

## fromUnixTimestampInJodaSyntax

Similar to fromUnixTimestamp, except that it formats time in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
Same as [fromUnixTimestamp](#fromUnixTimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style.

**Example:**

``` sql
SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC');
SELECT fromUnixTimestampInJodaSyntax(1234334543, 'yyyy-MM-dd HH:mm:ss', 'UTC') AS DateTime;
```

Result:

```
┌─fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC')────┐
│ 2022-11-30 10:41:12 │
└────────────────────────────────────────────────────────────────────────────┘
┌─DateTime────────────┐
│ 2009-02-11 06:42:23 │
└─────────────────────┘
```

## toModifiedJulianDay
@@ -107,11 +107,7 @@ round(3.65, 1) = 3.6

Rounds a number to a specified decimal position.

- If the rounding number is halfway between two numbers, the function uses banker’s rounding.

Banker's rounding is a method of rounding fractional numbers. When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position. For example: 3.5 rounds up to 4, 2.5 rounds down to 2.

It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest). The [round](#rounding_functions-round) function performs the same rounding for floating point numbers. The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`.
- If the rounding number is halfway between two numbers, the function uses banker’s rounding. Banker's rounding is a method of rounding fractional numbers. When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position. For example: 3.5 rounds up to 4, 2.5 rounds down to 2. It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest). The [round](#rounding_functions-round) function performs the same rounding for floating point numbers. The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`.

- In other cases, the function rounds numbers to the nearest integer.
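A quick sanity check of the banker's rounding behaviour described above (a hedged illustration added for clarity, using the same values as the text):

```sql
-- Halfway cases round to the nearest even digit; other cases round as usual.
SELECT
    roundBankers(2.5)    AS half_down,  -- 2
    roundBankers(3.5)    AS half_up,    -- 4
    roundBankers(45, -1) AS tens;       -- 40
```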
|
@ -171,7 +171,8 @@ Result:
|
||||
Can be used with [MinHash](../../sql-reference/functions/hash-functions.md#ngramminhash) functions for detection of semi-duplicate strings:
|
||||
|
||||
``` sql
|
||||
SELECT tupleHammingDistance(wordShingleMinHash(string), wordShingleMinHashCaseInsensitive(string)) as HammingDistance FROM (SELECT 'ClickHouse is a column-oriented database management system for online analytical processing of queries.' AS string);
|
||||
SELECT tupleHammingDistance(wordShingleMinHash(string), wordShingleMinHashCaseInsensitive(string)) AS HammingDistance
|
||||
FROM (SELECT 'ClickHouse is a column-oriented database management system for online analytical processing of queries.' AS string);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
@@ -1840,9 +1840,9 @@ Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and
**Syntax**

``` sql
fromUnixTimestamp64Milli(value [, timezone])
fromUnixTimestamp64Micro(value [, timezone])
fromUnixTimestamp64Nano(value [, timezone])
fromUnixTimestamp64Milli(value[, timezone])
fromUnixTimestamp64Micro(value[, timezone])
fromUnixTimestamp64Nano(value[, timezone])
```

**Arguments**
@@ -12,9 +12,9 @@ This query intends to modify already existing named collections.
```sql
ALTER NAMED COLLECTION [IF EXISTS] name [ON CLUSTER cluster]
[ SET
key_name1 = 'some value',
key_name2 = 'some value',
key_name3 = 'some value',
key_name1 = 'some value' [[NOT] OVERRIDABLE],
key_name2 = 'some value' [[NOT] OVERRIDABLE],
key_name3 = 'some value' [[NOT] OVERRIDABLE],
... ] |
[ DELETE key_name4, key_name5, ... ]
```
@@ -22,9 +22,9 @@ key_name3 = 'some value',
**Example**

```sql
CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
CREATE NAMED COLLECTION foobar AS a = '1' NOT OVERRIDABLE, b = '2';

ALTER NAMED COLLECTION foobar SET a = '2', c = '3';
ALTER NAMED COLLECTION foobar SET a = '2' OVERRIDABLE, c = '3';

ALTER NAMED COLLECTION foobar DELETE b;
```
@@ -11,16 +11,16 @@ Creates a new named collection.

```sql
CREATE NAMED COLLECTION [IF NOT EXISTS] name [ON CLUSTER cluster] AS
key_name1 = 'some value',
key_name2 = 'some value',
key_name3 = 'some value',
key_name1 = 'some value' [[NOT] OVERRIDABLE],
key_name2 = 'some value' [[NOT] OVERRIDABLE],
key_name3 = 'some value' [[NOT] OVERRIDABLE],
...
```

**Example**

```sql
CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
CREATE NAMED COLLECTION foobar AS a = '1', b = '2' OVERRIDABLE;
```

**Related statements**
@@ -103,7 +103,7 @@ INSERT INTO holdings VALUES
('Bitcoin', 200),
('Ethereum', 250),
('Ethereum', 5000),
('DOGEFI', 10);
('DOGEFI', 10),
('Bitcoin Diamond', 5000);
```
@@ -366,7 +366,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Query-Id: 96fe0052-01e6-43ce-b12a-6b7370de6e8a
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
<
# HELP "Query" "Number of executing queries"
@@ -529,7 +529,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@@ -569,7 +569,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Connection: Keep-Alive
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@@ -621,7 +621,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Absolute Path File</body></html>
@@ -640,7 +640,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Relative Path File</body></html>

@@ -362,7 +362,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Query-Id: 96fe0052-01e6-43ce-b12a-6b7370de6e8a
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
# HELP "Query" "Number of executing queries"
@@ -520,7 +520,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@@ -560,7 +560,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Connection: Keep-Alive
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
@@ -612,7 +612,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Absolute Path File</body></html>
@@ -631,7 +631,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Connection: Keep-Alive
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< Keep-Alive: timeout=10
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
<html><body>Relative Path File</body></html>
@@ -439,6 +439,13 @@ else()
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

# A target to get stripped binary.
# Note: this is different to the above (extract debug symbols to a separate place)
add_custom_target(clickhouse-stripped ALL
COMMAND "${STRIP_PATH}" -o "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-stripped" --strip-debug --remove-section=.comment --remove-section=.note "${CMAKE_CURRENT_BINARY_DIR}/clickhouse"
DEPENDS clickhouse
COMMENT "Stripping clickhouse binary" VERBATIM)

if (ENABLE_TESTS)
set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms)
add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS})
@@ -425,7 +425,7 @@ void Client::connect()
if (hosts_and_ports.empty())
{
String host = config().getString("host", "localhost");
UInt16 port = ConnectionParameters::getPortFromConfig(config());
UInt16 port = ConnectionParameters::getPortFromConfig(config(), host);
hosts_and_ports.emplace_back(HostAndPort{host, port});
}
@@ -5,6 +5,7 @@

#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/randomSeed.h>
#include <Common/setThreadName.h>
#include <Common/CurrentMetrics.h>
#include <Interpreters/InterpreterInsertQuery.h>
@@ -59,7 +60,7 @@ void ClusterCopier::init()
getContext()->setClustersConfig(task_cluster_current_config, false, task_cluster->clusters_prefix);

/// Set up shards and their priority
task_cluster->random_engine.seed(task_cluster->random_device());
task_cluster->random_engine.seed(randomSeed());
for (auto & task_table : task_cluster->table_tasks)
{
task_table.cluster_pull = getContext()->getCluster(task_table.cluster_pull_name);
@@ -7,7 +7,7 @@

#include <Poco/Util/AbstractConfiguration.h>

#include <random>
#include <pcg_random.hpp>

namespace DB
{
@@ -45,7 +45,6 @@ struct TaskCluster
/// Subtasks
TasksTable table_tasks;

std::random_device random_device;
pcg64 random_engine;
};
@@ -420,7 +420,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)

/// Create symlinks.

std::initializer_list<const char *> tools
std::initializer_list<std::string_view> tools
{
"clickhouse-server",
"clickhouse-client",
@@ -435,6 +435,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
"clickhouse-keeper",
"clickhouse-keeper-converter",
"clickhouse-disks",
"ch",
"chl",
"chc",
};

for (const auto & tool : tools)
@@ -444,29 +447,39 @@ int mainEntryClickHouseInstall(int argc, char ** argv)

if (fs::exists(symlink_path))
{
bool is_symlink = FS::isSymlink(symlink_path);
fs::path points_to;
if (is_symlink)
points_to = fs::weakly_canonical(FS::readSymlink(symlink_path));

if (is_symlink && (points_to == main_bin_path || (options.count("link") && points_to == binary_self_canonical_path)))
/// Do not replace short named symlinks if they are already present in the system
/// to avoid collision with other tools.
if (!tool.starts_with("clickhouse"))
{
fmt::print("Symlink {} already exists. Will keep it.\n", symlink_path.string());
need_to_create = false;
}
else
{
if (!is_symlink)
bool is_symlink = FS::isSymlink(symlink_path);
fs::path points_to;
if (is_symlink)
points_to = fs::weakly_canonical(FS::readSymlink(symlink_path));

if (is_symlink && (points_to == main_bin_path || (options.count("link") && points_to == binary_self_canonical_path)))
{
fs::path rename_path = symlink_path.replace_extension(".old");
fmt::print("File {} already exists but it's not a symlink. Will rename to {}.\n",
symlink_path.string(), rename_path.string());
fs::rename(symlink_path, rename_path);
need_to_create = false;
}
else if (points_to != main_bin_path)
else
{
fmt::print("Symlink {} already exists but it points to {}. Will replace the old symlink to {}.\n",
symlink_path.string(), points_to.string(), main_bin_path.string());
fs::remove(symlink_path);
if (!is_symlink)
{
fs::path rename_path = symlink_path.replace_extension(".old");
fmt::print("File {} already exists but it's not a symlink. Will rename to {}.\n",
symlink_path.string(), rename_path.string());
fs::rename(symlink_path, rename_path);
}
else if (points_to != main_bin_path)
{
fmt::print("Symlink {} already exists but it points to {}. Will replace the old symlink to {}.\n",
symlink_path.string(), points_to.string(), main_bin_path.string());
fs::remove(symlink_path);
}
}
}
}
@@ -424,7 +424,7 @@ void LocalServer::setupUsers()

void LocalServer::connect()
{
connection_parameters = ConnectionParameters(config());
connection_parameters = ConnectionParameters(config(), "localhost");
connection = LocalConnection::createConnection(
connection_parameters, global_context, need_render_progress, need_render_profile_events, server_display_name);
}
@@ -2,15 +2,12 @@
#include <csetjmp>
#include <unistd.h>

#ifdef OS_LINUX
#include <sys/mman.h>
#endif

#include <new>
#include <iostream>
#include <vector>
#include <string>
#include <tuple>
#include <string_view>
#include <utility> /// pair

#include <fmt/format.h>
@@ -22,7 +19,6 @@
#include <Common/IO.h>

#include <base/phdr_cache.h>
#include <base/scope_guard.h>

/// Universal executable for various clickhouse applications
@@ -98,7 +94,7 @@ using MainFunc = int (*)(int, char**);
#if !defined(FUZZING_MODE)

/// Add an item here to register new application
std::pair<const char *, MainFunc> clickhouse_applications[] =
std::pair<std::string_view, MainFunc> clickhouse_applications[] =
{
#if ENABLE_CLICKHOUSE_LOCAL
{"local", mainEntryClickHouseLocal},
@@ -158,6 +154,18 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
#endif
};

/// Add an item here to register a new short name
std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
{
#if ENABLE_CLICKHOUSE_LOCAL
{"ch", "local"},
{"chl", "local"},
#endif
#if ENABLE_CLICKHOUSE_CLIENT
{"chc", "client"},
#endif
};

int printHelp(int, char **)
{
std::cerr << "Use one of the following commands:" << std::endl;
@@ -387,15 +395,21 @@ void checkHarmfulEnvironmentVariables(char ** argv)

}

bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv)
bool isClickhouseApp(std::string_view app_suffix, std::vector<char *> & argv)
{
for (const auto & [alias, name] : clickhouse_short_names)
if (app_suffix == name
&& !argv.empty() && (alias == argv[0] || endsWith(argv[0], "/" + std::string(alias))))
return true;

/// Use app if the first arg 'app' is passed (the arg should be quietly removed)
if (argv.size() >= 2)
{
auto first_arg = argv.begin() + 1;

/// 'clickhouse --client ...' and 'clickhouse client ...' are Ok
if (*first_arg == "--" + app_suffix || *first_arg == app_suffix)
if (*first_arg == app_suffix
|| (std::string_view(*first_arg).starts_with("--") && std::string_view(*first_arg).substr(2) == app_suffix))
{
argv.erase(first_arg);
return true;
@@ -403,7 +417,7 @@ bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv)
}

/// Use app if clickhouse binary is run through symbolic link with name clickhouse-app
std::string app_name = "clickhouse-" + app_suffix;
std::string app_name = "clickhouse-" + std::string(app_suffix);
return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name));
}
@@ -11,8 +11,8 @@ else ()
endif ()

add_custom_target (self-extracting ALL
${CMAKE_COMMAND} -E remove clickhouse
${CMAKE_COMMAND} -E remove clickhouse clickhouse-stripped
COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse ../clickhouse
DEPENDS clickhouse compressor
COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-stripped ../clickhouse-stripped
DEPENDS clickhouse clickhouse-stripped compressor
)
@@ -104,15 +104,14 @@
</url_scheme_mappers>

<!-- Add headers to response in options request. OPTIONS method is used in CORS preflight requests. -->
<!-- It is off by default. Next headers are obligate for CORS.-->
<!-- http_options_response>
<http_options_response>
<header>
<name>Access-Control-Allow-Origin</name>
<value>*</value>
</header>
<header>
<name>Access-Control-Allow-Headers</name>
<value>origin, x-requested-with</value>
<value>origin, x-requested-with, x-clickhouse-format, x-clickhouse-user, x-clickhouse-key, Authorization</value>
</header>
<header>
<name>Access-Control-Allow-Methods</name>
@@ -122,7 +121,7 @@
<name>Access-Control-Max-Age</name>
<value>86400</value>
</header>
</http_options_response -->
</http_options_response>

<!-- It is the name that will be shown in the clickhouse-client.
By default, anything with "production" will be highlighted in red in query prompt.
@@ -245,7 +244,7 @@
<max_connections>4096</max_connections>

<!-- For 'Connection: keep-alive' in HTTP 1.1 -->
<keep_alive_timeout>3</keep_alive_timeout>
<keep_alive_timeout>10</keep_alive_timeout>

<!-- gRPC protocol (see src/Server/grpc_protos/clickhouse_grpc.proto for the API) -->
<!-- <grpc_port>9100</grpc_port> -->
@@ -326,7 +325,7 @@
Query can upscale to desired number of threads during execution if more threads become available.
-->
<concurrent_threads_soft_limit_num>0</concurrent_threads_soft_limit_num>
<concurrent_threads_soft_limit_ratio_to_cores>0</concurrent_threads_soft_limit_ratio_to_cores>
<concurrent_threads_soft_limit_ratio_to_cores>2</concurrent_threads_soft_limit_ratio_to_cores>

<!-- Maximum number of concurrent queries. -->
<max_concurrent_queries>1000</max_concurrent_queries>
@@ -1,7 +0,0 @@
<clickhouse>
<profiles>
<default>
<allow_introspection_functions>1</allow_introspection_functions>
</default>
</profiles>
</clickhouse>
1
programs/server/users.d/allow_introspection_functions.yaml
Symbolic link
@@ -0,0 +1 @@
../../../tests/config/users.d/allow_introspection_functions.yaml
@@ -86,6 +86,13 @@

<!-- User can create other users and grant rights to them. -->
<!-- <access_management>1</access_management> -->

<!-- User permissions can be granted here -->
<!--
<grants>
<query>GRANT ALL ON *.*</query>
</grants>
-->
</default>
</users>
@@ -91,6 +91,10 @@ users:
# User can create other users and grant rights to them.
# access_management: 1

# SQL expressions for grants available for that user - https://clickhouse.com/docs/en/sql-reference/statements/grant
# grants:
# - query: GRANT ALL ON *.*

# Quotas.
quotas:
# Name of quota.
@@ -12,7 +12,7 @@
#include <Common/Config/ConfigReloader.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <Common/TransformEndianness.hpp>
#include <Common/transformEndianness.h>
#include <Core/Settings.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>
@@ -1,7 +1,18 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionAnalysisOfVariance.h>
#include <AggregateFunctions/FactoryHelpers.h>

#include <IO/VarInt.h>

#include <array>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <Columns/ColumnNullable.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/Moments.h>
#include <Common/NaNUtils.h>
#include <Common/assert_cast.h>

namespace DB
{

@@ -13,6 +24,82 @@ namespace ErrorCodes
namespace
{

using AggregateFunctionAnalysisOfVarianceData = AnalysisOfVarianceMoments<Float64>;

/// One way analysis of variance
/// Provides a statistical test of whether two or more population means are equal (null hypothesis)
/// Has an assumption that subjects from group i have normal distribution.
/// Accepts two arguments - a value and a group number which this value belongs to.
/// Groups are enumerated starting from 0 and there should be at least two groups to perform a test
/// Moreover there should be at least one group with the number of observations greater than one.
class AggregateFunctionAnalysisOfVariance final : public IAggregateFunctionDataHelper<AggregateFunctionAnalysisOfVarianceData, AggregateFunctionAnalysisOfVariance>
{
public:
explicit AggregateFunctionAnalysisOfVariance(const DataTypes & arguments, const Array & params)
    : IAggregateFunctionDataHelper(arguments, params, createResultType())
{}

DataTypePtr createResultType() const
{
    DataTypes types {std::make_shared<DataTypeNumber<Float64>>(), std::make_shared<DataTypeNumber<Float64>>() };
    Strings names {"f_statistic", "p_value"};
    return std::make_shared<DataTypeTuple>(
        std::move(types),
        std::move(names)
    );
}

String getName() const override { return "analysisOfVariance"; }

bool allocatesMemoryInArena() const override { return false; }

void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
    data(place).add(columns[0]->getFloat64(row_num), columns[1]->getUInt(row_num));
}

void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
    data(place).merge(data(rhs));
}

void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
    data(place).write(buf);
}

void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
    data(place).read(buf);
}

void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
    auto f_stat = data(place).getFStatistic();

    auto & column_tuple = assert_cast<ColumnTuple &>(to);
    auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
    auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));

    if (unlikely(!std::isfinite(f_stat) || f_stat < 0))
    {
        column_stat.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
        column_value.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
        return;
    }

    auto p_value = data(place).getPValue(f_stat);

    /// Because p-value is a probability.
    p_value = std::min(1.0, std::max(0.0, p_value));

    column_stat.getData().push_back(f_stat);
    column_value.getData().push_back(p_value);
}

};

AggregateFunctionPtr createAggregateFunctionAnalysisOfVariance(const std::string & name, const DataTypes & arguments, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);

@@ -1,97 +0,0 @@
#pragma once

#include <IO/VarInt.h>
#include <IO/WriteHelpers.h>

#include <array>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnsCommon.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/Moments.h>
#include "Common/NaNUtils.h"
#include <Common/assert_cast.h>
#include <Core/Types.h>

namespace DB
{

using AggregateFunctionAnalysisOfVarianceData = AnalysisOfVarianceMoments<Float64>;
|
||||
|
||||
/// One way analysis of variance
|
||||
/// Provides a statistical test of whether two or more population means are equal (null hypothesis)
|
||||
/// Has an assumption that subjects from group i have normal distribution.
|
||||
/// Accepts two arguments - a value and a group number which this value belongs to.
|
||||
/// Groups are enumerated starting from 0 and there should be at least two groups to perform a test
|
||||
/// Moreover there should be at least one group with the number of observations greater than one.
|
||||
class AggregateFunctionAnalysisOfVariance final : public IAggregateFunctionDataHelper<AggregateFunctionAnalysisOfVarianceData, AggregateFunctionAnalysisOfVariance>
|
||||
{
|
||||
public:
|
||||
explicit AggregateFunctionAnalysisOfVariance(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper(arguments, params, createResultType())
|
||||
{}
|
||||
|
||||
DataTypePtr createResultType() const
|
||||
{
|
||||
DataTypes types {std::make_shared<DataTypeNumber<Float64>>(), std::make_shared<DataTypeNumber<Float64>>() };
|
||||
Strings names {"f_statistic", "p_value"};
|
||||
return std::make_shared<DataTypeTuple>(
|
||||
std::move(types),
|
||||
std::move(names)
|
||||
);
|
||||
}
|
||||
|
||||
String getName() const override { return "analysisOfVariance"; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
data(place).add(columns[0]->getFloat64(row_num), columns[1]->getUInt(row_num));
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
data(place).merge(data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
data(place).read(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
auto f_stat = data(place).getFStatistic();
|
||||
|
||||
auto & column_tuple = assert_cast<ColumnTuple &>(to);
|
||||
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
|
||||
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
|
||||
|
||||
if (unlikely(!std::isfinite(f_stat) || f_stat < 0))
|
||||
{
|
||||
column_stat.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
|
||||
column_value.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
|
||||
return;
|
||||
}
|
||||
|
||||
auto p_value = data(place).getPValue(f_stat);
|
||||
|
||||
/// Because p-value is a probability.
|
||||
p_value = std::min(1.0, std::max(0.0, p_value));
|
||||
|
||||
column_stat.getData().push_back(f_stat);
|
||||
column_value.getData().push_back(p_value);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
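
For reference, the aggregation above delegates the actual statistics to AnalysisOfVarianceMoments (declared in AggregateFunctions/Moments.h, not part of this diff). A minimal standalone sketch of the quantity it produces, assuming plain per-group vectors rather than the incremental moment state, could look like this (the function name and structure are illustrative, not ClickHouse code):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

/// Hypothetical helper: one-way ANOVA F-statistic from per-group observations.
/// The real aggregate accumulates per-group counts, sums and sums of squares
/// instead of keeping the raw values.
double oneWayAnovaF(const std::vector<std::vector<double>> & groups)
{
    std::size_t total_count = 0;
    double grand_sum = 0.0;
    for (const auto & g : groups)
    {
        total_count += g.size();
        for (double x : g)
            grand_sum += x;
    }
    const double grand_mean = grand_sum / static_cast<double>(total_count);

    double ss_between = 0.0; /// variation of group means around the grand mean
    double ss_within = 0.0;  /// variation of observations around their group mean
    for (const auto & g : groups)
    {
        double group_sum = 0.0;
        for (double x : g)
            group_sum += x;
        const double group_mean = group_sum / static_cast<double>(g.size());
        ss_between += static_cast<double>(g.size()) * (group_mean - grand_mean) * (group_mean - grand_mean);
        for (double x : g)
            ss_within += (x - group_mean) * (x - group_mean);
    }

    const double df_between = static_cast<double>(groups.size()) - 1.0;
    const double df_within = static_cast<double>(total_count - groups.size());
    return (ss_between / df_between) / (ss_within / df_within);
}

int main()
{
    /// Two groups with clearly different means give a large F value (54 here).
    std::cout << oneWayAnovaF({{1.0, 2.0, 3.0}, {7.0, 8.0, 9.0}}) << '\n';
}
```

The p_value column is then derived from the F-distribution with (k - 1, N - k) degrees of freedom and clamped to [0, 1], as insertResultInto() above does.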
|
@ -1,12 +1,14 @@
#include <memory>
#include <type_traits>
#include <AggregateFunctions/AggregateFunctionAvg.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionAvgWeighted.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>


namespace DB
{

struct Settings;

namespace ErrorCodes
@ -16,13 +18,93 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
using AvgWeightedFieldType = std::conditional_t<DecimalOrExtendedInt<T>,
|
||||
Float64, // no way to do UInt128 * UInt128, better cast to Float64
|
||||
NearestFieldType<T>>;
|
||||
|
||||
template <typename T, typename U>
|
||||
using MaxFieldType = std::conditional_t<(sizeof(AvgWeightedFieldType<T>) > sizeof(AvgWeightedFieldType<U>)),
|
||||
AvgWeightedFieldType<T>, AvgWeightedFieldType<U>>;
|
||||
|
||||
template <typename Value, typename Weight>
|
||||
class AggregateFunctionAvgWeighted final :
|
||||
public AggregateFunctionAvgBase<
|
||||
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>
|
||||
{
|
||||
public:
|
||||
using Base = AggregateFunctionAvgBase<
|
||||
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>;
|
||||
using Base::Base;
|
||||
|
||||
using Numerator = typename Base::Numerator;
|
||||
using Denominator = typename Base::Denominator;
|
||||
using Fraction = typename Base::Fraction;
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
const auto & weights = static_cast<const ColumnVector<Weight> &>(*columns[1]);
|
||||
|
||||
this->data(place).numerator += static_cast<Numerator>(
|
||||
static_cast<const ColumnVector<Value> &>(*columns[0]).getData()[row_num])
|
||||
* static_cast<Numerator>(weights.getData()[row_num]);
|
||||
|
||||
this->data(place).denominator += static_cast<Denominator>(weights.getData()[row_num]);
|
||||
}
|
||||
|
||||
String getName() const override { return "avgWeighted"; }
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
bool isCompilable() const override
|
||||
{
|
||||
bool can_be_compiled = Base::isCompilable();
|
||||
can_be_compiled &= canBeNativeType<Weight>();
|
||||
|
||||
return can_be_compiled;
|
||||
}
|
||||
|
||||
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
|
||||
{
|
||||
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
|
||||
|
||||
auto * numerator_type = toNativeType<Numerator>(b);
|
||||
auto * numerator_ptr = aggregate_data_ptr;
|
||||
auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr);
|
||||
|
||||
auto numerator_data_type = toNativeDataType<Numerator>();
|
||||
auto * argument = nativeCast(b, arguments[0], numerator_data_type);
|
||||
auto * weight = nativeCast(b, arguments[1], numerator_data_type);
|
||||
|
||||
llvm::Value * value_weight_multiplication = argument->getType()->isIntegerTy() ? b.CreateMul(argument, weight) : b.CreateFMul(argument, weight);
|
||||
auto * numerator_result_value = numerator_type->isIntegerTy() ? b.CreateAdd(numerator_value, value_weight_multiplication) : b.CreateFAdd(numerator_value, value_weight_multiplication);
|
||||
b.CreateStore(numerator_result_value, numerator_ptr);
|
||||
|
||||
auto * denominator_type = toNativeType<Denominator>(b);
|
||||
|
||||
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
|
||||
auto * denominator_ptr = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, denominator_offset);
|
||||
|
||||
auto * weight_cast_to_denominator = nativeCast(b, arguments[1], toNativeDataType<Denominator>());
|
||||
|
||||
auto * denominator_value = b.CreateLoad(denominator_type, denominator_ptr);
|
||||
auto * denominator_value_updated = denominator_type->isIntegerTy() ? b.CreateAdd(denominator_value, weight_cast_to_denominator) : b.CreateFAdd(denominator_value, weight_cast_to_denominator);
|
||||
|
||||
b.CreateStore(denominator_value_updated, denominator_ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
|
||||
{
|
||||
const WhichDataType l_dt(left), r_dt(right);
|
||||
|
||||
constexpr auto allow = [](WhichDataType t)
|
||||
{
|
||||
return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
|
||||
return t.isInt() || t.isUInt() || t.isFloat();
|
||||
};
|
||||
|
||||
return allow(l_dt) && allow(r_dt);
|
||||
@ -33,7 +115,6 @@ bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
|
||||
{ \
|
||||
LINE(Int8); LINE(Int16); LINE(Int32); LINE(Int64); LINE(Int128); LINE(Int256); \
|
||||
LINE(UInt8); LINE(UInt16); LINE(UInt32); LINE(UInt64); LINE(UInt128); LINE(UInt256); \
|
||||
LINE(Decimal32); LINE(Decimal64); LINE(Decimal128); LINE(Decimal256); \
|
||||
LINE(Float32); LINE(Float64); \
|
||||
default: return nullptr; \
|
||||
}
|
||||
@ -75,31 +156,14 @@ createAggregateFunctionAvgWeighted(const std::string & name, const DataTypes & a
|
||||
"Types {} and {} are non-conforming as arguments for aggregate function {}",
|
||||
data_type->getName(), data_type_weight->getName(), name);
|
||||
|
||||
AggregateFunctionPtr ptr;
|
||||
|
||||
const bool left_decimal = isDecimal(data_type);
|
||||
const bool right_decimal = isDecimal(data_type_weight);
|
||||
|
||||
/// We multiply value by weight, so actual scale of numerator is <scale of value> + <scale of weight>
|
||||
if (left_decimal && right_decimal)
|
||||
ptr.reset(create(*data_type, *data_type_weight,
|
||||
argument_types,
|
||||
getDecimalScale(*data_type) + getDecimalScale(*data_type_weight), getDecimalScale(*data_type_weight)));
|
||||
else if (left_decimal)
|
||||
ptr.reset(create(*data_type, *data_type_weight, argument_types,
|
||||
getDecimalScale(*data_type)));
|
||||
else if (right_decimal)
|
||||
ptr.reset(create(*data_type, *data_type_weight, argument_types,
|
||||
getDecimalScale(*data_type_weight), getDecimalScale(*data_type_weight)));
|
||||
else
|
||||
ptr.reset(create(*data_type, *data_type_weight, argument_types));
|
||||
|
||||
return ptr;
|
||||
return AggregateFunctionPtr(create(*data_type, *data_type_weight, argument_types));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionAvgWeighted(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("avgWeighted", createAggregateFunctionAvgWeighted, AggregateFunctionFactory::CaseSensitive);
|
||||
factory.registerFunction("avgWeighted", createAggregateFunctionAvgWeighted);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,90 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <AggregateFunctions/AggregateFunctionAvg.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
template <typename T>
|
||||
using AvgWeightedFieldType = std::conditional_t<is_decimal<T>,
|
||||
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
|
||||
std::conditional_t<DecimalOrExtendedInt<T>,
|
||||
Float64, // no way to do UInt128 * UInt128, better cast to Float64
|
||||
NearestFieldType<T>>>;
|
||||
|
||||
template <typename T, typename U>
|
||||
using MaxFieldType = std::conditional_t<(sizeof(AvgWeightedFieldType<T>) > sizeof(AvgWeightedFieldType<U>)),
|
||||
AvgWeightedFieldType<T>, AvgWeightedFieldType<U>>;
|
||||
|
||||
template <typename Value, typename Weight>
|
||||
class AggregateFunctionAvgWeighted final :
|
||||
public AggregateFunctionAvgBase<
|
||||
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>
|
||||
{
|
||||
public:
|
||||
using Base = AggregateFunctionAvgBase<
|
||||
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>;
|
||||
using Base::Base;
|
||||
|
||||
using Numerator = typename Base::Numerator;
|
||||
using Denominator = typename Base::Denominator;
|
||||
using Fraction = typename Base::Fraction;
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
const auto& weights = static_cast<const ColumnVectorOrDecimal<Weight> &>(*columns[1]);
|
||||
|
||||
this->data(place).numerator += static_cast<Numerator>(
|
||||
static_cast<const ColumnVectorOrDecimal<Value> &>(*columns[0]).getData()[row_num]) *
|
||||
static_cast<Numerator>(weights.getData()[row_num]);
|
||||
|
||||
this->data(place).denominator += static_cast<Denominator>(weights.getData()[row_num]);
|
||||
}
|
||||
|
||||
String getName() const override { return "avgWeighted"; }
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
bool isCompilable() const override
|
||||
{
|
||||
bool can_be_compiled = Base::isCompilable();
|
||||
can_be_compiled &= canBeNativeType<Weight>();
|
||||
|
||||
return can_be_compiled;
|
||||
}
|
||||
|
||||
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
|
||||
{
|
||||
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
|
||||
|
||||
auto * numerator_type = toNativeType<Numerator>(b);
|
||||
auto * numerator_ptr = aggregate_data_ptr;
|
||||
auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr);
|
||||
|
||||
auto numerator_data_type = toNativeDataType<Numerator>();
|
||||
auto * argument = nativeCast(b, arguments[0], numerator_data_type);
|
||||
auto * weight = nativeCast(b, arguments[1], numerator_data_type);
|
||||
|
||||
llvm::Value * value_weight_multiplication = argument->getType()->isIntegerTy() ? b.CreateMul(argument, weight) : b.CreateFMul(argument, weight);
|
||||
auto * numerator_result_value = numerator_type->isIntegerTy() ? b.CreateAdd(numerator_value, value_weight_multiplication) : b.CreateFAdd(numerator_value, value_weight_multiplication);
|
||||
b.CreateStore(numerator_result_value, numerator_ptr);
|
||||
|
||||
auto * denominator_type = toNativeType<Denominator>(b);
|
||||
|
||||
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
|
||||
auto * denominator_ptr = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, denominator_offset);
|
||||
|
||||
auto * weight_cast_to_denominator = nativeCast(b, arguments[1], toNativeDataType<Denominator>());
|
||||
|
||||
auto * denominator_value = b.CreateLoad(denominator_type, denominator_ptr);
|
||||
auto * denominator_value_updated = denominator_type->isIntegerTy() ? b.CreateAdd(denominator_value, weight_cast_to_denominator) : b.CreateFAdd(denominator_value, weight_cast_to_denominator);
|
||||
|
||||
b.CreateStore(denominator_value_updated, denominator_ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
};
|
||||
}
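
Both the interpreted add() path and the JIT compileAdd() path above maintain the same two accumulators: a numerator Σ value · weight and a denominator Σ weight, and the final result is their ratio. A minimal standalone sketch of that state for Float64 arguments (the real code templates the accumulator types via MaxFieldType / AvgWeightedFieldType; the struct below is illustrative only):

```cpp
#include <iostream>

/// Simplified model of the avgWeighted partial state for Float64 arguments.
struct AvgWeightedState
{
    double numerator = 0.0;   /// sum of value * weight
    double denominator = 0.0; /// sum of weight

    void add(double value, double weight)
    {
        numerator += value * weight;
        denominator += weight;
    }

    void merge(const AvgWeightedState & rhs)
    {
        numerator += rhs.numerator;
        denominator += rhs.denominator;
    }

    double result() const { return numerator / denominator; }
};

int main()
{
    AvgWeightedState state;
    state.add(10.0, 1.0);
    state.add(20.0, 3.0);
    std::cout << state.result() << '\n'; /// (10*1 + 20*3) / (1 + 3) = 17.5
}
```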
|
@ -1,11 +1,27 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionBitwise.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>

#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>

#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>

#include <AggregateFunctions/IAggregateFunction.h>

#include "config.h"

#if USE_EMBEDDED_COMPILER
#    include <llvm/IR/IRBuilder.h>
#    include <DataTypes/Native.h>
#endif


namespace DB
{

struct Settings;

namespace ErrorCodes
@ -16,6 +32,179 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionGroupBitOrData
|
||||
{
|
||||
T value = 0;
|
||||
static const char * name() { return "groupBitOr"; }
|
||||
void update(T x) { value |= x; }
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
|
||||
{
|
||||
auto type = toNativeType<T>(builder);
|
||||
builder.CreateStore(llvm::Constant::getNullValue(type), value_ptr);
|
||||
}
|
||||
|
||||
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
|
||||
{
|
||||
return builder.CreateOr(lhs, rhs);
|
||||
}
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionGroupBitAndData
|
||||
{
|
||||
T value = -1; /// Two's complement arithmetic, sign extension.
|
||||
static const char * name() { return "groupBitAnd"; }
|
||||
void update(T x) { value &= x; }
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
|
||||
{
|
||||
auto type = toNativeType<T>(builder);
|
||||
builder.CreateStore(llvm::ConstantInt::get(type, -1), value_ptr);
|
||||
}
|
||||
|
||||
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
|
||||
{
|
||||
return builder.CreateAnd(lhs, rhs);
|
||||
}
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionGroupBitXorData
|
||||
{
|
||||
T value = 0;
|
||||
static const char * name() { return "groupBitXor"; }
|
||||
void update(T x) { value ^= x; }
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
|
||||
{
|
||||
auto type = toNativeType<T>(builder);
|
||||
builder.CreateStore(llvm::Constant::getNullValue(type), value_ptr);
|
||||
}
|
||||
|
||||
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
|
||||
{
|
||||
return builder.CreateXor(lhs, rhs);
|
||||
}
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/// Counts bitwise operation on numbers.
|
||||
template <typename T, typename Data>
|
||||
class AggregateFunctionBitwise final : public IAggregateFunctionDataHelper<Data, AggregateFunctionBitwise<T, Data>>
|
||||
{
|
||||
public:
|
||||
explicit AggregateFunctionBitwise(const DataTypePtr & type)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionBitwise<T, Data>>({type}, {}, createResultType())
|
||||
{}
|
||||
|
||||
String getName() const override { return Data::name(); }
|
||||
|
||||
static DataTypePtr createResultType()
|
||||
{
|
||||
return std::make_shared<DataTypeNumber<T>>();
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
this->data(place).update(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).update(this->data(rhs).value);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
writeBinary(this->data(place).value, buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
readBinary(this->data(place).value, buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).value);
|
||||
}
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
bool isCompilable() const override
|
||||
{
|
||||
auto return_type = this->getResultType();
|
||||
return canBeNativeType(*return_type);
|
||||
}
|
||||
|
||||
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
|
||||
{
|
||||
auto * value_ptr = aggregate_data_ptr;
|
||||
Data::compileCreate(builder, value_ptr);
|
||||
}
|
||||
|
||||
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
|
||||
{
|
||||
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
|
||||
|
||||
auto * return_type = toNativeType(b, this->getResultType());
|
||||
|
||||
auto * value_ptr = aggregate_data_ptr;
|
||||
auto * value = b.CreateLoad(return_type, value_ptr);
|
||||
|
||||
auto * result_value = Data::compileUpdate(builder, value, arguments[0].value);
|
||||
|
||||
b.CreateStore(result_value, value_ptr);
|
||||
}
|
||||
|
||||
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
|
||||
{
|
||||
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
|
||||
|
||||
auto * return_type = toNativeType(b, this->getResultType());
|
||||
|
||||
auto * value_dst_ptr = aggregate_data_dst_ptr;
|
||||
auto * value_dst = b.CreateLoad(return_type, value_dst_ptr);
|
||||
|
||||
auto * value_src_ptr = aggregate_data_src_ptr;
|
||||
auto * value_src = b.CreateLoad(return_type, value_src_ptr);
|
||||
|
||||
auto * result_value = Data::compileUpdate(builder, value_dst, value_src);
|
||||
|
||||
b.CreateStore(result_value, value_dst_ptr);
|
||||
}
|
||||
|
||||
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
|
||||
{
|
||||
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
|
||||
|
||||
auto * return_type = toNativeType(b, this->getResultType());
|
||||
auto * value_ptr = aggregate_data_ptr;
|
||||
|
||||
return b.CreateLoad(return_type, value_ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
|
||||
template <template <typename> class Data>
|
||||
AggregateFunctionPtr createAggregateFunctionBitwise(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
|
@ -1,197 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
# include <llvm/IR/IRBuilder.h>
|
||||
# include <DataTypes/Native.h>
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionGroupBitOrData
|
||||
{
|
||||
T value = 0;
|
||||
static const char * name() { return "groupBitOr"; }
|
||||
void update(T x) { value |= x; }
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
|
||||
{
|
||||
auto type = toNativeType<T>(builder);
|
||||
builder.CreateStore(llvm::Constant::getNullValue(type), value_ptr);
|
||||
}
|
||||
|
||||
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
|
||||
{
|
||||
return builder.CreateOr(lhs, rhs);
|
||||
}
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionGroupBitAndData
|
||||
{
|
||||
T value = -1; /// Two's complement arithmetic, sign extension.
|
||||
static const char * name() { return "groupBitAnd"; }
|
||||
void update(T x) { value &= x; }
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
|
||||
{
|
||||
auto type = toNativeType<T>(builder);
|
||||
builder.CreateStore(llvm::ConstantInt::get(type, -1), value_ptr);
|
||||
}
|
||||
|
||||
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
|
||||
{
|
||||
return builder.CreateAnd(lhs, rhs);
|
||||
}
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionGroupBitXorData
|
||||
{
|
||||
T value = 0;
|
||||
static const char * name() { return "groupBitXor"; }
|
||||
void update(T x) { value ^= x; }
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr)
|
||||
{
|
||||
auto type = toNativeType<T>(builder);
|
||||
builder.CreateStore(llvm::Constant::getNullValue(type), value_ptr);
|
||||
}
|
||||
|
||||
static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs)
|
||||
{
|
||||
return builder.CreateXor(lhs, rhs);
|
||||
}
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/// Counts bitwise operation on numbers.
|
||||
template <typename T, typename Data>
|
||||
class AggregateFunctionBitwise final : public IAggregateFunctionDataHelper<Data, AggregateFunctionBitwise<T, Data>>
|
||||
{
|
||||
public:
|
||||
explicit AggregateFunctionBitwise(const DataTypePtr & type)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionBitwise<T, Data>>({type}, {}, createResultType())
|
||||
{}
|
||||
|
||||
String getName() const override { return Data::name(); }
|
||||
|
||||
static DataTypePtr createResultType()
|
||||
{
|
||||
return std::make_shared<DataTypeNumber<T>>();
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
this->data(place).update(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).update(this->data(rhs).value);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
writeBinary(this->data(place).value, buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
readBinary(this->data(place).value, buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).value);
|
||||
}
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
bool isCompilable() const override
|
||||
{
|
||||
auto return_type = this->getResultType();
|
||||
return canBeNativeType(*return_type);
|
||||
}
|
||||
|
||||
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
|
||||
{
|
||||
auto * value_ptr = aggregate_data_ptr;
|
||||
Data::compileCreate(builder, value_ptr);
|
||||
}
|
||||
|
||||
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
|
||||
{
|
||||
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
|
||||
|
||||
auto * return_type = toNativeType(b, this->getResultType());
|
||||
|
||||
auto * value_ptr = aggregate_data_ptr;
|
||||
auto * value = b.CreateLoad(return_type, value_ptr);
|
||||
|
||||
auto * result_value = Data::compileUpdate(builder, value, arguments[0].value);
|
||||
|
||||
b.CreateStore(result_value, value_ptr);
|
||||
}
|
||||
|
||||
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
|
||||
{
|
||||
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
|
||||
|
||||
auto * return_type = toNativeType(b, this->getResultType());
|
||||
|
||||
auto * value_dst_ptr = aggregate_data_dst_ptr;
|
||||
auto * value_dst = b.CreateLoad(return_type, value_dst_ptr);
|
||||
|
||||
auto * value_src_ptr = aggregate_data_src_ptr;
|
||||
auto * value_src = b.CreateLoad(return_type, value_src_ptr);
|
||||
|
||||
auto * result_value = Data::compileUpdate(builder, value_dst, value_src);
|
||||
|
||||
b.CreateStore(result_value, value_dst_ptr);
|
||||
}
|
||||
|
||||
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
|
||||
{
|
||||
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
|
||||
|
||||
auto * return_type = toNativeType(b, this->getResultType());
|
||||
auto * value_ptr = aggregate_data_ptr;
|
||||
|
||||
return b.CreateLoad(return_type, value_ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
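
The three data structs above differ only in their identity element and update operation: groupBitOr and groupBitXor start from 0, while groupBitAnd starts from -1 so that every bit is initially set. A small standalone sketch of the same folds, using uint8_t values for readability (names are illustrative, not ClickHouse code):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

/// Fold a vector with a bitwise operation starting from the given identity,
/// mirroring how the aggregate states above call update() per row.
template <typename T, typename Op>
T foldBits(const std::vector<T> & values, T identity, Op op)
{
    T acc = identity;
    for (T v : values)
        acc = op(acc, v);
    return acc;
}

int main()
{
    std::vector<uint8_t> values{0b0011, 0b0110, 0b0101};

    auto bit_or = foldBits<uint8_t>(values, 0,
        [](uint8_t a, uint8_t b) { return static_cast<uint8_t>(a | b); });
    /// groupBitAnd starts from all ones (-1 in two's complement), so the first
    /// update leaves the argument unchanged.
    auto bit_and = foldBits<uint8_t>(values, static_cast<uint8_t>(-1),
        [](uint8_t a, uint8_t b) { return static_cast<uint8_t>(a & b); });
    auto bit_xor = foldBits<uint8_t>(values, 0,
        [](uint8_t a, uint8_t b) { return static_cast<uint8_t>(a ^ b); });

    std::cout << int(bit_or) << ' ' << int(bit_and) << ' ' << int(bit_xor) << '\n'; /// 7 0 0
}
```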
|
@ -1,7 +1,14 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionBoundingRatio.h>
#include <AggregateFunctions/FactoryHelpers.h>

#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/assert_cast.h>
#include <Common/transformEndianness.h>


namespace DB
{
@ -10,11 +17,169 @@ struct Settings;
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/** Tracks the leftmost and rightmost (x, y) data points.
|
||||
*/
|
||||
struct AggregateFunctionBoundingRatioData
|
||||
{
|
||||
struct Point
|
||||
{
|
||||
Float64 x;
|
||||
Float64 y;
|
||||
};
|
||||
|
||||
bool empty = true;
|
||||
Point left;
|
||||
Point right;
|
||||
|
||||
void add(Float64 x, Float64 y)
|
||||
{
|
||||
Point point{x, y};
|
||||
|
||||
if (empty)
|
||||
{
|
||||
left = point;
|
||||
right = point;
|
||||
empty = false;
|
||||
}
|
||||
else if (point.x < left.x)
|
||||
{
|
||||
left = point;
|
||||
}
|
||||
else if (point.x > right.x)
|
||||
{
|
||||
right = point;
|
||||
}
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionBoundingRatioData & other)
|
||||
{
|
||||
if (empty)
|
||||
{
|
||||
*this = other;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (other.left.x < left.x)
|
||||
left = other.left;
|
||||
if (other.right.x > right.x)
|
||||
right = other.right;
|
||||
}
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const;
|
||||
void deserialize(ReadBuffer & buf);
|
||||
};
|
||||
|
||||
template <std::endian endian>
|
||||
inline void transformEndianness(AggregateFunctionBoundingRatioData::Point & p)
|
||||
{
|
||||
DB::transformEndianness<endian>(p.x);
|
||||
DB::transformEndianness<endian>(p.y);
|
||||
}
|
||||
|
||||
void AggregateFunctionBoundingRatioData::serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinaryLittleEndian(empty, buf);
|
||||
|
||||
if (!empty)
|
||||
{
|
||||
writeBinaryLittleEndian(left, buf);
|
||||
writeBinaryLittleEndian(right, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void AggregateFunctionBoundingRatioData::deserialize(ReadBuffer & buf)
|
||||
{
|
||||
readBinaryLittleEndian(empty, buf);
|
||||
|
||||
if (!empty)
|
||||
{
|
||||
readBinaryLittleEndian(left, buf);
|
||||
readBinaryLittleEndian(right, buf);
|
||||
}
|
||||
}
|
||||
|
||||
inline void writeBinary(const AggregateFunctionBoundingRatioData::Point & p, WriteBuffer & buf)
|
||||
{
|
||||
writePODBinary(p, buf);
|
||||
}
|
||||
|
||||
inline void readBinary(AggregateFunctionBoundingRatioData::Point & p, ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(p, buf);
|
||||
}
|
||||
|
||||
|
||||
class AggregateFunctionBoundingRatio final : public IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>
|
||||
{
|
||||
private:
|
||||
/** Calculates the slope of a line between leftmost and rightmost data points.
|
||||
* (y2 - y1) / (x2 - x1)
|
||||
*/
|
||||
static Float64 NO_SANITIZE_UNDEFINED getBoundingRatio(const AggregateFunctionBoundingRatioData & data)
|
||||
{
|
||||
if (data.empty)
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
|
||||
return (data.right.y - data.left.y) / (data.right.x - data.left.x);
|
||||
}
|
||||
|
||||
public:
|
||||
String getName() const override
|
||||
{
|
||||
return "boundingRatio";
|
||||
}
|
||||
|
||||
explicit AggregateFunctionBoundingRatio(const DataTypes & arguments)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>(arguments, {}, std::make_shared<DataTypeFloat64>())
|
||||
{
|
||||
const auto * x_arg = arguments.at(0).get();
|
||||
const auto * y_arg = arguments.at(1).get();
|
||||
|
||||
if (!x_arg->isValueRepresentedByNumber() || !y_arg->isValueRepresentedByNumber())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Illegal types of arguments of aggregate function {}, must have number representation.",
|
||||
getName());
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override
|
||||
{
|
||||
/// NOTE Slightly inefficient.
|
||||
const auto x = columns[0]->getFloat64(row_num);
|
||||
const auto y = columns[1]->getFloat64(row_num);
|
||||
data(place).add(x, y);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
data(place).merge(data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
data(place).serialize(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
data(place).deserialize(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnFloat64 &>(to).getData().push_back(getBoundingRatio(data(place)));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionRate(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
|
@ -1,177 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
/** Tracks the leftmost and rightmost (x, y) data points.
|
||||
*/
|
||||
struct AggregateFunctionBoundingRatioData
|
||||
{
|
||||
struct Point
|
||||
{
|
||||
Float64 x;
|
||||
Float64 y;
|
||||
};
|
||||
|
||||
bool empty = true;
|
||||
Point left;
|
||||
Point right;
|
||||
|
||||
void add(Float64 x, Float64 y)
|
||||
{
|
||||
Point point{x, y};
|
||||
|
||||
if (empty)
|
||||
{
|
||||
left = point;
|
||||
right = point;
|
||||
empty = false;
|
||||
}
|
||||
else if (point.x < left.x)
|
||||
{
|
||||
left = point;
|
||||
}
|
||||
else if (point.x > right.x)
|
||||
{
|
||||
right = point;
|
||||
}
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionBoundingRatioData & other)
|
||||
{
|
||||
if (empty)
|
||||
{
|
||||
*this = other;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (other.left.x < left.x)
|
||||
left = other.left;
|
||||
if (other.right.x > right.x)
|
||||
right = other.right;
|
||||
}
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const;
|
||||
void deserialize(ReadBuffer & buf);
|
||||
};
|
||||
|
||||
template <std::endian endian>
|
||||
inline void transformEndianness(AggregateFunctionBoundingRatioData::Point & p)
|
||||
{
|
||||
transformEndianness<endian>(p.x);
|
||||
transformEndianness<endian>(p.y);
|
||||
}
|
||||
|
||||
void AggregateFunctionBoundingRatioData::serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinaryLittleEndian(empty, buf);
|
||||
|
||||
if (!empty)
|
||||
{
|
||||
writeBinaryLittleEndian(left, buf);
|
||||
writeBinaryLittleEndian(right, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void AggregateFunctionBoundingRatioData::deserialize(ReadBuffer & buf)
|
||||
{
|
||||
readBinaryLittleEndian(empty, buf);
|
||||
|
||||
if (!empty)
|
||||
{
|
||||
readBinaryLittleEndian(left, buf);
|
||||
readBinaryLittleEndian(right, buf);
|
||||
}
|
||||
}
|
||||
|
||||
inline void writeBinary(const AggregateFunctionBoundingRatioData::Point & p, WriteBuffer & buf)
|
||||
{
|
||||
writePODBinary(p, buf);
|
||||
}
|
||||
|
||||
inline void readBinary(AggregateFunctionBoundingRatioData::Point & p, ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(p, buf);
|
||||
}
|
||||
|
||||
|
||||
class AggregateFunctionBoundingRatio final : public IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>
|
||||
{
|
||||
private:
|
||||
/** Calculates the slope of a line between leftmost and rightmost data points.
|
||||
* (y2 - y1) / (x2 - x1)
|
||||
*/
|
||||
static Float64 NO_SANITIZE_UNDEFINED getBoundingRatio(const AggregateFunctionBoundingRatioData & data)
|
||||
{
|
||||
if (data.empty)
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
|
||||
return (data.right.y - data.left.y) / (data.right.x - data.left.x);
|
||||
}
|
||||
|
||||
public:
|
||||
String getName() const override
|
||||
{
|
||||
return "boundingRatio";
|
||||
}
|
||||
|
||||
explicit AggregateFunctionBoundingRatio(const DataTypes & arguments)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>(arguments, {}, std::make_shared<DataTypeFloat64>())
|
||||
{
|
||||
const auto * x_arg = arguments.at(0).get();
|
||||
const auto * y_arg = arguments.at(1).get();
|
||||
|
||||
if (!x_arg->isValueRepresentedByNumber() || !y_arg->isValueRepresentedByNumber())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Illegal types of arguments of aggregate function {}, must have number representation.",
|
||||
getName());
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override
|
||||
{
|
||||
/// NOTE Slightly inefficient.
|
||||
const auto x = columns[0]->getFloat64(row_num);
|
||||
const auto y = columns[1]->getFloat64(row_num);
|
||||
data(place).add(x, y);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
data(place).merge(data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
data(place).serialize(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
data(place).deserialize(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnFloat64 &>(to).getData().push_back(getBoundingRatio(data(place)));
|
||||
}
|
||||
};
|
||||
|
||||
}
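
The boundingRatio state above never stores the full series: it keeps only the points with the minimal and maximal x, and the result is the slope of the line through them. A standalone sketch of that behaviour, using plain doubles instead of the Point struct (names are illustrative, not ClickHouse code):

```cpp
#include <iostream>
#include <limits>

/// Simplified model of the boundingRatio partial state: track the leftmost and
/// rightmost points by x, then report the slope between them.
struct BoundingRatioState
{
    bool empty = true;
    double left_x = 0, left_y = 0;
    double right_x = 0, right_y = 0;

    void add(double x, double y)
    {
        if (empty)
        {
            left_x = right_x = x;
            left_y = right_y = y;
            empty = false;
        }
        else if (x < left_x)
        {
            left_x = x;
            left_y = y;
        }
        else if (x > right_x)
        {
            right_x = x;
            right_y = y;
        }
    }

    double result() const
    {
        if (empty)
            return std::numeric_limits<double>::quiet_NaN();
        return (right_y - left_y) / (right_x - left_x); /// (y2 - y1) / (x2 - x1)
    }
};

int main()
{
    BoundingRatioState state;
    state.add(0.0, 1.0);
    state.add(5.0, 11.0);
    state.add(2.0, 100.0); /// ignored: neither leftmost nor rightmost by x
    std::cout << state.result() << '\n'; /// (11 - 1) / (5 - 0) = 2
}
```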
|
@ -1,9 +1,15 @@
#include <AggregateFunctions/AggregateFunctionDeltaSum.h>

#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>

#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>

#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypesNumber.h>

#include <AggregateFunctions/IAggregateFunction.h>


namespace DB
{
@ -18,6 +24,113 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct AggregationFunctionDeltaSumData
|
||||
{
|
||||
T sum = 0;
|
||||
T last = 0;
|
||||
T first = 0;
|
||||
bool seen = false;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class AggregationFunctionDeltaSum final
|
||||
: public IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>
|
||||
{
|
||||
public:
|
||||
AggregationFunctionDeltaSum(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{arguments, params, createResultType()}
|
||||
{}
|
||||
|
||||
AggregationFunctionDeltaSum()
|
||||
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{}
|
||||
{}
|
||||
|
||||
String getName() const override { return "deltaSum"; }
|
||||
|
||||
static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<T>>(); }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
auto value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
|
||||
|
||||
if ((this->data(place).last < value) && this->data(place).seen)
|
||||
{
|
||||
this->data(place).sum += (value - this->data(place).last);
|
||||
}
|
||||
|
||||
this->data(place).last = value;
|
||||
|
||||
if (!this->data(place).seen)
|
||||
{
|
||||
this->data(place).first = value;
|
||||
this->data(place).seen = true;
|
||||
}
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
auto place_data = &this->data(place);
|
||||
auto rhs_data = &this->data(rhs);
|
||||
|
||||
if ((place_data->last < rhs_data->first) && place_data->seen && rhs_data->seen)
|
||||
{
|
||||
// If the lhs last number seen is less than the first number the rhs saw, the lhs is before
|
||||
// the rhs, for example [0, 2] [4, 7]. So we want to add the deltasums, but also add the
|
||||
// difference between lhs last number and rhs first number (the 2 and 4). Then we want to
|
||||
// take last value from the rhs, so first and last become 0 and 7.
|
||||
|
||||
place_data->sum += rhs_data->sum + (rhs_data->first - place_data->last);
|
||||
place_data->last = rhs_data->last;
|
||||
}
|
||||
else if ((rhs_data->first < place_data->last && rhs_data->seen && place_data->seen))
|
||||
{
|
||||
// In the opposite scenario, the lhs comes after the rhs, e.g. [4, 6] [1, 2]. Since we
|
||||
// assume the input interval states are sorted by time, we assume this is a counter
|
||||
// reset, and therefore do *not* add the difference between our first value and the
|
||||
// rhs last value.
|
||||
|
||||
place_data->sum += rhs_data->sum;
|
||||
place_data->last = rhs_data->last;
|
||||
}
|
||||
else if (rhs_data->seen && !place_data->seen)
|
||||
{
|
||||
// If we're here then the lhs is an empty state and the rhs does have some state, so
|
||||
// we'll just take that state.
|
||||
|
||||
place_data->first = rhs_data->first;
|
||||
place_data->last = rhs_data->last;
|
||||
place_data->sum = rhs_data->sum;
|
||||
place_data->seen = rhs_data->seen;
|
||||
}
|
||||
|
||||
// Otherwise lhs either has data or is uninitialized, so we don't need to modify its values.
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
writeBinaryLittleEndian(this->data(place).sum, buf);
|
||||
writeBinaryLittleEndian(this->data(place).first, buf);
|
||||
writeBinaryLittleEndian(this->data(place).last, buf);
|
||||
writeBinaryLittleEndian(this->data(place).seen, buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
readBinaryLittleEndian(this->data(place).sum, buf);
|
||||
readBinaryLittleEndian(this->data(place).first, buf);
|
||||
readBinaryLittleEndian(this->data(place).last, buf);
|
||||
readBinaryLittleEndian(this->data(place).seen, buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).sum);
|
||||
}
|
||||
};
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionDeltaSum(
|
||||
const String & name,
|
||||
const DataTypes & arguments,
|
||||
|
@ -1,126 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
template <typename T>
|
||||
struct AggregationFunctionDeltaSumData
|
||||
{
|
||||
T sum = 0;
|
||||
T last = 0;
|
||||
T first = 0;
|
||||
bool seen = false;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class AggregationFunctionDeltaSum final
|
||||
: public IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>
|
||||
{
|
||||
public:
|
||||
AggregationFunctionDeltaSum(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{arguments, params, createResultType()}
|
||||
{}
|
||||
|
||||
AggregationFunctionDeltaSum()
|
||||
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{}
|
||||
{}
|
||||
|
||||
String getName() const override { return "deltaSum"; }
|
||||
|
||||
static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<T>>(); }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
auto value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
|
||||
|
||||
if ((this->data(place).last < value) && this->data(place).seen)
|
||||
{
|
||||
this->data(place).sum += (value - this->data(place).last);
|
||||
}
|
||||
|
||||
this->data(place).last = value;
|
||||
|
||||
if (!this->data(place).seen)
|
||||
{
|
||||
this->data(place).first = value;
|
||||
this->data(place).seen = true;
|
||||
}
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
auto place_data = &this->data(place);
|
||||
auto rhs_data = &this->data(rhs);
|
||||
|
||||
if ((place_data->last < rhs_data->first) && place_data->seen && rhs_data->seen)
|
||||
{
|
||||
// If the lhs last number seen is less than the first number the rhs saw, the lhs is before
|
||||
// the rhs, for example [0, 2] [4, 7]. So we want to add the deltasums, but also add the
|
||||
// difference between lhs last number and rhs first number (the 2 and 4). Then we want to
|
||||
// take last value from the rhs, so first and last become 0 and 7.
|
||||
|
||||
place_data->sum += rhs_data->sum + (rhs_data->first - place_data->last);
|
||||
place_data->last = rhs_data->last;
|
||||
}
|
||||
else if ((rhs_data->first < place_data->last && rhs_data->seen && place_data->seen))
|
||||
{
|
||||
// In the opposite scenario, the lhs comes after the rhs, e.g. [4, 6] [1, 2]. Since we
|
||||
// assume the input interval states are sorted by time, we assume this is a counter
|
||||
// reset, and therefore do *not* add the difference between our first value and the
|
||||
// rhs last value.
|
||||
|
||||
place_data->sum += rhs_data->sum;
|
||||
place_data->last = rhs_data->last;
|
||||
}
|
||||
else if (rhs_data->seen && !place_data->seen)
|
||||
{
|
||||
// If we're here then the lhs is an empty state and the rhs does have some state, so
|
||||
// we'll just take that state.
|
||||
|
||||
place_data->first = rhs_data->first;
|
||||
place_data->last = rhs_data->last;
|
||||
place_data->sum = rhs_data->sum;
|
||||
place_data->seen = rhs_data->seen;
|
||||
}
|
||||
|
||||
// Otherwise lhs either has data or is uninitialized, so we don't need to modify its values.
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
writeBinaryLittleEndian(this->data(place).sum, buf);
|
||||
writeBinaryLittleEndian(this->data(place).first, buf);
|
||||
writeBinaryLittleEndian(this->data(place).last, buf);
|
||||
writeBinaryLittleEndian(this->data(place).seen, buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
readBinaryLittleEndian(this->data(place).sum, buf);
|
||||
readBinaryLittleEndian(this->data(place).first, buf);
|
||||
readBinaryLittleEndian(this->data(place).last, buf);
|
||||
readBinaryLittleEndian(this->data(place).seen, buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).sum);
|
||||
}
|
||||
};
|
||||
|
||||
}
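
In the add() path above, deltaSum only accumulates increases between consecutive values, so a drop is treated as a counter reset; the merge() path additionally bridges the gap between two partial states when the left state's last value is below the right state's first value. A standalone single-pass sketch of the accumulation rule (illustrative, not ClickHouse code):

```cpp
#include <iostream>
#include <vector>

/// Sum only the positive increments between consecutive values; decreases
/// (counter resets) contribute nothing, mirroring the add() logic above.
double deltaSum(const std::vector<double> & values)
{
    double sum = 0.0;
    double last = 0.0;
    bool seen = false;
    for (double value : values)
    {
        if (seen && value > last)
            sum += value - last;
        last = value;
        seen = true;
    }
    return sum;
}

int main()
{
    /// 1 -> 3 adds 2, 3 -> 0 is treated as a counter reset, 0 -> 4 adds 4.
    std::cout << deltaSum({1, 3, 0, 4}) << '\n'; /// 6
}
```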
|
@ -1,22 +1,181 @@
#include <AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h>

#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>

#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>

#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypesNumber.h>

#include <AggregateFunctions/IAggregateFunction.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

namespace
{

template <typename ValueType, typename TimestampType>
|
||||
struct AggregationFunctionDeltaSumTimestampData
|
||||
{
|
||||
ValueType sum = 0;
|
||||
ValueType first = 0;
|
||||
ValueType last = 0;
|
||||
TimestampType first_ts = 0;
|
||||
TimestampType last_ts = 0;
|
||||
bool seen = false;
|
||||
};
|
||||
|
||||
template <typename ValueType, typename TimestampType>
|
||||
class AggregationFunctionDeltaSumTimestamp final
|
||||
: public IAggregateFunctionDataHelper<
|
||||
AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
|
||||
AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
|
||||
>
|
||||
{
|
||||
public:
|
||||
AggregationFunctionDeltaSumTimestamp(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<
|
||||
AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
|
||||
AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
|
||||
>{arguments, params, createResultType()}
|
||||
{}
|
||||
|
||||
AggregationFunctionDeltaSumTimestamp()
|
||||
: IAggregateFunctionDataHelper<
|
||||
AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
|
||||
AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
|
||||
>{}
|
||||
{}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
String getName() const override { return "deltaSumTimestamp"; }
|
||||
|
||||
static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<ValueType>>(); }
|
||||
|
||||
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
auto value = assert_cast<const ColumnVector<ValueType> &>(*columns[0]).getData()[row_num];
|
||||
auto ts = assert_cast<const ColumnVector<TimestampType> &>(*columns[1]).getData()[row_num];
|
||||
|
||||
auto & data = this->data(place);
|
||||
|
||||
if ((data.last < value) && data.seen)
|
||||
{
|
||||
data.sum += (value - data.last);
|
||||
}
|
||||
|
||||
data.last = value;
|
||||
data.last_ts = ts;
|
||||
|
||||
if (!data.seen)
|
||||
{
|
||||
data.first = value;
|
||||
data.seen = true;
|
||||
data.first_ts = ts;
|
||||
}
|
||||
}
|
||||
|
||||
// before returns true if lhs is before rhs or false if it is not or can't be determined
|
||||
bool ALWAYS_INLINE before(
|
||||
const AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType> & lhs,
|
||||
const AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType> & rhs) const
|
||||
{
|
||||
if (lhs.last_ts < rhs.first_ts)
|
||||
return true;
|
||||
if (lhs.last_ts == rhs.first_ts && (lhs.last_ts < rhs.last_ts || lhs.first_ts < rhs.first_ts))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
auto & place_data = this->data(place);
|
||||
auto & rhs_data = this->data(rhs);
|
||||
|
||||
if (!place_data.seen && rhs_data.seen)
|
||||
{
|
||||
place_data.sum = rhs_data.sum;
|
||||
place_data.seen = true;
|
||||
place_data.first = rhs_data.first;
|
||||
place_data.first_ts = rhs_data.first_ts;
|
||||
place_data.last = rhs_data.last;
|
||||
place_data.last_ts = rhs_data.last_ts;
|
||||
}
|
||||
else if (place_data.seen && !rhs_data.seen)
|
||||
{
|
||||
return;
|
||||
}
|
||||
else if (before(place_data, rhs_data))
|
||||
{
|
||||
// This state came before the rhs state
|
||||
|
||||
if (rhs_data.first > place_data.last)
|
||||
place_data.sum += (rhs_data.first - place_data.last);
|
||||
place_data.sum += rhs_data.sum;
|
||||
place_data.last = rhs_data.last;
|
||||
place_data.last_ts = rhs_data.last_ts;
|
||||
}
|
||||
else if (before(rhs_data, place_data))
|
||||
{
|
||||
// This state came after the rhs state
|
||||
|
||||
if (place_data.first > rhs_data.last)
|
||||
place_data.sum += (place_data.first - rhs_data.last);
|
||||
place_data.sum += rhs_data.sum;
|
||||
place_data.first = rhs_data.first;
|
||||
place_data.first_ts = rhs_data.first_ts;
|
||||
}
|
||||
else
|
||||
{
|
||||
// If none of those conditions matched, it means both states we are merging have all
|
||||
// same timestamps. We have to pick either the smaller or larger value so that the
|
||||
// result is deterministic.
|
||||
|
||||
if (place_data.first < rhs_data.first)
|
||||
{
|
||||
place_data.first = rhs_data.first;
|
||||
place_data.last = rhs_data.last;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const auto & data = this->data(place);
|
||||
writeBinaryLittleEndian(data.sum, buf);
|
||||
writeBinaryLittleEndian(data.first, buf);
|
||||
writeBinaryLittleEndian(data.first_ts, buf);
|
||||
writeBinaryLittleEndian(data.last, buf);
|
||||
writeBinaryLittleEndian(data.last_ts, buf);
|
||||
writeBinaryLittleEndian(data.seen, buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
auto & data = this->data(place);
|
||||
readBinaryLittleEndian(data.sum, buf);
|
||||
readBinaryLittleEndian(data.first, buf);
|
||||
readBinaryLittleEndian(data.first_ts, buf);
|
||||
readBinaryLittleEndian(data.last, buf);
|
||||
readBinaryLittleEndian(data.last_ts, buf);
|
||||
readBinaryLittleEndian(data.seen, buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnVector<ValueType> &>(to).getData().push_back(this->data(place).sum);
|
||||
}
|
||||
};
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionDeltaSumTimestamp(
|
||||
const String & name,
|
||||
const DataTypes & arguments,
|
||||
@ -24,10 +183,7 @@ AggregateFunctionPtr createAggregateFunctionDeltaSumTimestamp(
|
||||
const Settings *)
|
||||
{
|
||||
assertNoParameters(name, params);
|
||||
|
||||
if (arguments.size() != 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Incorrect number of arguments for aggregate function {}", name);
|
||||
assertBinary(name, arguments);
|
||||
|
||||
if (!isInteger(arguments[0]) && !isFloat(arguments[0]) && !isDate(arguments[0]) && !isDateTime(arguments[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}, "
|
||||
|
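A minimal standalone sketch (not part of this commit) of the delta-sum-by-timestamp bookkeeping that AggregationFunctionDeltaSumTimestamp implements above: only increases of the value contribute to the sum, and two partial states can be merged using just their first/last values and timestamps. All names in the sketch are illustrative only.

#include <cstdint>
#include <iostream>

struct State
{
    int64_t sum = 0;
    int64_t first = 0;
    int64_t last = 0;
    uint32_t first_ts = 0;
    uint32_t last_ts = 0;
    bool seen = false;
};

// Mirrors add(): only increases of the value contribute to the running sum.
void add(State & s, int64_t value, uint32_t ts)
{
    if (s.seen && s.last < value)
        s.sum += value - s.last;
    s.last = value;
    s.last_ts = ts;
    if (!s.seen)
    {
        s.first = value;
        s.first_ts = ts;
        s.seen = true;
    }
}

// Mirrors merge() for the ordered case: when lhs wholly precedes rhs, the only
// extra delta is the jump from lhs.last to rhs.first (and only if it is an increase).
void mergeOrdered(State & lhs, const State & rhs)
{
    if (!rhs.seen)
        return;
    if (!lhs.seen)
    {
        lhs = rhs;
        return;
    }
    if (rhs.first > lhs.last)
        lhs.sum += rhs.first - lhs.last;
    lhs.sum += rhs.sum;
    lhs.last = rhs.last;
    lhs.last_ts = rhs.last_ts;
}

int main()
{
    // Rows (value @ ts): 1@1, 3@2 go to one partial state, 2@3, 5@4 to another.
    State a, b, whole;
    add(a, 1, 1); add(a, 3, 2);
    add(b, 2, 3); add(b, 5, 4);
    add(whole, 1, 1); add(whole, 3, 2); add(whole, 2, 3); add(whole, 5, 4);
    mergeOrdered(a, b);
    std::cout << a.sum << " == " << whole.sum << "\n"; // both print 5
}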
@ -1,171 +0,0 @@
#pragma once

#include <type_traits>

#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>

#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>

#include <AggregateFunctions/IAggregateFunction.h>


namespace DB
{

template <typename ValueType, typename TimestampType>
struct AggregationFunctionDeltaSumTimestampData
{
    ValueType sum = 0;
    ValueType first = 0;
    ValueType last = 0;
    TimestampType first_ts = 0;
    TimestampType last_ts = 0;
    bool seen = false;
};

template <typename ValueType, typename TimestampType>
class AggregationFunctionDeltaSumTimestamp final
    : public IAggregateFunctionDataHelper<
        AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
        AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
    >
{
public:
    AggregationFunctionDeltaSumTimestamp(const DataTypes & arguments, const Array & params)
        : IAggregateFunctionDataHelper<
            AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
            AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
        >{arguments, params, createResultType()}
    {}

    AggregationFunctionDeltaSumTimestamp()
        : IAggregateFunctionDataHelper<
            AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
            AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
        >{}
    {}

    bool allocatesMemoryInArena() const override { return false; }

    String getName() const override { return "deltaSumTimestamp"; }

    static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<ValueType>>(); }

    void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
    {
        auto value = assert_cast<const ColumnVector<ValueType> &>(*columns[0]).getData()[row_num];
        auto ts = assert_cast<const ColumnVector<TimestampType> &>(*columns[1]).getData()[row_num];

        if ((this->data(place).last < value) && this->data(place).seen)
        {
            this->data(place).sum += (value - this->data(place).last);
        }

        this->data(place).last = value;
        this->data(place).last_ts = ts;

        if (!this->data(place).seen)
        {
            this->data(place).first = value;
            this->data(place).seen = true;
            this->data(place).first_ts = ts;
        }
    }

    // before returns true if lhs is before rhs or false if it is not or can't be determined
    bool ALWAYS_INLINE before (
        const AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType> * lhs,
        const AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType> * rhs
    ) const
    {
        if (lhs->last_ts < rhs->first_ts)
        {
            return true;
        }
        if (lhs->last_ts == rhs->first_ts && (lhs->last_ts < rhs->last_ts || lhs->first_ts < rhs->first_ts))
        {
            return true;
        }
        return false;
    }

    void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        auto place_data = &this->data(place);
        auto rhs_data = &this->data(rhs);

        if (!place_data->seen && rhs_data->seen)
        {
            place_data->sum = rhs_data->sum;
            place_data->seen = true;
            place_data->first = rhs_data->first;
            place_data->first_ts = rhs_data->first_ts;
            place_data->last = rhs_data->last;
            place_data->last_ts = rhs_data->last_ts;
        }
        else if (place_data->seen && !rhs_data->seen)
            return;
        else if (before(place_data, rhs_data))
        {
            // This state came before the rhs state

            if (rhs_data->first > place_data->last)
                place_data->sum += (rhs_data->first - place_data->last);
            place_data->sum += rhs_data->sum;
            place_data->last = rhs_data->last;
            place_data->last_ts = rhs_data->last_ts;
        }
        else if (before(rhs_data, place_data))
        {
            // This state came after the rhs state

            if (place_data->first > rhs_data->last)
                place_data->sum += (place_data->first - rhs_data->last);
            place_data->sum += rhs_data->sum;
            place_data->first = rhs_data->first;
            place_data->first_ts = rhs_data->first_ts;
        }
        else
        {
            // If none of those conditions matched, it means both states we are merging have all
            // same timestamps. We have to pick either the smaller or larger value so that the
            // result is deterministic.

            if (place_data->first < rhs_data->first)
            {
                place_data->first = rhs_data->first;
                place_data->last = rhs_data->last;
            }
        }
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        writeBinaryLittleEndian(this->data(place).sum, buf);
        writeBinaryLittleEndian(this->data(place).first, buf);
        writeBinaryLittleEndian(this->data(place).first_ts, buf);
        writeBinaryLittleEndian(this->data(place).last, buf);
        writeBinaryLittleEndian(this->data(place).last_ts, buf);
        writeBinaryLittleEndian(this->data(place).seen, buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
    {
        readBinaryLittleEndian(this->data(place).sum, buf);
        readBinaryLittleEndian(this->data(place).first, buf);
        readBinaryLittleEndian(this->data(place).first_ts, buf);
        readBinaryLittleEndian(this->data(place).last, buf);
        readBinaryLittleEndian(this->data(place).last_ts, buf);
        readBinaryLittleEndian(this->data(place).seen, buf);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        assert_cast<ColumnVector<ValueType> &>(to).getData().push_back(this->data(place).sum);
    }
};

}
@ -1,8 +1,18 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionEntropy.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>

#include <Common/HashTable/HashMap.h>
#include <Common/NaNUtils.h>

#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/UniqVariadicHash.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>

#include <cmath>


namespace DB
{
@ -16,6 +26,133 @@ namespace ErrorCodes
namespace
{

/** Calculates Shannon Entropy, using HashMap and computing empirical distribution function.
  * Entropy is measured in bits (base-2 logarithm is used).
  */
template <typename Value>
struct EntropyData
{
    using Weight = UInt64;

    using HashingMap = HashMapWithStackMemory<Value, Weight, HashCRC32<Value>, 4>;

    /// For the case of pre-hashed values.
    using TrivialMap = HashMapWithStackMemory<Value, Weight, UInt128TrivialHash, 4>;

    using Map = std::conditional_t<std::is_same_v<UInt128, Value>, TrivialMap, HashingMap>;

    Map map;

    void add(const Value & x)
    {
        if (!isNaN(x))
            ++map[x];
    }

    void add(const Value & x, const Weight & weight)
    {
        if (!isNaN(x))
            map[x] += weight;
    }

    void merge(const EntropyData & rhs)
    {
        for (const auto & pair : rhs.map)
            map[pair.getKey()] += pair.getMapped();
    }

    void serialize(WriteBuffer & buf) const
    {
        map.write(buf);
    }

    void deserialize(ReadBuffer & buf)
    {
        typename Map::Reader reader(buf);
        while (reader.next())
        {
            const auto & pair = reader.get();
            map[pair.first] = pair.second;
        }
    }

    Float64 get() const
    {
        UInt64 total_value = 0;
        for (const auto & pair : map)
            total_value += pair.getMapped();

        Float64 shannon_entropy = 0;
        for (const auto & pair : map)
        {
            Float64 frequency = Float64(pair.getMapped()) / total_value;
            shannon_entropy -= frequency * log2(frequency);
        }

        return shannon_entropy;
    }
};


template <typename Value>
class AggregateFunctionEntropy final : public IAggregateFunctionDataHelper<EntropyData<Value>, AggregateFunctionEntropy<Value>>
{
private:
    size_t num_args;

public:
    explicit AggregateFunctionEntropy(const DataTypes & argument_types_)
        : IAggregateFunctionDataHelper<EntropyData<Value>, AggregateFunctionEntropy<Value>>(argument_types_, {}, createResultType())
        , num_args(argument_types_.size())
    {
    }

    String getName() const override { return "entropy"; }

    static DataTypePtr createResultType()
    {
        return std::make_shared<DataTypeNumber<Float64>>();
    }

    bool allocatesMemoryInArena() const override { return false; }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
    {
        if constexpr (!std::is_same_v<UInt128, Value>)
        {
            /// Here we manage only with numerical types
            const auto & column = assert_cast<const ColumnVector <Value> &>(*columns[0]);
            this->data(place).add(column.getData()[row_num]);
        }
        else
        {
            this->data(place).add(UniqVariadicHash<true, false>::apply(num_args, columns, row_num));
        }
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        this->data(place).merge(this->data(rhs));
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(const_cast<AggregateDataPtr>(place)).serialize(buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
    {
        this->data(place).deserialize(buf);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto & column = assert_cast<ColumnVector<Float64> &>(to);
        column.getData().push_back(this->data(place).get());
    }
};


AggregateFunctionPtr createAggregateFunctionEntropy(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{

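A minimal standalone sketch (not part of this commit) of the computation done by EntropyData::get() above, using std::unordered_map instead of ClickHouse's HashMapWithStackMemory: count occurrences, then accumulate -Σ p·log2(p). Names here are illustrative only.

#include <cmath>
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

double shannonEntropy(const std::vector<int64_t> & values)
{
    // Empirical distribution: value -> number of occurrences.
    std::unordered_map<int64_t, uint64_t> counts;
    for (int64_t v : values)
        ++counts[v];

    const double total = static_cast<double>(values.size());
    double entropy = 0.0;
    for (const auto & [value, count] : counts)
    {
        double frequency = static_cast<double>(count) / total;
        entropy -= frequency * std::log2(frequency); // measured in bits
    }
    return entropy;
}

int main()
{
    // Four equally likely values -> log2(4) = 2 bits.
    std::cout << shannonEntropy({1, 2, 3, 4}) << "\n"; // 2
    // A constant column carries no information -> 0 bits.
    std::cout << shannonEntropy({7, 7, 7, 7}) << "\n"; // 0
}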
@ -1,145 +0,0 @@
#pragma once

#include <Common/HashTable/HashMap.h>
#include <Common/NaNUtils.h>

#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/UniqVariadicHash.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>

#include <cmath>


namespace DB
{
struct Settings;

/** Calculates Shannon Entropy, using HashMap and computing empirical distribution function.
  * Entropy is measured in bits (base-2 logarithm is used).
  */
template <typename Value>
struct EntropyData
{
    using Weight = UInt64;

    using HashingMap = HashMapWithStackMemory<Value, Weight, HashCRC32<Value>, 4>;

    /// For the case of pre-hashed values.
    using TrivialMap = HashMapWithStackMemory<Value, Weight, UInt128TrivialHash, 4>;

    using Map = std::conditional_t<std::is_same_v<UInt128, Value>, TrivialMap, HashingMap>;

    Map map;

    void add(const Value & x)
    {
        if (!isNaN(x))
            ++map[x];
    }

    void add(const Value & x, const Weight & weight)
    {
        if (!isNaN(x))
            map[x] += weight;
    }

    void merge(const EntropyData & rhs)
    {
        for (const auto & pair : rhs.map)
            map[pair.getKey()] += pair.getMapped();
    }

    void serialize(WriteBuffer & buf) const
    {
        map.write(buf);
    }

    void deserialize(ReadBuffer & buf)
    {
        typename Map::Reader reader(buf);
        while (reader.next())
        {
            const auto & pair = reader.get();
            map[pair.first] = pair.second;
        }
    }

    Float64 get() const
    {
        UInt64 total_value = 0;
        for (const auto & pair : map)
            total_value += pair.getMapped();

        Float64 shannon_entropy = 0;
        for (const auto & pair : map)
        {
            Float64 frequency = Float64(pair.getMapped()) / total_value;
            shannon_entropy -= frequency * log2(frequency);
        }

        return shannon_entropy;
    }
};


template <typename Value>
class AggregateFunctionEntropy final : public IAggregateFunctionDataHelper<EntropyData<Value>, AggregateFunctionEntropy<Value>>
{
private:
    size_t num_args;

public:
    explicit AggregateFunctionEntropy(const DataTypes & argument_types_)
        : IAggregateFunctionDataHelper<EntropyData<Value>, AggregateFunctionEntropy<Value>>(argument_types_, {}, createResultType())
        , num_args(argument_types_.size())
    {
    }

    String getName() const override { return "entropy"; }

    static DataTypePtr createResultType()
    {
        return std::make_shared<DataTypeNumber<Float64>>();
    }

    bool allocatesMemoryInArena() const override { return false; }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
    {
        if constexpr (!std::is_same_v<UInt128, Value>)
        {
            /// Here we manage only with numerical types
            const auto & column = assert_cast<const ColumnVector <Value> &>(*columns[0]);
            this->data(place).add(column.getData()[row_num]);
        }
        else
        {
            this->data(place).add(UniqVariadicHash<true, false>::apply(num_args, columns, row_num));
        }
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        this->data(place).merge(this->data(rhs));
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(const_cast<AggregateDataPtr>(place)).serialize(buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
    {
        this->data(place).deserialize(buf);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto & column = assert_cast<ColumnVector<Float64> &>(to);
        column.getData().push_back(this->data(place).get());
    }
};

}
@ -1,12 +1,32 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionGroupArray.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Core/ServerSettings.h>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#define AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -16,11 +36,670 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
enum class Sampler
|
||||
{
|
||||
NONE,
|
||||
RNG,
|
||||
};
|
||||
|
||||
template <bool Thas_limit, bool Tlast, Sampler Tsampler>
|
||||
struct GroupArrayTrait
|
||||
{
|
||||
static constexpr bool has_limit = Thas_limit;
|
||||
static constexpr bool last = Tlast;
|
||||
static constexpr Sampler sampler = Tsampler;
|
||||
};
|
||||
|
||||
template <typename Trait>
|
||||
constexpr const char * getNameByTrait()
|
||||
{
|
||||
if (Trait::last)
|
||||
return "groupArrayLast";
|
||||
if (Trait::sampler == Sampler::NONE)
|
||||
return "groupArray";
|
||||
else if (Trait::sampler == Sampler::RNG)
|
||||
return "groupArraySample";
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct GroupArraySamplerData
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
|
||||
Array value;
|
||||
size_t total_values = 0;
|
||||
pcg32_fast rng;
|
||||
|
||||
UInt64 genRandom(size_t lim)
|
||||
{
|
||||
chassert(lim != 0);
|
||||
|
||||
/// With a large number of values, we will generate random numbers several times slower.
|
||||
if (lim <= static_cast<UInt64>(rng.max()))
|
||||
return rng() % lim;
|
||||
else
|
||||
return (static_cast<UInt64>(rng()) * (static_cast<UInt64>(rng.max()) + 1ULL) + static_cast<UInt64>(rng())) % lim;
|
||||
}
|
||||
|
||||
void randomShuffle()
|
||||
{
|
||||
size_t size = value.size();
|
||||
chassert(size < std::numeric_limits<size_t>::max());
|
||||
|
||||
for (size_t i = 1; i < size; ++i)
|
||||
{
|
||||
size_t j = genRandom(i + 1);
|
||||
std::swap(value[i], value[j]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// A particular case is an implementation for numeric types.
|
||||
template <typename T, bool has_sampler>
|
||||
struct GroupArrayNumericData;
|
||||
|
||||
template <typename T>
|
||||
struct GroupArrayNumericData<T, false>
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
|
||||
// For groupArrayLast()
|
||||
size_t total_values = 0;
|
||||
Array value;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct GroupArrayNumericData<T, true> : public GroupArraySamplerData<T>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename Trait>
|
||||
class GroupArrayNumericImpl final
|
||||
: public IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>
|
||||
{
|
||||
using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>;
|
||||
static constexpr bool limit_num_elems = Trait::has_limit;
|
||||
UInt64 max_elems;
|
||||
std::optional<UInt64> seed;
|
||||
|
||||
public:
|
||||
explicit GroupArrayNumericImpl(
|
||||
const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_, std::optional<UInt64> seed_)
|
||||
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
|
||||
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
|
||||
, max_elems(max_elems_)
|
||||
, seed(seed_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return getNameByTrait<Trait>(); }
|
||||
|
||||
void insertWithSampler(Data & a, const T & v, Arena * arena) const
|
||||
{
|
||||
++a.total_values;
|
||||
if (a.value.size() < max_elems)
|
||||
a.value.push_back(v, arena);
|
||||
else
|
||||
{
|
||||
UInt64 rnd = a.genRandom(a.total_values);
|
||||
if (rnd < max_elems)
|
||||
a.value[rnd] = v;
|
||||
}
|
||||
}
|
||||
|
||||
void create(AggregateDataPtr __restrict place) const override /// NOLINT
|
||||
{
|
||||
[[maybe_unused]] auto a = new (place) Data;
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
a->rng.seed(seed.value_or(thread_local_rng()));
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
const auto & row_value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
|
||||
auto & cur_elems = this->data(place);
|
||||
|
||||
++cur_elems.total_values;
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::NONE)
|
||||
{
|
||||
if (limit_num_elems && cur_elems.value.size() >= max_elems)
|
||||
{
|
||||
if constexpr (Trait::last)
|
||||
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value;
|
||||
return;
|
||||
}
|
||||
|
||||
cur_elems.value.push_back(row_value, arena);
|
||||
}
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
if (cur_elems.value.size() < max_elems)
|
||||
cur_elems.value.push_back(row_value, arena);
|
||||
else
|
||||
{
|
||||
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
|
||||
if (rnd < max_elems)
|
||||
cur_elems.value[rnd] = row_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = this->data(place);
|
||||
auto & rhs_elems = this->data(rhs);
|
||||
|
||||
if (rhs_elems.value.empty())
|
||||
return;
|
||||
|
||||
if constexpr (Trait::last)
|
||||
mergeNoSamplerLast(cur_elems, rhs_elems, arena);
|
||||
else if constexpr (Trait::sampler == Sampler::NONE)
|
||||
mergeNoSampler(cur_elems, rhs_elems, arena);
|
||||
else if constexpr (Trait::sampler == Sampler::RNG)
|
||||
mergeWithRNGSampler(cur_elems, rhs_elems, arena);
|
||||
}
|
||||
|
||||
void mergeNoSamplerLast(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
UInt64 new_elements = std::min(static_cast<size_t>(max_elems), cur_elems.value.size() + rhs_elems.value.size());
|
||||
cur_elems.value.resize_exact(new_elements, arena);
|
||||
for (auto & value : rhs_elems.value)
|
||||
{
|
||||
cur_elems.value[cur_elems.total_values % max_elems] = value;
|
||||
++cur_elems.total_values;
|
||||
}
|
||||
chassert(rhs_elems.total_values >= rhs_elems.value.size());
|
||||
cur_elems.total_values += rhs_elems.total_values - rhs_elems.value.size();
|
||||
}
|
||||
|
||||
void mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
{
|
||||
if (rhs_elems.value.size())
|
||||
cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt64 elems_to_insert = std::min(static_cast<size_t>(max_elems) - cur_elems.value.size(), rhs_elems.value.size());
|
||||
if (elems_to_insert)
|
||||
cur_elems.value.insertByOffsets(rhs_elems.value, 0, elems_to_insert, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void mergeWithRNGSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
if (rhs_elems.total_values <= max_elems)
|
||||
{
|
||||
for (size_t i = 0; i < rhs_elems.value.size(); ++i)
|
||||
insertWithSampler(cur_elems, rhs_elems.value[i], arena);
|
||||
}
|
||||
else if (cur_elems.total_values <= max_elems)
|
||||
{
|
||||
decltype(cur_elems.value) from;
|
||||
from.swap(cur_elems.value, arena);
|
||||
cur_elems.value.assign(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
|
||||
cur_elems.total_values = rhs_elems.total_values;
|
||||
for (size_t i = 0; i < from.size(); ++i)
|
||||
insertWithSampler(cur_elems, from[i], arena);
|
||||
}
|
||||
else
|
||||
{
|
||||
cur_elems.randomShuffle();
|
||||
cur_elems.total_values += rhs_elems.total_values;
|
||||
for (size_t i = 0; i < max_elems; ++i)
|
||||
{
|
||||
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
|
||||
if (rnd < rhs_elems.total_values)
|
||||
cur_elems.value[i] = rhs_elems.value[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void checkArraySize(size_t elems, size_t max_elems)
|
||||
{
|
||||
if (unlikely(elems > max_elems))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size {} (maximum: {})", elems, max_elems);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const auto & value = this->data(place).value;
|
||||
const UInt64 size = value.size();
|
||||
checkArraySize(size, max_elems);
|
||||
writeVarUInt(size, buf);
|
||||
for (const auto & element : value)
|
||||
writeBinaryLittleEndian(element, buf);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
writeBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
writeBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
WriteBufferFromOwnString rng_buf;
|
||||
rng_buf << this->data(place).rng;
|
||||
writeStringBinary(rng_buf.str(), buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
checkArraySize(size, max_elems);
|
||||
|
||||
auto & value = this->data(place).value;
|
||||
|
||||
value.resize_exact(size, arena);
|
||||
for (auto & element : value)
|
||||
readBinaryLittleEndian(element, buf);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
readBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
readBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
std::string rng_string;
|
||||
readStringBinary(rng_string, buf);
|
||||
ReadBufferFromString rng_buf(rng_string);
|
||||
rng_buf >> this->data(place).rng;
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & value = this->data(place).value;
|
||||
size_t size = value.size();
|
||||
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
if (size)
|
||||
{
|
||||
typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
|
||||
data_to.insert(this->data(place).value.begin(), this->data(place).value.end());
|
||||
}
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
};
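A minimal standalone sketch (not part of this commit) of the reservoir-sampling scheme that GroupArrayNumericImpl::insertWithSampler() above implements for groupArraySample: once the reservoir is full, each new element replaces a random slot with probability max_elems / total_values, so every element seen so far stays in the sample with equal probability. All names in the sketch are illustrative only.

#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

struct Reservoir
{
    size_t max_elems;
    size_t total_values = 0;
    std::vector<int64_t> value;
    std::mt19937 rng;

    explicit Reservoir(size_t max_elems_, uint64_t seed = 0) : max_elems(max_elems_), rng(seed) {}

    void insert(int64_t v)
    {
        ++total_values;
        if (value.size() < max_elems)
        {
            // Reservoir not yet full: always keep the element.
            value.push_back(v);
        }
        else
        {
            // Keep the new element with probability max_elems / total_values.
            std::uniform_int_distribution<size_t> dist(0, total_values - 1);
            size_t slot = dist(rng);
            if (slot < max_elems)
                value[slot] = v;
        }
    }
};

int main()
{
    Reservoir sample(/* max_elems = */ 3, /* seed = */ 42);
    for (int64_t i = 0; i < 1000; ++i)
        sample.insert(i);
    for (int64_t v : sample.value)
        std::cout << v << ' ';   // 3 uniformly chosen elements out of the 1000 seen
    std::cout << '\n';
}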
|
||||
|
||||
|
||||
/// General case
|
||||
|
||||
|
||||
/// Nodes used to implement a linked list for storage of groupArray states
|
||||
|
||||
template <typename Node>
|
||||
struct GroupArrayNodeBase
|
||||
{
|
||||
UInt64 size; // size of payload
|
||||
|
||||
/// Returns pointer to actual payload
|
||||
char * data() { return reinterpret_cast<char *>(this) + sizeof(Node); }
|
||||
|
||||
const char * data() const { return reinterpret_cast<const char *>(this) + sizeof(Node); }
|
||||
|
||||
/// Clones existing node (does not modify next field)
|
||||
Node * clone(Arena * arena) const
|
||||
{
|
||||
return reinterpret_cast<Node *>(
|
||||
const_cast<char *>(arena->alignedInsert(reinterpret_cast<const char *>(this), sizeof(Node) + size, alignof(Node))));
|
||||
}
|
||||
|
||||
static void checkElementSize(size_t size, size_t max_size)
|
||||
{
|
||||
if (unlikely(size > max_size))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array element size {} (maximum: {})", size, max_size);
|
||||
}
|
||||
|
||||
/// Write node to buffer
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
checkElementSize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(data(), size);
|
||||
}
|
||||
|
||||
/// Reads and allocates node from ReadBuffer's data (doesn't set next)
|
||||
static Node * read(ReadBuffer & buf, Arena * arena)
|
||||
{
|
||||
UInt64 size;
|
||||
readVarUInt(size, buf);
|
||||
checkElementSize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
|
||||
|
||||
Node * node = reinterpret_cast<Node *>(arena->alignedAlloc(sizeof(Node) + size, alignof(Node)));
|
||||
node->size = size;
|
||||
buf.readStrict(node->data(), size);
|
||||
return node;
|
||||
}
|
||||
};
|
||||
|
||||
struct GroupArrayNodeString : public GroupArrayNodeBase<GroupArrayNodeString>
|
||||
{
|
||||
using Node = GroupArrayNodeString;
|
||||
|
||||
/// Create node from string
|
||||
static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
|
||||
{
|
||||
StringRef string = assert_cast<const ColumnString &>(column).getDataAt(row_num);
|
||||
|
||||
Node * node = reinterpret_cast<Node *>(arena->alignedAlloc(sizeof(Node) + string.size, alignof(Node)));
|
||||
node->size = string.size;
|
||||
memcpy(node->data(), string.data, string.size);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
void insertInto(IColumn & column)
|
||||
{
|
||||
assert_cast<ColumnString &>(column).insertData(data(), size);
|
||||
}
|
||||
};
|
||||
|
||||
struct GroupArrayNodeGeneral : public GroupArrayNodeBase<GroupArrayNodeGeneral>
|
||||
{
|
||||
using Node = GroupArrayNodeGeneral;
|
||||
|
||||
static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
|
||||
{
|
||||
const char * begin = arena->alignedAlloc(sizeof(Node), alignof(Node));
|
||||
StringRef value = column.serializeValueIntoArena(row_num, *arena, begin);
|
||||
|
||||
Node * node = reinterpret_cast<Node *>(const_cast<char *>(begin));
|
||||
node->size = value.size;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
void insertInto(IColumn & column) { column.deserializeAndInsertFromArena(data()); }
|
||||
};
|
||||
|
||||
template <typename Node, bool has_sampler>
|
||||
struct GroupArrayGeneralData;
|
||||
|
||||
template <typename Node>
|
||||
struct GroupArrayGeneralData<Node, false>
|
||||
{
|
||||
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(Node *), 4096>;
|
||||
using Array = PODArray<Node *, 32, Allocator>;
|
||||
|
||||
// For groupArrayLast()
|
||||
size_t total_values = 0;
|
||||
Array value;
|
||||
};
|
||||
|
||||
template <typename Node>
|
||||
struct GroupArrayGeneralData<Node, true> : public GroupArraySamplerData<Node *>
|
||||
{
|
||||
};
|
||||
|
||||
/// Implementation of groupArray for String or any ComplexObject via Array
|
||||
template <typename Node, typename Trait>
|
||||
class GroupArrayGeneralImpl final
|
||||
: public IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>
|
||||
{
|
||||
static constexpr bool limit_num_elems = Trait::has_limit;
|
||||
using Data = GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>;
|
||||
static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast<Data *>(place); }
|
||||
static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast<const Data *>(place); }
|
||||
|
||||
DataTypePtr & data_type;
|
||||
UInt64 max_elems;
|
||||
std::optional<UInt64> seed;
|
||||
|
||||
public:
|
||||
GroupArrayGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_, std::optional<UInt64> seed_)
|
||||
: IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>(
|
||||
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
|
||||
, data_type(this->argument_types[0])
|
||||
, max_elems(max_elems_)
|
||||
, seed(seed_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return getNameByTrait<Trait>(); }
|
||||
|
||||
void insertWithSampler(Data & a, const Node * v, Arena * arena) const
|
||||
{
|
||||
++a.total_values;
|
||||
if (a.value.size() < max_elems)
|
||||
a.value.push_back(v->clone(arena), arena);
|
||||
else
|
||||
{
|
||||
UInt64 rnd = a.genRandom(a.total_values);
|
||||
if (rnd < max_elems)
|
||||
a.value[rnd] = v->clone(arena);
|
||||
}
|
||||
}
|
||||
|
||||
void create(AggregateDataPtr __restrict place) const override /// NOLINT
|
||||
{
|
||||
[[maybe_unused]] auto a = new (place) Data;
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
a->rng.seed(seed.value_or(thread_local_rng()));
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = data(place);
|
||||
|
||||
++cur_elems.total_values;
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::NONE)
|
||||
{
|
||||
if (limit_num_elems && cur_elems.value.size() >= max_elems)
|
||||
{
|
||||
if (Trait::last)
|
||||
{
|
||||
Node * node = Node::allocate(*columns[0], row_num, arena);
|
||||
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = node;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
Node * node = Node::allocate(*columns[0], row_num, arena);
|
||||
cur_elems.value.push_back(node, arena);
|
||||
}
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
if (cur_elems.value.size() < max_elems)
|
||||
cur_elems.value.push_back(Node::allocate(*columns[0], row_num, arena), arena);
|
||||
else
|
||||
{
|
||||
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
|
||||
if (rnd < max_elems)
|
||||
cur_elems.value[rnd] = Node::allocate(*columns[0], row_num, arena);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = data(place);
|
||||
auto & rhs_elems = data(rhs);
|
||||
|
||||
if (rhs_elems.value.empty())
|
||||
return;
|
||||
|
||||
if constexpr (Trait::last)
|
||||
mergeNoSamplerLast(cur_elems, rhs_elems, arena);
|
||||
else if constexpr (Trait::sampler == Sampler::NONE)
|
||||
mergeNoSampler(cur_elems, rhs_elems, arena);
|
||||
else if constexpr (Trait::sampler == Sampler::RNG)
|
||||
mergeWithRNGSampler(cur_elems, rhs_elems, arena);
|
||||
}
|
||||
|
||||
void ALWAYS_INLINE mergeNoSamplerLast(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
UInt64 new_elements = std::min(static_cast<size_t>(max_elems), cur_elems.value.size() + rhs_elems.value.size());
|
||||
cur_elems.value.resize_exact(new_elements, arena);
|
||||
for (auto & value : rhs_elems.value)
|
||||
{
|
||||
cur_elems.value[cur_elems.total_values % max_elems] = value->clone(arena);
|
||||
++cur_elems.total_values;
|
||||
}
|
||||
chassert(rhs_elems.total_values >= rhs_elems.value.size());
|
||||
cur_elems.total_values += rhs_elems.total_values - rhs_elems.value.size();
|
||||
}
|
||||
|
||||
void ALWAYS_INLINE mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
UInt64 new_elems;
|
||||
if (limit_num_elems)
|
||||
{
|
||||
if (cur_elems.value.size() >= max_elems)
|
||||
return;
|
||||
new_elems = std::min(rhs_elems.value.size(), static_cast<size_t>(max_elems) - cur_elems.value.size());
|
||||
}
|
||||
else
|
||||
new_elems = rhs_elems.value.size();
|
||||
|
||||
for (UInt64 i = 0; i < new_elems; ++i)
|
||||
cur_elems.value.push_back(rhs_elems.value[i]->clone(arena), arena);
|
||||
}
|
||||
|
||||
void ALWAYS_INLINE mergeWithRNGSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
if (rhs_elems.total_values <= max_elems)
|
||||
{
|
||||
for (size_t i = 0; i < rhs_elems.value.size(); ++i)
|
||||
insertWithSampler(cur_elems, rhs_elems.value[i], arena);
|
||||
}
|
||||
else if (cur_elems.total_values <= max_elems)
|
||||
{
|
||||
decltype(cur_elems.value) from;
|
||||
from.swap(cur_elems.value, arena);
|
||||
for (auto & node : rhs_elems.value)
|
||||
cur_elems.value.push_back(node->clone(arena), arena);
|
||||
cur_elems.total_values = rhs_elems.total_values;
|
||||
for (size_t i = 0; i < from.size(); ++i)
|
||||
insertWithSampler(cur_elems, from[i], arena);
|
||||
}
|
||||
else
|
||||
{
|
||||
cur_elems.randomShuffle();
|
||||
cur_elems.total_values += rhs_elems.total_values;
|
||||
for (size_t i = 0; i < max_elems; ++i)
|
||||
{
|
||||
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
|
||||
if (rnd < rhs_elems.total_values)
|
||||
cur_elems.value[i] = rhs_elems.value[i]->clone(arena);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void checkArraySize(size_t elems, size_t max_elems)
|
||||
{
|
||||
if (unlikely(elems > max_elems))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size {} (maximum: {})", elems, max_elems);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
UInt64 elems = data(place).value.size();
|
||||
checkArraySize(elems, max_elems);
|
||||
writeVarUInt(elems, buf);
|
||||
|
||||
auto & value = data(place).value;
|
||||
for (auto & node : value)
|
||||
node->write(buf);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
writeBinaryLittleEndian(data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
writeBinaryLittleEndian(data(place).total_values, buf);
|
||||
WriteBufferFromOwnString rng_buf;
|
||||
rng_buf << data(place).rng;
|
||||
writeStringBinary(rng_buf.str(), buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
UInt64 elems;
|
||||
readVarUInt(elems, buf);
|
||||
|
||||
if (unlikely(elems == 0))
|
||||
return;
|
||||
|
||||
checkArraySize(elems, max_elems);
|
||||
|
||||
auto & value = data(place).value;
|
||||
|
||||
value.resize_exact(elems, arena);
|
||||
for (UInt64 i = 0; i < elems; ++i)
|
||||
value[i] = Node::read(buf, arena);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
readBinaryLittleEndian(data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
readBinaryLittleEndian(data(place).total_values, buf);
|
||||
std::string rng_string;
|
||||
readStringBinary(rng_string, buf);
|
||||
ReadBufferFromString rng_buf(rng_string);
|
||||
rng_buf >> data(place).rng;
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
auto & column_array = assert_cast<ColumnArray &>(to);
|
||||
|
||||
auto & offsets = column_array.getOffsets();
|
||||
offsets.push_back(offsets.back() + data(place).value.size());
|
||||
|
||||
auto & column_data = column_array.getData();
|
||||
|
||||
if (std::is_same_v<Node, GroupArrayNodeString>)
|
||||
{
|
||||
auto & string_offsets = assert_cast<ColumnString &>(column_data).getOffsets();
|
||||
string_offsets.reserve(string_offsets.size() + data(place).value.size());
|
||||
}
|
||||
|
||||
auto & value = data(place).value;
|
||||
for (auto & node : value)
|
||||
node->insertInto(column_data);
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
};
|
||||
|
||||
|
||||
template <template <typename, typename> class AggregateFunctionTemplate, typename Data, typename ... TArgs>
|
||||
IAggregateFunction * createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
|
||||
{
|
||||
@ -87,10 +766,10 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
|
||||
{
|
||||
if (Tlast)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "groupArrayLast make sense only with max_elems (groupArrayLast(max_elems)())");
|
||||
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ false, Tlast, /* Tsampler= */ Sampler::NONE>>(argument_types[0], parameters, max_elems);
|
||||
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ false, Tlast, /* Tsampler= */ Sampler::NONE>>(argument_types[0], parameters, max_elems, std::nullopt);
|
||||
}
|
||||
else
|
||||
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ true, Tlast, /* Tsampler= */ Sampler::NONE>>(argument_types[0], parameters, max_elems);
|
||||
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ true, Tlast, /* Tsampler= */ Sampler::NONE>>(argument_types[0], parameters, max_elems, std::nullopt);
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionGroupArraySample(
|
||||
@ -117,11 +796,9 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(
|
||||
|
||||
UInt64 max_elems = get_parameter(0);
|
||||
|
||||
UInt64 seed;
|
||||
std::optional<UInt64> seed;
|
||||
if (parameters.size() >= 2)
|
||||
seed = get_parameter(1);
|
||||
else
|
||||
seed = thread_local_rng();
|
||||
|
||||
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ true, /* Tlast= */ false, /* Tsampler= */ Sampler::RNG>>(argument_types[0], parameters, max_elems, seed);
|
||||
}
|
||||
|
@ -1,690 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#define AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
enum class Sampler
|
||||
{
|
||||
NONE,
|
||||
RNG,
|
||||
};
|
||||
|
||||
template <bool Thas_limit, bool Tlast, Sampler Tsampler>
|
||||
struct GroupArrayTrait
|
||||
{
|
||||
static constexpr bool has_limit = Thas_limit;
|
||||
static constexpr bool last = Tlast;
|
||||
static constexpr Sampler sampler = Tsampler;
|
||||
};
|
||||
|
||||
template <typename Trait>
|
||||
static constexpr const char * getNameByTrait()
|
||||
{
|
||||
if (Trait::last)
|
||||
return "groupArrayLast";
|
||||
if (Trait::sampler == Sampler::NONE)
|
||||
return "groupArray";
|
||||
else if (Trait::sampler == Sampler::RNG)
|
||||
return "groupArraySample";
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct GroupArraySamplerData
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
|
||||
Array value;
|
||||
size_t total_values = 0;
|
||||
pcg32_fast rng;
|
||||
|
||||
UInt64 genRandom(size_t lim)
|
||||
{
|
||||
/// With a large number of values, we will generate random numbers several times slower.
|
||||
if (lim <= static_cast<UInt64>(rng.max()))
|
||||
return static_cast<UInt32>(rng()) % static_cast<UInt32>(lim);
|
||||
else
|
||||
return (static_cast<UInt64>(rng()) * (static_cast<UInt64>(rng.max()) + 1ULL) + static_cast<UInt64>(rng())) % lim;
|
||||
}
|
||||
|
||||
void randomShuffle()
|
||||
{
|
||||
for (size_t i = 1; i < value.size(); ++i)
|
||||
{
|
||||
size_t j = genRandom(i + 1);
|
||||
std::swap(value[i], value[j]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// A particular case is an implementation for numeric types.
|
||||
template <typename T, bool has_sampler>
|
||||
struct GroupArrayNumericData;
|
||||
|
||||
template <typename T>
|
||||
struct GroupArrayNumericData<T, false>
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
|
||||
// For groupArrayLast()
|
||||
size_t total_values = 0;
|
||||
Array value;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct GroupArrayNumericData<T, true> : public GroupArraySamplerData<T>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename Trait>
|
||||
class GroupArrayNumericImpl final
|
||||
: public IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>
|
||||
{
|
||||
using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>;
|
||||
static constexpr bool limit_num_elems = Trait::has_limit;
|
||||
UInt64 max_elems;
|
||||
UInt64 seed;
|
||||
|
||||
public:
|
||||
explicit GroupArrayNumericImpl(
|
||||
const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_, UInt64 seed_ = 123456)
|
||||
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
|
||||
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
|
||||
, max_elems(max_elems_)
|
||||
, seed(seed_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return getNameByTrait<Trait>(); }
|
||||
|
||||
void insertWithSampler(Data & a, const T & v, Arena * arena) const
|
||||
{
|
||||
++a.total_values;
|
||||
if (a.value.size() < max_elems)
|
||||
a.value.push_back(v, arena);
|
||||
else
|
||||
{
|
||||
UInt64 rnd = a.genRandom(a.total_values);
|
||||
if (rnd < max_elems)
|
||||
a.value[rnd] = v;
|
||||
}
|
||||
}
|
||||
|
||||
void create(AggregateDataPtr __restrict place) const override /// NOLINT
|
||||
{
|
||||
[[maybe_unused]] auto a = new (place) Data;
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
a->rng.seed(seed);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
const auto & row_value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
|
||||
auto & cur_elems = this->data(place);
|
||||
|
||||
++cur_elems.total_values;
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::NONE)
|
||||
{
|
||||
if (limit_num_elems && cur_elems.value.size() >= max_elems)
|
||||
{
|
||||
if constexpr (Trait::last)
|
||||
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value;
|
||||
return;
|
||||
}
|
||||
|
||||
cur_elems.value.push_back(row_value, arena);
|
||||
}
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
if (cur_elems.value.size() < max_elems)
|
||||
cur_elems.value.push_back(row_value, arena);
|
||||
else
|
||||
{
|
||||
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
|
||||
if (rnd < max_elems)
|
||||
cur_elems.value[rnd] = row_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = this->data(place);
|
||||
auto & rhs_elems = this->data(rhs);
|
||||
|
||||
if (rhs_elems.value.empty())
|
||||
return;
|
||||
|
||||
if constexpr (Trait::last)
|
||||
mergeNoSamplerLast(cur_elems, rhs_elems, arena);
|
||||
else if constexpr (Trait::sampler == Sampler::NONE)
|
||||
mergeNoSampler(cur_elems, rhs_elems, arena);
|
||||
else if constexpr (Trait::sampler == Sampler::RNG)
|
||||
mergeWithRNGSampler(cur_elems, rhs_elems, arena);
|
||||
}
|
||||
|
||||
void mergeNoSamplerLast(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
UInt64 new_elements = std::min(static_cast<size_t>(max_elems), cur_elems.value.size() + rhs_elems.value.size());
|
||||
cur_elems.value.resize_exact(new_elements, arena);
|
||||
for (auto & value : rhs_elems.value)
|
||||
{
|
||||
cur_elems.value[cur_elems.total_values % max_elems] = value;
|
||||
++cur_elems.total_values;
|
||||
}
|
||||
assert(rhs_elems.total_values >= rhs_elems.value.size());
|
||||
cur_elems.total_values += rhs_elems.total_values - rhs_elems.value.size();
|
||||
}
|
||||
|
||||
void mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
{
|
||||
if (rhs_elems.value.size())
|
||||
cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt64 elems_to_insert = std::min(static_cast<size_t>(max_elems) - cur_elems.value.size(), rhs_elems.value.size());
|
||||
if (elems_to_insert)
|
||||
cur_elems.value.insertByOffsets(rhs_elems.value, 0, elems_to_insert, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void mergeWithRNGSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
if (rhs_elems.total_values <= max_elems)
|
||||
{
|
||||
for (size_t i = 0; i < rhs_elems.value.size(); ++i)
|
||||
insertWithSampler(cur_elems, rhs_elems.value[i], arena);
|
||||
}
|
||||
else if (cur_elems.total_values <= max_elems)
|
||||
{
|
||||
decltype(cur_elems.value) from;
|
||||
from.swap(cur_elems.value, arena);
|
||||
cur_elems.value.assign(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
|
||||
cur_elems.total_values = rhs_elems.total_values;
|
||||
for (size_t i = 0; i < from.size(); ++i)
|
||||
insertWithSampler(cur_elems, from[i], arena);
|
||||
}
|
||||
else
|
||||
{
|
||||
cur_elems.randomShuffle();
|
||||
cur_elems.total_values += rhs_elems.total_values;
|
||||
for (size_t i = 0; i < max_elems; ++i)
|
||||
{
|
||||
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
|
||||
if (rnd < rhs_elems.total_values)
|
||||
cur_elems.value[i] = rhs_elems.value[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void checkArraySize(size_t elems, size_t max_elems)
|
||||
{
|
||||
if (unlikely(elems > max_elems))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size {} (maximum: {})", elems, max_elems);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const auto & value = this->data(place).value;
|
||||
const UInt64 size = value.size();
|
||||
checkArraySize(size, max_elems);
|
||||
writeVarUInt(size, buf);
|
||||
for (const auto & element : value)
|
||||
writeBinaryLittleEndian(element, buf);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
writeBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
writeBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
WriteBufferFromOwnString rng_buf;
|
||||
rng_buf << this->data(place).rng;
|
||||
writeStringBinary(rng_buf.str(), buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
checkArraySize(size, max_elems);
|
||||
|
||||
auto & value = this->data(place).value;
|
||||
|
||||
value.resize_exact(size, arena);
|
||||
for (auto & element : value)
|
||||
readBinaryLittleEndian(element, buf);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
readBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
readBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
std::string rng_string;
|
||||
readStringBinary(rng_string, buf);
|
||||
ReadBufferFromString rng_buf(rng_string);
|
||||
rng_buf >> this->data(place).rng;
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & value = this->data(place).value;
|
||||
size_t size = value.size();
|
||||
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
if (size)
|
||||
{
|
||||
typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
|
||||
data_to.insert(this->data(place).value.begin(), this->data(place).value.end());
|
||||
}
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
};
|
||||
|
||||
|
||||
/// General case
|
||||
|
||||
|
||||
/// Nodes used to implement a linked list for storage of groupArray states
|
||||
|
||||
template <typename Node>
|
||||
struct GroupArrayNodeBase
|
||||
{
|
||||
UInt64 size; // size of payload
|
||||
|
||||
/// Returns pointer to actual payload
|
||||
char * data() { return reinterpret_cast<char *>(this) + sizeof(Node); }
|
||||
|
||||
const char * data() const { return reinterpret_cast<const char *>(this) + sizeof(Node); }
|
||||
|
||||
/// Clones existing node (does not modify next field)
|
||||
Node * clone(Arena * arena) const
|
||||
{
|
||||
return reinterpret_cast<Node *>(
|
||||
const_cast<char *>(arena->alignedInsert(reinterpret_cast<const char *>(this), sizeof(Node) + size, alignof(Node))));
|
||||
}
|
||||
|
||||
static void checkElementSize(size_t size, size_t max_size)
|
||||
{
|
||||
if (unlikely(size > max_size))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array element size {} (maximum: {})", size, max_size);
|
||||
}
|
||||
|
||||
/// Write node to buffer
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
checkElementSize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(data(), size);
|
||||
}
|
||||
|
||||
/// Reads and allocates node from ReadBuffer's data (doesn't set next)
|
||||
static Node * read(ReadBuffer & buf, Arena * arena)
|
||||
{
|
||||
UInt64 size;
|
||||
readVarUInt(size, buf);
|
||||
checkElementSize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
|
||||
|
||||
Node * node = reinterpret_cast<Node *>(arena->alignedAlloc(sizeof(Node) + size, alignof(Node)));
|
||||
node->size = size;
|
||||
buf.readStrict(node->data(), size);
|
||||
return node;
|
||||
}
|
||||
};
|
||||
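/// Editorial note (illustrative sketch, not part of the original sources): each node is a single
/// arena allocation consisting of the fixed-size header (the `size` field) followed immediately by
/// `size` bytes of payload, which is why data() simply offsets `this` by sizeof(Node), and why
/// clone() can copy sizeof(Node) + size bytes in one go. A minimal standalone sketch of the same
/// "header + inline payload" layout using plain malloc instead of an Arena (ownership and error
/// handling omitted):
///
///     #include <cstdint>
///     #include <cstdlib>
///     #include <cstring>
///
///     struct InlineNode
///     {
///         uint64_t size;
///
///         char * data() { return reinterpret_cast<char *>(this) + sizeof(InlineNode); }
///
///         static InlineNode * allocate(const char * payload, uint64_t size)
///         {
///             auto * node = static_cast<InlineNode *>(std::malloc(sizeof(InlineNode) + size));
///             node->size = size;
///             std::memcpy(node->data(), payload, size);   /// payload lives right after the header
///             return node;
///         }
///     };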
|
||||
struct GroupArrayNodeString : public GroupArrayNodeBase<GroupArrayNodeString>
|
||||
{
|
||||
using Node = GroupArrayNodeString;
|
||||
|
||||
/// Create node from string
|
||||
static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
|
||||
{
|
||||
StringRef string = assert_cast<const ColumnString &>(column).getDataAt(row_num);
|
||||
|
||||
Node * node = reinterpret_cast<Node *>(arena->alignedAlloc(sizeof(Node) + string.size, alignof(Node)));
|
||||
node->size = string.size;
|
||||
memcpy(node->data(), string.data, string.size);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
void insertInto(IColumn & column)
|
||||
{
|
||||
assert_cast<ColumnString &>(column).insertData(data(), size);
|
||||
}
|
||||
};
|
||||
|
||||
struct GroupArrayNodeGeneral : public GroupArrayNodeBase<GroupArrayNodeGeneral>
|
||||
{
|
||||
using Node = GroupArrayNodeGeneral;
|
||||
|
||||
static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
|
||||
{
|
||||
const char * begin = arena->alignedAlloc(sizeof(Node), alignof(Node));
|
||||
StringRef value = column.serializeValueIntoArena(row_num, *arena, begin);
|
||||
|
||||
Node * node = reinterpret_cast<Node *>(const_cast<char *>(begin));
|
||||
node->size = value.size;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
void insertInto(IColumn & column) { column.deserializeAndInsertFromArena(data()); }
|
||||
};
|
||||
|
||||
template <typename Node, bool has_sampler>
|
||||
struct GroupArrayGeneralData;
|
||||
|
||||
template <typename Node>
|
||||
struct GroupArrayGeneralData<Node, false>
|
||||
{
|
||||
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(Node *), 4096>;
|
||||
using Array = PODArray<Node *, 32, Allocator>;
|
||||
|
||||
// For groupArrayLast()
|
||||
size_t total_values = 0;
|
||||
Array value;
|
||||
};
|
||||
|
||||
template <typename Node>
|
||||
struct GroupArrayGeneralData<Node, true> : public GroupArraySamplerData<Node *>
|
||||
{
|
||||
};
|
||||
|
||||
/// Implementation of groupArray for String or any ComplexObject via Array
|
||||
template <typename Node, typename Trait>
|
||||
class GroupArrayGeneralImpl final
|
||||
: public IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>
|
||||
{
|
||||
static constexpr bool limit_num_elems = Trait::has_limit;
|
||||
using Data = GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>;
|
||||
static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast<Data *>(place); }
|
||||
static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast<const Data *>(place); }
|
||||
|
||||
DataTypePtr & data_type;
|
||||
UInt64 max_elems;
|
||||
UInt64 seed;
|
||||
|
||||
public:
|
||||
GroupArrayGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_, UInt64 seed_ = 123456)
|
||||
: IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>(
|
||||
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
|
||||
, data_type(this->argument_types[0])
|
||||
, max_elems(max_elems_)
|
||||
, seed(seed_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return getNameByTrait<Trait>(); }
|
||||
|
||||
void insertWithSampler(Data & a, const Node * v, Arena * arena) const
|
||||
{
|
||||
++a.total_values;
|
||||
if (a.value.size() < max_elems)
|
||||
a.value.push_back(v->clone(arena), arena);
|
||||
else
|
||||
{
|
||||
UInt64 rnd = a.genRandom(a.total_values);
|
||||
if (rnd < max_elems)
|
||||
a.value[rnd] = v->clone(arena);
|
||||
}
|
||||
}
|
||||
|
||||
void create(AggregateDataPtr __restrict place) const override /// NOLINT
|
||||
{
|
||||
[[maybe_unused]] auto a = new (place) Data;
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
a->rng.seed(seed);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = data(place);
|
||||
|
||||
++cur_elems.total_values;
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::NONE)
|
||||
{
|
||||
if (limit_num_elems && cur_elems.value.size() >= max_elems)
|
||||
{
|
||||
if (Trait::last)
|
||||
{
|
||||
Node * node = Node::allocate(*columns[0], row_num, arena);
|
||||
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = node;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
Node * node = Node::allocate(*columns[0], row_num, arena);
|
||||
cur_elems.value.push_back(node, arena);
|
||||
}
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
if (cur_elems.value.size() < max_elems)
|
||||
cur_elems.value.push_back(Node::allocate(*columns[0], row_num, arena), arena);
|
||||
else
|
||||
{
|
||||
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
|
||||
if (rnd < max_elems)
|
||||
cur_elems.value[rnd] = Node::allocate(*columns[0], row_num, arena);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = data(place);
|
||||
auto & rhs_elems = data(rhs);
|
||||
|
||||
if (rhs_elems.value.empty())
|
||||
return;
|
||||
|
||||
if constexpr (Trait::last)
|
||||
mergeNoSamplerLast(cur_elems, rhs_elems, arena);
|
||||
else if constexpr (Trait::sampler == Sampler::NONE)
|
||||
mergeNoSampler(cur_elems, rhs_elems, arena);
|
||||
else if constexpr (Trait::sampler == Sampler::RNG)
|
||||
mergeWithRNGSampler(cur_elems, rhs_elems, arena);
|
||||
}
|
||||
|
||||
void ALWAYS_INLINE mergeNoSamplerLast(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
UInt64 new_elements = std::min(static_cast<size_t>(max_elems), cur_elems.value.size() + rhs_elems.value.size());
|
||||
cur_elems.value.resize_exact(new_elements, arena);
|
||||
for (auto & value : rhs_elems.value)
|
||||
{
|
||||
cur_elems.value[cur_elems.total_values % max_elems] = value->clone(arena);
|
||||
++cur_elems.total_values;
|
||||
}
|
||||
assert(rhs_elems.total_values >= rhs_elems.value.size());
|
||||
cur_elems.total_values += rhs_elems.total_values - rhs_elems.value.size();
|
||||
}
|
||||
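/// Editorial note (illustrative sketch, not part of the original sources): for groupArrayLast the
/// value array is used as a ring buffer of capacity max_elems, indexed by total_values % max_elems,
/// so only the most recent max_elems inserts survive. A minimal standalone sketch of the same idea:
///
///     #include <cstddef>
///     #include <vector>
///
///     template <typename T>
///     struct LastK
///     {
///         size_t capacity;
///         size_t total = 0;
///         std::vector<T> ring;
///
///         void insert(const T & x)
///         {
///             if (ring.size() < capacity)
///                 ring.push_back(x);
///             else
///                 ring[total % capacity] = x;    /// overwrite the oldest element
///             ++total;
///         }
///     };
///
/// mergeNoSamplerLast above replays the right-hand values through the same modular index and then
/// adds the right-hand side's already-evicted count to total_values.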
|
||||
void ALWAYS_INLINE mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
UInt64 new_elems;
|
||||
if (limit_num_elems)
|
||||
{
|
||||
if (cur_elems.value.size() >= max_elems)
|
||||
return;
|
||||
new_elems = std::min(rhs_elems.value.size(), static_cast<size_t>(max_elems) - cur_elems.value.size());
|
||||
}
|
||||
else
|
||||
new_elems = rhs_elems.value.size();
|
||||
|
||||
for (UInt64 i = 0; i < new_elems; ++i)
|
||||
cur_elems.value.push_back(rhs_elems.value[i]->clone(arena), arena);
|
||||
}
|
||||
|
||||
void ALWAYS_INLINE mergeWithRNGSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const
|
||||
{
|
||||
if (rhs_elems.total_values <= max_elems)
|
||||
{
|
||||
for (size_t i = 0; i < rhs_elems.value.size(); ++i)
|
||||
insertWithSampler(cur_elems, rhs_elems.value[i], arena);
|
||||
}
|
||||
else if (cur_elems.total_values <= max_elems)
|
||||
{
|
||||
decltype(cur_elems.value) from;
|
||||
from.swap(cur_elems.value, arena);
|
||||
for (auto & node : rhs_elems.value)
|
||||
cur_elems.value.push_back(node->clone(arena), arena);
|
||||
cur_elems.total_values = rhs_elems.total_values;
|
||||
for (size_t i = 0; i < from.size(); ++i)
|
||||
insertWithSampler(cur_elems, from[i], arena);
|
||||
}
|
||||
else
|
||||
{
|
||||
cur_elems.randomShuffle();
|
||||
cur_elems.total_values += rhs_elems.total_values;
|
||||
for (size_t i = 0; i < max_elems; ++i)
|
||||
{
|
||||
UInt64 rnd = cur_elems.genRandom(cur_elems.total_values);
|
||||
if (rnd < rhs_elems.total_values)
|
||||
cur_elems.value[i] = rhs_elems.value[i]->clone(arena);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void checkArraySize(size_t elems, size_t max_elems)
|
||||
{
|
||||
if (unlikely(elems > max_elems))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size {} (maximum: {})", elems, max_elems);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
UInt64 elems = data(place).value.size();
|
||||
checkArraySize(elems, max_elems);
|
||||
writeVarUInt(elems, buf);
|
||||
|
||||
auto & value = data(place).value;
|
||||
for (auto & node : value)
|
||||
node->write(buf);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
writeBinaryLittleEndian(data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
writeBinaryLittleEndian(data(place).total_values, buf);
|
||||
WriteBufferFromOwnString rng_buf;
|
||||
rng_buf << data(place).rng;
|
||||
writeStringBinary(rng_buf.str(), buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
UInt64 elems;
|
||||
readVarUInt(elems, buf);
|
||||
|
||||
if (unlikely(elems == 0))
|
||||
return;
|
||||
|
||||
checkArraySize(elems, max_elems);
|
||||
|
||||
auto & value = data(place).value;
|
||||
|
||||
value.resize_exact(elems, arena);
|
||||
for (UInt64 i = 0; i < elems; ++i)
|
||||
value[i] = Node::read(buf, arena);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
readBinaryLittleEndian(data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
readBinaryLittleEndian(data(place).total_values, buf);
|
||||
std::string rng_string;
|
||||
readStringBinary(rng_string, buf);
|
||||
ReadBufferFromString rng_buf(rng_string);
|
||||
rng_buf >> data(place).rng;
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
auto & column_array = assert_cast<ColumnArray &>(to);
|
||||
|
||||
auto & offsets = column_array.getOffsets();
|
||||
offsets.push_back(offsets.back() + data(place).value.size());
|
||||
|
||||
auto & column_data = column_array.getData();
|
||||
|
||||
if (std::is_same_v<Node, GroupArrayNodeString>)
|
||||
{
|
||||
auto & string_offsets = assert_cast<ColumnString &>(column_data).getOffsets();
|
||||
string_offsets.reserve(string_offsets.size() + data(place).value.size());
|
||||
}
|
||||
|
||||
auto & value = data(place).value;
|
||||
for (auto & node : value)
|
||||
node->insertInto(column_data);
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
};
|
||||
|
||||
#undef AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE
|
||||
|
||||
}
|
@ -1,21 +1,218 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/FieldVisitorToString.h>
|
||||
#include <Common/FieldVisitorConvertToNumber.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
#define AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
extern const int CANNOT_CONVERT_TYPE;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/** Aggregate function that takes two arguments: value and position,
* and builds an array in which each value is located at its corresponding position.
*
* If more than one value was inserted at a single position, any of them (the first one in the single-threaded case) is kept.
* If no value was inserted at some position, the default value is substituted.
*
* The aggregate function also accepts optional parameters:
* - the default value to substitute;
* - the length to which the result arrays are resized (if you want results of the same length for all aggregation keys).
*
* If you want to pass the length, the default value must also be given.
*/
|
||||
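/// Editorial note (illustrative example, not part of the original sources): with the parameters
/// ('-', 5), i.e. default value '-' and result length 5, feeding the (value, position) pairs
/// ('a', 1), ('b', 3), ('c', 1) into one aggregation key yields ['-', 'a', '-', 'b', '-']:
/// positions are zero-based, the second value for position 1 is ignored because the slot is already
/// set, and unset slots are padded with the default up to the requested length. The same logic in a
/// minimal standalone form (an empty string stands in for an unset Field):
///
///     #include <cstddef>
///     #include <string>
///     #include <vector>
///
///     void insertAtSketch(std::vector<std::string> & arr, size_t position,
///                         const std::string & value, size_t length)
///     {
///         if (length && position >= length)
///             return;                        /// positions beyond the cut length are ignored
///         if (arr.size() <= position)
///             arr.resize(position + 1);      /// grow lazily, new slots stay "unset"
///         if (arr[position].empty())
///             arr[position] = value;         /// the first value written to a position wins
///     }
///
///     std::vector<std::string> finalizeSketch(std::vector<std::string> arr,
///                                             size_t length, const std::string & def)
///     {
///         if (arr.size() < length)
///             arr.resize(length);            /// pad up to the requested length
///         for (auto & slot : arr)
///             if (slot.empty())
///                 slot = def;                /// substitute the default for unset slots
///         return arr;
///     }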
|
||||
|
||||
/// Generic case (inefficient).
|
||||
struct AggregateFunctionGroupArrayInsertAtDataGeneric
|
||||
{
|
||||
Array value; /// TODO Add MemoryTracker
|
||||
};
|
||||
|
||||
|
||||
class AggregateFunctionGroupArrayInsertAtGeneric final
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionGroupArrayInsertAtDataGeneric, AggregateFunctionGroupArrayInsertAtGeneric>
|
||||
{
|
||||
private:
|
||||
DataTypePtr type;
|
||||
SerializationPtr serialization;
|
||||
Field default_value;
|
||||
UInt64 length_to_resize = 0; /// zero means - do not do resizing.
|
||||
|
||||
public:
|
||||
AggregateFunctionGroupArrayInsertAtGeneric(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayInsertAtDataGeneric, AggregateFunctionGroupArrayInsertAtGeneric>(arguments, params, std::make_shared<DataTypeArray>(arguments[0]))
|
||||
, type(argument_types[0])
|
||||
, serialization(type->getDefaultSerialization())
|
||||
{
|
||||
if (!params.empty())
|
||||
{
|
||||
if (params.size() > 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at most two parameters.", getName());
|
||||
|
||||
default_value = params[0];
|
||||
|
||||
if (params.size() == 2)
|
||||
{
|
||||
length_to_resize = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[1]);
|
||||
if (length_to_resize > AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
if (!isUInt(arguments[1]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument of aggregate function {} must be unsigned integer.", getName());
|
||||
|
||||
if (default_value.isNull())
|
||||
default_value = type->getDefault();
|
||||
else
|
||||
{
|
||||
Field converted = convertFieldToType(default_value, *type);
|
||||
if (converted.isNull())
|
||||
throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert parameter of aggregate function {} ({}) "
|
||||
"to type {} to be used as default value in array",
|
||||
getName(), applyVisitor(FieldVisitorToString(), default_value), type->getName());
|
||||
|
||||
default_value = converted;
|
||||
}
|
||||
}
|
||||
|
||||
String getName() const override { return "groupArrayInsertAt"; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
/// TODO Do positions need to be 1-based for this function?
|
||||
size_t position = columns[1]->getUInt(row_num);
|
||||
|
||||
/// If the position is beyond the length to which the array will be cut, simply ignore the value.
|
||||
if (length_to_resize && position >= length_to_resize)
|
||||
return;
|
||||
|
||||
if (position >= AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: "
|
||||
"position argument ({}) is greater or equals to limit ({})",
|
||||
position, AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE);
|
||||
|
||||
Array & arr = data(place).value;
|
||||
|
||||
if (arr.size() <= position)
|
||||
arr.resize(position + 1);
|
||||
else if (!arr[position].isNull())
|
||||
return; /// Element was already inserted to the specified position.
|
||||
|
||||
columns[0]->get(row_num, arr[position]);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
Array & arr_lhs = data(place).value;
|
||||
const Array & arr_rhs = data(rhs).value;
|
||||
|
||||
if (arr_lhs.size() < arr_rhs.size())
|
||||
arr_lhs.resize(arr_rhs.size());
|
||||
|
||||
for (size_t i = 0, size = arr_rhs.size(); i < size; ++i)
|
||||
if (arr_lhs[i].isNull() && !arr_rhs[i].isNull())
|
||||
arr_lhs[i] = arr_rhs[i];
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const Array & arr = data(place).value;
|
||||
size_t size = arr.size();
|
||||
writeVarUInt(size, buf);
|
||||
|
||||
for (const Field & elem : arr)
|
||||
{
|
||||
if (elem.isNull())
|
||||
{
|
||||
writeBinary(UInt8(1), buf);
|
||||
}
|
||||
else
|
||||
{
|
||||
writeBinary(UInt8(0), buf);
|
||||
serialization->serializeBinary(elem, buf, {});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
if (size > AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE);
|
||||
|
||||
Array & arr = data(place).value;
|
||||
|
||||
arr.resize(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
UInt8 is_null = 0;
|
||||
readBinary(is_null, buf);
|
||||
if (!is_null)
|
||||
serialization->deserializeBinary(arr[i], buf, {});
|
||||
}
|
||||
}
|
||||
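/// Editorial note (illustrative sketch, not part of the original sources): the state above is
/// written as varUInt(size) followed by one entry per slot, where each entry is a one-byte flag
/// (1 = the slot is still unset, 0 = set), optionally followed by the serialized value. A minimal
/// standalone sketch of the same per-slot encoding for short strings (single-byte lengths assumed):
///
///     #include <optional>
///     #include <string>
///     #include <vector>
///
///     std::string encodeSlots(const std::vector<std::optional<std::string>> & slots)
///     {
///         std::string out;
///         out.push_back(static_cast<char>(slots.size()));       /// sketch: assumes < 128 slots
///         for (const auto & slot : slots)
///         {
///             out.push_back(slot ? 0 : 1);                      /// "is unset" flag
///             if (slot)
///             {
///                 out.push_back(static_cast<char>(slot->size())); /// sketch: short strings only
///                 out += *slot;
///             }
///         }
///         return out;
///     }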
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
ColumnArray & to_array = assert_cast<ColumnArray &>(to);
|
||||
IColumn & to_data = to_array.getData();
|
||||
ColumnArray::Offsets & to_offsets = to_array.getOffsets();
|
||||
|
||||
const Array & arr = data(place).value;
|
||||
|
||||
for (const Field & elem : arr)
|
||||
{
|
||||
if (!elem.isNull())
|
||||
to_data.insert(elem);
|
||||
else
|
||||
to_data.insert(default_value);
|
||||
}
|
||||
|
||||
size_t result_array_size = length_to_resize ? length_to_resize : arr.size();
|
||||
|
||||
/// Pad the array if needed.
|
||||
for (size_t i = arr.size(); i < result_array_size; ++i)
|
||||
to_data.insert(default_value);
|
||||
|
||||
to_offsets.push_back(to_offsets.back() + result_array_size);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionGroupArrayInsertAt(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
|
@ -1,215 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
|
||||
#include <Common/FieldVisitorToString.h>
|
||||
#include <Common/FieldVisitorConvertToNumber.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
#define AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
extern const int CANNOT_CONVERT_TYPE;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
/** Aggregate function, that takes two arguments: value and position,
|
||||
* and as a result, builds an array with values are located at corresponding positions.
|
||||
*
|
||||
* If more than one value was inserted to single position, the any value (first in case of single thread) is stored.
|
||||
* If no values was inserted to some position, then default value will be substituted.
|
||||
*
|
||||
* Aggregate function also accept optional parameters:
|
||||
* - default value to substitute;
|
||||
* - length to resize result arrays (if you want to have results of same length for all aggregation keys);
|
||||
*
|
||||
* If you want to pass length, default value should be also given.
|
||||
*/
|
||||
|
||||
|
||||
/// Generic case (inefficient).
|
||||
struct AggregateFunctionGroupArrayInsertAtDataGeneric
|
||||
{
|
||||
Array value; /// TODO Add MemoryTracker
|
||||
};
|
||||
|
||||
|
||||
class AggregateFunctionGroupArrayInsertAtGeneric final
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionGroupArrayInsertAtDataGeneric, AggregateFunctionGroupArrayInsertAtGeneric>
|
||||
{
|
||||
private:
|
||||
DataTypePtr type;
|
||||
SerializationPtr serialization;
|
||||
Field default_value;
|
||||
UInt64 length_to_resize = 0; /// zero means - do not do resizing.
|
||||
|
||||
public:
|
||||
AggregateFunctionGroupArrayInsertAtGeneric(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayInsertAtDataGeneric, AggregateFunctionGroupArrayInsertAtGeneric>(arguments, params, std::make_shared<DataTypeArray>(arguments[0]))
|
||||
, type(argument_types[0])
|
||||
, serialization(type->getDefaultSerialization())
|
||||
{
|
||||
if (!params.empty())
|
||||
{
|
||||
if (params.size() > 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at most two parameters.", getName());
|
||||
|
||||
default_value = params[0];
|
||||
|
||||
if (params.size() == 2)
|
||||
{
|
||||
length_to_resize = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[1]);
|
||||
if (length_to_resize > AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
if (!isUInt(arguments[1]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument of aggregate function {} must be unsigned integer.", getName());
|
||||
|
||||
if (default_value.isNull())
|
||||
default_value = type->getDefault();
|
||||
else
|
||||
{
|
||||
Field converted = convertFieldToType(default_value, *type);
|
||||
if (converted.isNull())
|
||||
throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert parameter of aggregate function {} ({}) "
|
||||
"to type {} to be used as default value in array",
|
||||
getName(), applyVisitor(FieldVisitorToString(), default_value), type->getName());
|
||||
|
||||
default_value = converted;
|
||||
}
|
||||
}
|
||||
|
||||
String getName() const override { return "groupArrayInsertAt"; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
/// TODO Do positions need to be 1-based for this function?
|
||||
size_t position = columns[1]->getUInt(row_num);
|
||||
|
||||
/// If position is larger than size to which array will be cut - simply ignore value.
|
||||
if (length_to_resize && position >= length_to_resize)
|
||||
return;
|
||||
|
||||
if (position >= AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: "
|
||||
"position argument ({}) is greater or equals to limit ({})",
|
||||
position, AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE);
|
||||
|
||||
Array & arr = data(place).value;
|
||||
|
||||
if (arr.size() <= position)
|
||||
arr.resize(position + 1);
|
||||
else if (!arr[position].isNull())
|
||||
return; /// Element was already inserted to the specified position.
|
||||
|
||||
columns[0]->get(row_num, arr[position]);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
Array & arr_lhs = data(place).value;
|
||||
const Array & arr_rhs = data(rhs).value;
|
||||
|
||||
if (arr_lhs.size() < arr_rhs.size())
|
||||
arr_lhs.resize(arr_rhs.size());
|
||||
|
||||
for (size_t i = 0, size = arr_rhs.size(); i < size; ++i)
|
||||
if (arr_lhs[i].isNull() && !arr_rhs[i].isNull())
|
||||
arr_lhs[i] = arr_rhs[i];
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const Array & arr = data(place).value;
|
||||
size_t size = arr.size();
|
||||
writeVarUInt(size, buf);
|
||||
|
||||
for (const Field & elem : arr)
|
||||
{
|
||||
if (elem.isNull())
|
||||
{
|
||||
writeBinary(UInt8(1), buf);
|
||||
}
|
||||
else
|
||||
{
|
||||
writeBinary(UInt8(0), buf);
|
||||
serialization->serializeBinary(elem, buf, {});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
if (size > AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE);
|
||||
|
||||
Array & arr = data(place).value;
|
||||
|
||||
arr.resize(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
UInt8 is_null = 0;
|
||||
readBinary(is_null, buf);
|
||||
if (!is_null)
|
||||
serialization->deserializeBinary(arr[i], buf, {});
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
ColumnArray & to_array = assert_cast<ColumnArray &>(to);
|
||||
IColumn & to_data = to_array.getData();
|
||||
ColumnArray::Offsets & to_offsets = to_array.getOffsets();
|
||||
|
||||
const Array & arr = data(place).value;
|
||||
|
||||
for (const Field & elem : arr)
|
||||
{
|
||||
if (!elem.isNull())
|
||||
to_data.insert(elem);
|
||||
else
|
||||
to_data.insert(default_value);
|
||||
}
|
||||
|
||||
size_t result_array_size = length_to_resize ? length_to_resize : arr.size();
|
||||
|
||||
/// Pad array if need.
|
||||
for (size_t i = arr.size(); i < result_array_size; ++i)
|
||||
to_data.insert(default_value);
|
||||
|
||||
to_offsets.push_back(to_offsets.back() + result_array_size);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#undef AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE
|
||||
|
||||
}
|
@ -2,8 +2,14 @@
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <DataTypes/DataTypeAggregateFunction.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <Columns/ColumnAggregateFunction.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
// TODO include this last because of a broken roaring header. See the comment inside.
|
||||
#include <AggregateFunctions/AggregateFunctionGroupBitmap.h>
|
||||
#include <AggregateFunctions/AggregateFunctionGroupBitmapData.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -17,77 +23,255 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> typename Data, typename... TArgs>
|
||||
IAggregateFunction * createWithIntegerType(const IDataType & argument_type, TArgs &&... args)
|
||||
|
||||
/// Collects numbers into a bitmap and returns its cardinality.
|
||||
template <typename T, typename Data>
|
||||
class AggregateFunctionBitmap final : public IAggregateFunctionDataHelper<Data, AggregateFunctionBitmap<T, Data>>
|
||||
{
|
||||
public:
|
||||
explicit AggregateFunctionBitmap(const DataTypePtr & type)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionBitmap<T, Data>>({type}, {}, createResultType())
|
||||
{
|
||||
WhichDataType which(argument_type);
|
||||
if (which.idx == TypeIndex::UInt8) return new AggregateFunctionTemplate<UInt8, Data<UInt8>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::UInt16) return new AggregateFunctionTemplate<UInt16, Data<UInt16>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::UInt32) return new AggregateFunctionTemplate<UInt32, Data<UInt32>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::UInt64) return new AggregateFunctionTemplate<UInt64, Data<UInt64>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int8) return new AggregateFunctionTemplate<Int8, Data<Int8>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int16) return new AggregateFunctionTemplate<Int16, Data<Int16>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int32) return new AggregateFunctionTemplate<Int32, Data<Int32>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int64) return new AggregateFunctionTemplate<Int64, Data<Int64>>(std::forward<TArgs>(args)...);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <template <typename> typename Data>
|
||||
AggregateFunctionPtr createAggregateFunctionBitmap(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
String getName() const override { return Data::name(); }
|
||||
|
||||
static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<T>>(); }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
assertUnary(name, argument_types);
|
||||
|
||||
if (!argument_types[0]->canBeUsedInBitOperations())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"The type {} of argument for aggregate function {} "
|
||||
"is illegal, because it cannot be used in Bitmap operations",
|
||||
argument_types[0]->getName(), name);
|
||||
|
||||
AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionBitmap, Data>(*argument_types[0], argument_types[0]));
|
||||
|
||||
if (!res)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
|
||||
argument_types[0]->getName(), name);
|
||||
|
||||
return res;
|
||||
this->data(place).roaring_bitmap_with_small_set.add(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
|
||||
}
|
||||
|
||||
// Additional aggregate functions to manipulate bitmaps.
|
||||
template <template <typename, typename> typename AggregateFunctionTemplate>
|
||||
AggregateFunctionPtr createAggregateFunctionBitmapL2(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
assertUnary(name, argument_types);
|
||||
|
||||
DataTypePtr argument_type_ptr = argument_types[0];
|
||||
WhichDataType which(*argument_type_ptr);
|
||||
if (which.idx != TypeIndex::AggregateFunction)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
|
||||
argument_types[0]->getName(), name);
|
||||
|
||||
/// groupBitmap needs to know about the data type that was used to create bitmaps.
|
||||
/// We need to look inside the type of its argument to obtain it.
|
||||
const DataTypeAggregateFunction & datatype_aggfunc = dynamic_cast<const DataTypeAggregateFunction &>(*argument_type_ptr);
|
||||
AggregateFunctionPtr aggfunc = datatype_aggfunc.getFunction();
|
||||
|
||||
if (aggfunc->getName() != AggregateFunctionGroupBitmapData<UInt8>::name())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
|
||||
argument_types[0]->getName(), name);
|
||||
|
||||
DataTypePtr nested_argument_type_ptr = aggfunc->getArgumentTypes()[0];
|
||||
|
||||
AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionTemplate, AggregateFunctionGroupBitmapData>(
|
||||
*nested_argument_type_ptr, argument_type_ptr));
|
||||
|
||||
if (!res)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
|
||||
argument_types[0]->getName(), name);
|
||||
|
||||
return res;
|
||||
this->data(place).roaring_bitmap_with_small_set.merge(this->data(rhs).roaring_bitmap_with_small_set);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).roaring_bitmap_with_small_set.write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
this->data(place).roaring_bitmap_with_small_set.read(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnVector<T> &>(to).getData().push_back(
|
||||
static_cast<T>(this->data(place).roaring_bitmap_with_small_set.size()));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// This aggregate function takes the states of AggregateFunctionBitmap as its argument.
|
||||
template <typename T, typename Data, typename Policy>
|
||||
class AggregateFunctionBitmapL2 final : public IAggregateFunctionDataHelper<Data, AggregateFunctionBitmapL2<T, Data, Policy>>
|
||||
{
|
||||
private:
|
||||
static constexpr size_t STATE_VERSION_1_MIN_REVISION = 54455;
|
||||
public:
|
||||
explicit AggregateFunctionBitmapL2(const DataTypePtr & type)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionBitmapL2<T, Data, Policy>>({type}, {}, createResultType())
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return Policy::name; }
|
||||
|
||||
static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<T>>(); }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
DataTypePtr getStateType() const override
|
||||
{
|
||||
return this->argument_types.at(0);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
Data & data_lhs = this->data(place);
|
||||
const Data & data_rhs = this->data(assert_cast<const ColumnAggregateFunction &>(*columns[0]).getData()[row_num]);
|
||||
if (!data_lhs.init)
|
||||
{
|
||||
data_lhs.init = true;
|
||||
data_lhs.roaring_bitmap_with_small_set.merge(data_rhs.roaring_bitmap_with_small_set);
|
||||
}
|
||||
else
|
||||
{
|
||||
Policy::apply(data_lhs, data_rhs);
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
Data & data_lhs = this->data(place);
|
||||
const Data & data_rhs = this->data(rhs);
|
||||
|
||||
if (!data_rhs.init)
|
||||
return;
|
||||
|
||||
if (!data_lhs.init)
|
||||
{
|
||||
data_lhs.init = true;
|
||||
data_lhs.roaring_bitmap_with_small_set.merge(data_rhs.roaring_bitmap_with_small_set);
|
||||
}
|
||||
else
|
||||
{
|
||||
Policy::apply(data_lhs, data_rhs);
|
||||
}
|
||||
}
|
||||
|
||||
bool isVersioned() const override { return true; }
|
||||
|
||||
size_t getDefaultVersion() const override { return 1; }
|
||||
|
||||
size_t getVersionFromRevision(size_t revision) const override
|
||||
{
|
||||
if (revision >= STATE_VERSION_1_MIN_REVISION)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version) const override
|
||||
{
|
||||
if (!version)
|
||||
version = getDefaultVersion();
|
||||
|
||||
if (*version >= 1)
|
||||
DB::writeBoolText(this->data(place).init, buf);
|
||||
|
||||
this->data(place).roaring_bitmap_with_small_set.write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> version, Arena *) const override
|
||||
{
|
||||
if (!version)
|
||||
version = getDefaultVersion();
|
||||
|
||||
if (*version >= 1)
|
||||
DB::readBoolText(this->data(place).init, buf);
|
||||
this->data(place).roaring_bitmap_with_small_set.read(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnVector<T> &>(to).getData().push_back(
|
||||
static_cast<T>(this->data(place).roaring_bitmap_with_small_set.size()));
|
||||
}
|
||||
};
|
||||
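/// Editorial note (illustrative sketch, not part of the original sources): the class above versions
/// its serialized state. Version 1 (the default, used for revisions >= 54455) prepends the `init`
/// flag; version 0 omits it, so states written for older peers stay readable. A minimal standalone
/// sketch of the same version-gated layout:
///
///     #include <cstddef>
///     #include <string>
///
///     struct VersionedState
///     {
///         bool init = false;
///         std::string payload;
///
///         void serialize(std::string & out, size_t version) const
///         {
///             if (version >= 1)
///                 out.push_back(init ? '1' : '0');    /// the flag only exists in v1+
///             out += payload;
///         }
///
///         void deserialize(const std::string & in, size_t version)
///         {
///             size_t pos = 0;
///             if (version >= 1)
///                 init = (in[pos++] == '1');
///             payload = in.substr(pos);
///         }
///     };
///
/// Keeping the old layout readable matters because states are exchanged between servers of
/// different revisions, for example in distributed queries or through *State columns.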
|
||||
|
||||
template <typename Data>
|
||||
class BitmapAndPolicy
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "groupBitmapAnd";
|
||||
static void apply(Data & lhs, const Data & rhs) { lhs.roaring_bitmap_with_small_set.rb_and(rhs.roaring_bitmap_with_small_set); }
|
||||
};
|
||||
|
||||
template <typename Data>
|
||||
class BitmapOrPolicy
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "groupBitmapOr";
|
||||
static void apply(Data & lhs, const Data & rhs) { lhs.roaring_bitmap_with_small_set.rb_or(rhs.roaring_bitmap_with_small_set); }
|
||||
};
|
||||
|
||||
template <typename Data>
|
||||
class BitmapXorPolicy
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "groupBitmapXor";
|
||||
static void apply(Data & lhs, const Data & rhs) { lhs.roaring_bitmap_with_small_set.rb_xor(rhs.roaring_bitmap_with_small_set); }
|
||||
};
|
||||
|
||||
template <typename T, typename Data>
|
||||
using AggregateFunctionBitmapL2And = AggregateFunctionBitmapL2<T, Data, BitmapAndPolicy<Data>>;
|
||||
|
||||
template <typename T, typename Data>
|
||||
using AggregateFunctionBitmapL2Or = AggregateFunctionBitmapL2<T, Data, BitmapOrPolicy<Data>>;
|
||||
|
||||
template <typename T, typename Data>
|
||||
using AggregateFunctionBitmapL2Xor = AggregateFunctionBitmapL2<T, Data, BitmapXorPolicy<Data>>;
|
||||
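/// Editorial note (illustrative sketch, not part of the original sources): groupBitmapAnd/Or/Xor
/// share one implementation, AggregateFunctionBitmapL2, and differ only in the Policy type that
/// supplies the name and the combining operation. A minimal standalone sketch of that policy-based
/// design:
///
///     #include <cstdint>
///
///     struct AndPolicy
///     {
///         static constexpr const char * name = "and";
///         static uint64_t apply(uint64_t a, uint64_t b) { return a & b; }
///     };
///
///     struct OrPolicy
///     {
///         static constexpr const char * name = "or";
///         static uint64_t apply(uint64_t a, uint64_t b) { return a | b; }
///     };
///
///     template <typename Policy>
///     struct Combiner
///     {
///         uint64_t state = 0;
///         bool init = false;
///
///         void add(uint64_t x)
///         {
///             if (!init) { state = x; init = true; }   /// the first value seeds the state
///             else state = Policy::apply(state, x);    /// later values are folded in via the policy
///         }
///     };
///
///     /// Usage: Combiner<AndPolicy> c; c.add(0b1100); c.add(0b1010);  -> c.state == 0b1000
///
/// Seeding from the first value (instead of an identity element) mirrors the `init` flag handling in
/// AggregateFunctionBitmapL2::add() and merge() above.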
|
||||
|
||||
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> typename Data, typename... TArgs>
|
||||
IAggregateFunction * createWithIntegerType(const IDataType & argument_type, TArgs &&... args)
|
||||
{
|
||||
WhichDataType which(argument_type);
|
||||
if (which.idx == TypeIndex::UInt8) return new AggregateFunctionTemplate<UInt8, Data<UInt8>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::UInt16) return new AggregateFunctionTemplate<UInt16, Data<UInt16>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::UInt32) return new AggregateFunctionTemplate<UInt32, Data<UInt32>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::UInt64) return new AggregateFunctionTemplate<UInt64, Data<UInt64>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int8) return new AggregateFunctionTemplate<Int8, Data<Int8>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int16) return new AggregateFunctionTemplate<Int16, Data<Int16>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int32) return new AggregateFunctionTemplate<Int32, Data<Int32>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int64) return new AggregateFunctionTemplate<Int64, Data<Int64>>(std::forward<TArgs>(args)...);
|
||||
return nullptr;
|
||||
}
|
||||
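/// Editorial note (illustrative sketch, not part of the original sources): the helper above maps a
/// runtime TypeIndex onto a compile-time template instantiation; callers get back nullptr for
/// unsupported types and turn that into an ILLEGAL_TYPE_OF_ARGUMENT error. A minimal standalone
/// sketch of the same dispatch pattern:
///
///     #include <cstdint>
///     #include <memory>
///
///     struct IFunction { virtual ~IFunction() = default; };
///     template <typename T> struct TypedFunction : IFunction {};
///
///     enum class TypeIdx { UInt8, UInt16, Other };
///
///     std::unique_ptr<IFunction> createForType(TypeIdx idx)
///     {
///         switch (idx)
///         {
///             case TypeIdx::UInt8:  return std::make_unique<TypedFunction<uint8_t>>();
///             case TypeIdx::UInt16: return std::make_unique<TypedFunction<uint16_t>>();
///             default:              return nullptr;   /// caller reports "illegal type"
///         }
///     }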
|
||||
template <template <typename> typename Data>
|
||||
AggregateFunctionPtr createAggregateFunctionBitmap(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
assertUnary(name, argument_types);
|
||||
|
||||
if (!argument_types[0]->canBeUsedInBitOperations())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"The type {} of argument for aggregate function {} "
|
||||
"is illegal, because it cannot be used in Bitmap operations",
|
||||
argument_types[0]->getName(), name);
|
||||
|
||||
AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionBitmap, Data>(*argument_types[0], argument_types[0]));
|
||||
|
||||
if (!res)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
|
||||
argument_types[0]->getName(), name);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
// Additional aggregate functions to manipulate bitmaps.
|
||||
template <template <typename, typename> typename AggregateFunctionTemplate>
|
||||
AggregateFunctionPtr createAggregateFunctionBitmapL2(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
assertUnary(name, argument_types);
|
||||
|
||||
DataTypePtr argument_type_ptr = argument_types[0];
|
||||
WhichDataType which(*argument_type_ptr);
|
||||
if (which.idx != TypeIndex::AggregateFunction)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
|
||||
argument_types[0]->getName(), name);
|
||||
|
||||
/// groupBitmap needs to know about the data type that was used to create bitmaps.
|
||||
/// We need to look inside the type of its argument to obtain it.
|
||||
const DataTypeAggregateFunction & datatype_aggfunc = dynamic_cast<const DataTypeAggregateFunction &>(*argument_type_ptr);
|
||||
AggregateFunctionPtr aggfunc = datatype_aggfunc.getFunction();
|
||||
|
||||
if (aggfunc->getName() != AggregateFunctionGroupBitmapData<UInt8>::name())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
|
||||
argument_types[0]->getName(), name);
|
||||
|
||||
DataTypePtr nested_argument_type_ptr = aggfunc->getArgumentTypes()[0];
|
||||
|
||||
AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionTemplate, AggregateFunctionGroupBitmapData>(
|
||||
*nested_argument_type_ptr, argument_type_ptr));
|
||||
|
||||
if (!res)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
|
||||
argument_types[0]->getName(), name);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsBitmap(AggregateFunctionFactory & factory)
|
||||
|
@ -1,191 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <Columns/ColumnAggregateFunction.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
// TODO include this last because of a broken roaring header. See the comment inside.
|
||||
#include <AggregateFunctions/AggregateFunctionGroupBitmapData.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Counts bitmap operation on numbers.
|
||||
template <typename T, typename Data>
|
||||
class AggregateFunctionBitmap final : public IAggregateFunctionDataHelper<Data, AggregateFunctionBitmap<T, Data>>
|
||||
{
|
||||
public:
|
||||
explicit AggregateFunctionBitmap(const DataTypePtr & type)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionBitmap<T, Data>>({type}, {}, createResultType())
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return Data::name(); }
|
||||
|
||||
static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<T>>(); }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
this->data(place).roaring_bitmap_with_small_set.add(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).roaring_bitmap_with_small_set.merge(this->data(rhs).roaring_bitmap_with_small_set);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).roaring_bitmap_with_small_set.write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
this->data(place).roaring_bitmap_with_small_set.read(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnVector<T> &>(to).getData().push_back(
|
||||
static_cast<T>(this->data(place).roaring_bitmap_with_small_set.size()));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// This aggregate function takes the states of AggregateFunctionBitmap as its argument.
|
||||
template <typename T, typename Data, typename Policy>
|
||||
class AggregateFunctionBitmapL2 final : public IAggregateFunctionDataHelper<Data, AggregateFunctionBitmapL2<T, Data, Policy>>
|
||||
{
|
||||
private:
|
||||
static constexpr size_t STATE_VERSION_1_MIN_REVISION = 54455;
|
||||
public:
|
||||
explicit AggregateFunctionBitmapL2(const DataTypePtr & type)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionBitmapL2<T, Data, Policy>>({type}, {}, createResultType())
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return Policy::name; }
|
||||
|
||||
static DataTypePtr createResultType() { return std::make_shared<DataTypeNumber<T>>(); }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
DataTypePtr getStateType() const override
|
||||
{
|
||||
return this->argument_types.at(0);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
Data & data_lhs = this->data(place);
|
||||
const Data & data_rhs = this->data(assert_cast<const ColumnAggregateFunction &>(*columns[0]).getData()[row_num]);
|
||||
if (!data_lhs.init)
|
||||
{
|
||||
data_lhs.init = true;
|
||||
data_lhs.roaring_bitmap_with_small_set.merge(data_rhs.roaring_bitmap_with_small_set);
|
||||
}
|
||||
else
|
||||
{
|
||||
Policy::apply(data_lhs, data_rhs);
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
Data & data_lhs = this->data(place);
|
||||
const Data & data_rhs = this->data(rhs);
|
||||
|
||||
if (!data_rhs.init)
|
||||
return;
|
||||
|
||||
if (!data_lhs.init)
|
||||
{
|
||||
data_lhs.init = true;
|
||||
data_lhs.roaring_bitmap_with_small_set.merge(data_rhs.roaring_bitmap_with_small_set);
|
||||
}
|
||||
else
|
||||
{
|
||||
Policy::apply(data_lhs, data_rhs);
|
||||
}
|
||||
}
|
||||
|
||||
bool isVersioned() const override { return true; }
|
||||
|
||||
size_t getDefaultVersion() const override { return 1; }
|
||||
|
||||
size_t getVersionFromRevision(size_t revision) const override
|
||||
{
|
||||
if (revision >= STATE_VERSION_1_MIN_REVISION)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version) const override
|
||||
{
|
||||
if (!version)
|
||||
version = getDefaultVersion();
|
||||
|
||||
if (*version >= 1)
|
||||
DB::writeBoolText(this->data(place).init, buf);
|
||||
|
||||
this->data(place).roaring_bitmap_with_small_set.write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> version, Arena *) const override
|
||||
{
|
||||
if (!version)
|
||||
version = getDefaultVersion();
|
||||
|
||||
if (*version >= 1)
|
||||
DB::readBoolText(this->data(place).init, buf);
|
||||
this->data(place).roaring_bitmap_with_small_set.read(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnVector<T> &>(to).getData().push_back(
|
||||
static_cast<T>(this->data(place).roaring_bitmap_with_small_set.size()));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename Data>
|
||||
class BitmapAndPolicy
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "groupBitmapAnd";
|
||||
static void apply(Data & lhs, const Data & rhs) { lhs.roaring_bitmap_with_small_set.rb_and(rhs.roaring_bitmap_with_small_set); }
|
||||
};
|
||||
|
||||
template <typename Data>
|
||||
class BitmapOrPolicy
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "groupBitmapOr";
|
||||
static void apply(Data & lhs, const Data & rhs) { lhs.roaring_bitmap_with_small_set.rb_or(rhs.roaring_bitmap_with_small_set); }
|
||||
};
|
||||
|
||||
template <typename Data>
|
||||
class BitmapXorPolicy
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "groupBitmapXor";
|
||||
static void apply(Data & lhs, const Data & rhs) { lhs.roaring_bitmap_with_small_set.rb_xor(rhs.roaring_bitmap_with_small_set); }
|
||||
};
|
||||
|
||||
template <typename T, typename Data>
|
||||
using AggregateFunctionBitmapL2And = AggregateFunctionBitmapL2<T, Data, BitmapAndPolicy<Data>>;
|
||||
|
||||
template <typename T, typename Data>
|
||||
using AggregateFunctionBitmapL2Or = AggregateFunctionBitmapL2<T, Data, BitmapOrPolicy<Data>>;
|
||||
|
||||
template <typename T, typename Data>
|
||||
using AggregateFunctionBitmapL2Xor = AggregateFunctionBitmapL2<T, Data, BitmapXorPolicy<Data>>;
|
||||
|
||||
}
|
@ -1,14 +1,31 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionGroupUniqArray.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeIPv4andIPv6.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadHelpersArena.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/HashTableKeyHolder.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/KeyHolderHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
@ -21,6 +38,211 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionGroupUniqArrayData
|
||||
{
|
||||
/// When creating, the hash table must be small.
|
||||
using Set = HashSetWithStackMemory<T, DefaultHash<T>, 4>;
|
||||
|
||||
Set value;
|
||||
};
|
||||
|
||||
|
||||
/// Puts all values into a hash set. Returns an array of unique values. Implemented for numeric types.
|
||||
template <typename T, typename LimitNumElems>
|
||||
class AggregateFunctionGroupUniqArray
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayData<T>, AggregateFunctionGroupUniqArray<T, LimitNumElems>>
|
||||
{
|
||||
static constexpr bool limit_num_elems = LimitNumElems::value;
|
||||
UInt64 max_elems;
|
||||
|
||||
private:
|
||||
using State = AggregateFunctionGroupUniqArrayData<T>;
|
||||
|
||||
public:
|
||||
AggregateFunctionGroupUniqArray(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayData<T>,
|
||||
AggregateFunctionGroupUniqArray<T, LimitNumElems>>({argument_type}, parameters_, std::make_shared<DataTypeArray>(argument_type)),
|
||||
max_elems(max_elems_) {}
|
||||
|
||||
AggregateFunctionGroupUniqArray(const DataTypePtr & argument_type, const Array & parameters_, const DataTypePtr & result_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayData<T>,
|
||||
AggregateFunctionGroupUniqArray<T, LimitNumElems>>({argument_type}, parameters_, result_type_),
|
||||
max_elems(max_elems_) {}
|
||||
|
||||
|
||||
String getName() const override { return "groupUniqArray"; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
if (limit_num_elems && this->data(place).value.size() >= max_elems)
|
||||
return;
|
||||
this->data(place).value.insert(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
this->data(place).value.merge(this->data(rhs).value);
|
||||
else
|
||||
{
|
||||
auto & cur_set = this->data(place).value;
|
||||
auto & rhs_set = this->data(rhs).value;
|
||||
|
||||
for (auto & rhs_elem : rhs_set)
|
||||
{
|
||||
if (cur_set.size() >= max_elems)
|
||||
return;
|
||||
cur_set.insert(rhs_elem.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
size_t size = set.size();
|
||||
writeVarUInt(size, buf);
|
||||
for (const auto & elem : set)
|
||||
writeBinaryLittleEndian(elem.key, buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
this->data(place).value.read(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
const typename State::Set & set = this->data(place).value;
|
||||
size_t size = set.size();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
|
||||
size_t old_size = data_to.size();
|
||||
data_to.resize(old_size + size);
|
||||
|
||||
size_t i = 0;
|
||||
for (auto it = set.begin(); it != set.end(); ++it, ++i)
|
||||
data_to[old_size + i] = it->getValue();
|
||||
}
|
||||
};
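In essence the numeric specialisation above is a hash set with an optional element cap: add() refuses new values once max_elems is reached, and merge() copies elements from the other state under the same cap. A minimal standalone sketch of that contract, with std::unordered_set standing in for HashSetWithStackMemory (illustrative names, not ClickHouse API):

#include <cstdint>
#include <limits>
#include <unordered_set>
#include <vector>

// Illustrative stand-in for the aggregate state: a unique set with an optional cap.
template <typename T>
struct CappedUniqueSet
{
    std::unordered_set<T> values;
    uint64_t max_elems = std::numeric_limits<uint64_t>::max();

    void add(T v)
    {
        if (values.size() >= max_elems)
            return;                      // same early-out as the real add()
        values.insert(v);
    }

    void merge(const CappedUniqueSet & rhs)
    {
        for (T v : rhs.values)
        {
            if (values.size() >= max_elems)
                return;                  // stop once the cap is reached
            values.insert(v);
        }
    }

    std::vector<T> result() const        // groupUniqArray returns Array(T)
    {
        return {values.begin(), values.end()};
    }
};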
|
||||
|
||||
|
||||
/// Generic implementation, it uses serialized representation as object descriptor.
|
||||
struct AggregateFunctionGroupUniqArrayGenericData
|
||||
{
|
||||
static constexpr size_t INITIAL_SIZE_DEGREE = 3; /// adjustable
|
||||
|
||||
using Set = HashSetWithSavedHashWithStackMemory<StringRef, StringRefHash,
|
||||
INITIAL_SIZE_DEGREE>;
|
||||
|
||||
Set value;
|
||||
};
|
||||
|
||||
template <bool is_plain_column>
|
||||
static void deserializeAndInsertImpl(StringRef str, IColumn & data_to);
|
||||
|
||||
/** Template parameter with true value should be used for columns that store their elements in memory continuously.
|
||||
* For such columns groupUniqArray() can be implemented more efficiently (especially for small numeric arrays).
|
||||
*/
|
||||
template <bool is_plain_column = false, typename LimitNumElems = std::false_type>
|
||||
class AggregateFunctionGroupUniqArrayGeneric
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayGenericData,
|
||||
AggregateFunctionGroupUniqArrayGeneric<is_plain_column, LimitNumElems>>
|
||||
{
|
||||
DataTypePtr & input_data_type;
|
||||
|
||||
static constexpr bool limit_num_elems = LimitNumElems::value;
|
||||
UInt64 max_elems;
|
||||
|
||||
using State = AggregateFunctionGroupUniqArrayGenericData;
|
||||
|
||||
public:
|
||||
AggregateFunctionGroupUniqArrayGeneric(const DataTypePtr & input_data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayGenericData, AggregateFunctionGroupUniqArrayGeneric<is_plain_column, LimitNumElems>>({input_data_type_}, parameters_, std::make_shared<DataTypeArray>(input_data_type_))
|
||||
, input_data_type(this->argument_types[0])
|
||||
, max_elems(max_elems_) {}
|
||||
|
||||
String getName() const override { return "groupUniqArray"; }
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
writeVarUInt(set.size(), buf);
|
||||
|
||||
for (const auto & elem : set)
|
||||
{
|
||||
writeStringBinary(elem.getValue(), buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
size_t size;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
set.insert(readStringBinaryInto(*arena, buf));
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
if (limit_num_elems && set.size() >= max_elems)
|
||||
return;
|
||||
|
||||
bool inserted;
|
||||
State::Set::LookupResult it;
|
||||
auto key_holder = getKeyHolder<is_plain_column>(*columns[0], row_num, *arena);
|
||||
set.emplace(key_holder, it, inserted);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_set = this->data(place).value;
|
||||
auto & rhs_set = this->data(rhs).value;
|
||||
|
||||
bool inserted;
|
||||
State::Set::LookupResult it;
|
||||
for (auto & rhs_elem : rhs_set)
|
||||
{
|
||||
if (limit_num_elems && cur_set.size() >= max_elems)
|
||||
return;
|
||||
|
||||
// We have to copy the keys to our arena.
|
||||
chassert(arena != nullptr);
|
||||
cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), *arena}, it, inserted);
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
IColumn & data_to = arr_to.getData();
|
||||
|
||||
auto & set = this->data(place).value;
|
||||
offsets_to.push_back(offsets_to.back() + set.size());
|
||||
|
||||
for (auto & elem : set)
|
||||
deserializeAndInsert<is_plain_column>(elem.getValue(), data_to);
|
||||
}
|
||||
};
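The generic variant treats every value as an opaque byte string and, on merge, re-inserts each key through an ArenaKeyHolder so the bytes end up in memory owned by the destination state. A rough sketch of that ownership rule, with owning std::string standing in for arena-backed StringRef (illustrative only):

#include <string>
#include <string_view>
#include <unordered_set>

struct GenericUniqState
{
    // Owning strings play the role of keys copied into the state's Arena.
    std::unordered_set<std::string> keys;

    void add(std::string_view serialized_value)
    {
        // Copying into std::string mirrors copying the key into the arena:
        // the source column may not outlive this aggregation state.
        keys.emplace(serialized_value);
    }

    void merge(const GenericUniqState & rhs)
    {
        for (const auto & key : rhs.keys)
            keys.insert(key);   // again a copy -- rhs may be destroyed first
    }
};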
|
||||
|
||||
|
||||
/// Substitute return type for Date and DateTime
|
||||
template <typename HasLimit>
|
||||
class AggregateFunctionGroupUniqArrayDate : public AggregateFunctionGroupUniqArray<DataTypeDate::FieldType, HasLimit>
|
||||
|
@ -1,236 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadHelpersArena.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/HashTableKeyHolder.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/KeyHolderHelpers.h>
|
||||
|
||||
#define AGGREGATE_FUNCTION_GROUP_ARRAY_UNIQ_MAX_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionGroupUniqArrayData
|
||||
{
|
||||
/// When creating, the hash table must be small.
|
||||
using Set = HashSetWithStackMemory<T, DefaultHash<T>, 4>;
|
||||
|
||||
Set value;
|
||||
};
|
||||
|
||||
|
||||
/// Puts all values to the hash set. Returns an array of unique values. Implemented for numeric types.
|
||||
template <typename T, typename LimitNumElems>
|
||||
class AggregateFunctionGroupUniqArray
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayData<T>, AggregateFunctionGroupUniqArray<T, LimitNumElems>>
|
||||
{
|
||||
static constexpr bool limit_num_elems = LimitNumElems::value;
|
||||
UInt64 max_elems;
|
||||
|
||||
private:
|
||||
using State = AggregateFunctionGroupUniqArrayData<T>;
|
||||
|
||||
public:
|
||||
AggregateFunctionGroupUniqArray(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayData<T>,
|
||||
AggregateFunctionGroupUniqArray<T, LimitNumElems>>({argument_type}, parameters_, std::make_shared<DataTypeArray>(argument_type)),
|
||||
max_elems(max_elems_) {}
|
||||
|
||||
AggregateFunctionGroupUniqArray(const DataTypePtr & argument_type, const Array & parameters_, const DataTypePtr & result_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayData<T>,
|
||||
AggregateFunctionGroupUniqArray<T, LimitNumElems>>({argument_type}, parameters_, result_type_),
|
||||
max_elems(max_elems_) {}
|
||||
|
||||
|
||||
String getName() const override { return "groupUniqArray"; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
if (limit_num_elems && this->data(place).value.size() >= max_elems)
|
||||
return;
|
||||
this->data(place).value.insert(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
this->data(place).value.merge(this->data(rhs).value);
|
||||
else
|
||||
{
|
||||
auto & cur_set = this->data(place).value;
|
||||
auto & rhs_set = this->data(rhs).value;
|
||||
|
||||
for (auto & rhs_elem : rhs_set)
|
||||
{
|
||||
if (cur_set.size() >= max_elems)
|
||||
return;
|
||||
cur_set.insert(rhs_elem.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
size_t size = set.size();
|
||||
writeVarUInt(size, buf);
|
||||
for (const auto & elem : set)
|
||||
writeBinaryLittleEndian(elem.key, buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
this->data(place).value.read(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
const typename State::Set & set = this->data(place).value;
|
||||
size_t size = set.size();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
|
||||
size_t old_size = data_to.size();
|
||||
data_to.resize(old_size + size);
|
||||
|
||||
size_t i = 0;
|
||||
for (auto it = set.begin(); it != set.end(); ++it, ++i)
|
||||
data_to[old_size + i] = it->getValue();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Generic implementation, it uses serialized representation as object descriptor.
|
||||
struct AggregateFunctionGroupUniqArrayGenericData
|
||||
{
|
||||
static constexpr size_t INITIAL_SIZE_DEGREE = 3; /// adjustable
|
||||
|
||||
using Set = HashSetWithSavedHashWithStackMemory<StringRef, StringRefHash,
|
||||
INITIAL_SIZE_DEGREE>;
|
||||
|
||||
Set value;
|
||||
};
|
||||
|
||||
template <bool is_plain_column>
|
||||
static void deserializeAndInsertImpl(StringRef str, IColumn & data_to);
|
||||
|
||||
/** Template parameter with true value should be used for columns that store their elements in memory continuously.
|
||||
* For such columns groupUniqArray() can be implemented more efficiently (especially for small numeric arrays).
|
||||
*/
|
||||
template <bool is_plain_column = false, typename LimitNumElems = std::false_type>
|
||||
class AggregateFunctionGroupUniqArrayGeneric
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayGenericData,
|
||||
AggregateFunctionGroupUniqArrayGeneric<is_plain_column, LimitNumElems>>
|
||||
{
|
||||
DataTypePtr & input_data_type;
|
||||
|
||||
static constexpr bool limit_num_elems = LimitNumElems::value;
|
||||
UInt64 max_elems;
|
||||
|
||||
using State = AggregateFunctionGroupUniqArrayGenericData;
|
||||
|
||||
public:
|
||||
AggregateFunctionGroupUniqArrayGeneric(const DataTypePtr & input_data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayGenericData, AggregateFunctionGroupUniqArrayGeneric<is_plain_column, LimitNumElems>>({input_data_type_}, parameters_, std::make_shared<DataTypeArray>(input_data_type_))
|
||||
, input_data_type(this->argument_types[0])
|
||||
, max_elems(max_elems_) {}
|
||||
|
||||
String getName() const override { return "groupUniqArray"; }
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
writeVarUInt(set.size(), buf);
|
||||
|
||||
for (const auto & elem : set)
|
||||
{
|
||||
writeStringBinary(elem.getValue(), buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
size_t size;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
set.insert(readStringBinaryInto(*arena, buf));
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
if (limit_num_elems && set.size() >= max_elems)
|
||||
return;
|
||||
|
||||
bool inserted;
|
||||
State::Set::LookupResult it;
|
||||
auto key_holder = getKeyHolder<is_plain_column>(*columns[0], row_num, *arena);
|
||||
set.emplace(key_holder, it, inserted);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_set = this->data(place).value;
|
||||
auto & rhs_set = this->data(rhs).value;
|
||||
|
||||
bool inserted;
|
||||
State::Set::LookupResult it;
|
||||
for (auto & rhs_elem : rhs_set)
|
||||
{
|
||||
if (limit_num_elems && cur_set.size() >= max_elems)
|
||||
return;
|
||||
|
||||
// We have to copy the keys to our arena.
|
||||
assert(arena != nullptr);
|
||||
cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), *arena}, it, inserted);
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
IColumn & data_to = arr_to.getData();
|
||||
|
||||
auto & set = this->data(place).value;
|
||||
offsets_to.push_back(offsets_to.back() + set.size());
|
||||
|
||||
for (auto & elem : set)
|
||||
deserializeAndInsert<is_plain_column>(elem.getValue(), data_to);
|
||||
}
|
||||
};
|
||||
|
||||
#undef AGGREGATE_FUNCTION_GROUP_ARRAY_UNIQ_MAX_SIZE
|
||||
|
||||
}
|
@ -1,9 +1,31 @@
|
||||
#include <AggregateFunctions/AggregateFunctionHistogram.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <Common/FieldVisitorConvertToNumber.h>
|
||||
|
||||
#include <Common/NaNUtils.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/VarInt.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
#include <queue>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -16,12 +38,357 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int UNSUPPORTED_PARAMETER;
|
||||
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
extern const int INCORRECT_DATA;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/** Distance-based compression algorithm implementation (streaming histogram), see
|
||||
* http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf
|
||||
*/
|
||||
class AggregateFunctionHistogramData
|
||||
{
|
||||
public:
|
||||
using Mean = Float64;
|
||||
using Weight = Float64;
|
||||
|
||||
constexpr static size_t bins_count_limit = 250;
|
||||
|
||||
private:
|
||||
struct WeightedValue
|
||||
{
|
||||
Mean mean;
|
||||
Weight weight;
|
||||
|
||||
WeightedValue operator+(const WeightedValue & other) const
|
||||
{
|
||||
return {mean + other.weight * (other.mean - mean) / (other.weight + weight), other.weight + weight};
|
||||
}
|
||||
};
|
||||
|
||||
// quantity of stored weighted-values
|
||||
UInt32 size;
|
||||
|
||||
// calculated lower and upper bounds of seen points
|
||||
Mean lower_bound;
|
||||
Mean upper_bound;
|
||||
|
||||
// Weighted values representation of histogram.
|
||||
WeightedValue points[0];
|
||||
|
||||
void sort()
|
||||
{
|
||||
::sort(points, points + size,
|
||||
[](const WeightedValue & first, const WeightedValue & second)
|
||||
{
|
||||
return first.mean < second.mean;
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct PriorityQueueStorage
|
||||
{
|
||||
size_t size = 0;
|
||||
T * data_ptr;
|
||||
|
||||
explicit PriorityQueueStorage(T * value)
|
||||
: data_ptr(value)
|
||||
{
|
||||
}
|
||||
|
||||
void push_back(T val) /// NOLINT
|
||||
{
|
||||
data_ptr[size] = std::move(val);
|
||||
++size;
|
||||
}
|
||||
|
||||
void pop_back() { --size; } /// NOLINT
|
||||
T * begin() { return data_ptr; }
|
||||
T * end() const { return data_ptr + size; }
|
||||
bool empty() const { return size == 0; }
|
||||
T & front() { return *data_ptr; }
|
||||
const T & front() const { return *data_ptr; }
|
||||
|
||||
using value_type = T;
|
||||
using reference = T&;
|
||||
using const_reference = const T&;
|
||||
using size_type = size_t;
|
||||
};
|
||||
|
||||
/**
* Repeatedly fuse the closest adjacent values until at most max_bins bins are left.
*/
|
||||
void compress(UInt32 max_bins)
|
||||
{
|
||||
sort();
|
||||
auto new_size = size;
|
||||
if (size <= max_bins)
|
||||
return;
|
||||
|
||||
// Maintain doubly-linked list of "active" points
|
||||
// and store neighbour pairs in priority queue by distance
|
||||
UInt32 previous[size + 1];
|
||||
UInt32 next[size + 1];
|
||||
bool active[size + 1];
|
||||
std::fill(active, active + size, true);
|
||||
active[size] = false;
|
||||
|
||||
auto delete_node = [&](UInt32 i)
|
||||
{
|
||||
previous[next[i]] = previous[i];
|
||||
next[previous[i]] = next[i];
|
||||
active[i] = false;
|
||||
};
|
||||
|
||||
for (size_t i = 0; i <= size; ++i)
|
||||
{
|
||||
previous[i] = static_cast<UInt32>(i - 1);
|
||||
next[i] = static_cast<UInt32>(i + 1);
|
||||
}
|
||||
|
||||
next[size] = 0;
|
||||
previous[0] = size;
|
||||
|
||||
using QueueItem = std::pair<Mean, UInt32>;
|
||||
|
||||
QueueItem storage[2 * size - max_bins];
|
||||
|
||||
std::priority_queue<
|
||||
QueueItem,
|
||||
PriorityQueueStorage<QueueItem>,
|
||||
std::greater<>>
|
||||
queue{std::greater<>(),
|
||||
PriorityQueueStorage<QueueItem>(storage)};
|
||||
|
||||
auto quality = [&](UInt32 i) { return points[next[i]].mean - points[i].mean; };
|
||||
|
||||
for (size_t i = 0; i + 1 < size; ++i)
|
||||
queue.push({quality(static_cast<UInt32>(i)), i});
|
||||
|
||||
while (new_size > max_bins && !queue.empty())
|
||||
{
|
||||
auto min_item = queue.top();
|
||||
queue.pop();
|
||||
auto left = min_item.second;
|
||||
auto right = next[left];
|
||||
|
||||
if (!active[left] || !active[right] || quality(left) > min_item.first)
|
||||
continue;
|
||||
|
||||
points[left] = points[left] + points[right];
|
||||
|
||||
delete_node(right);
|
||||
if (active[next[left]])
|
||||
queue.push({quality(left), left});
|
||||
if (active[previous[left]])
|
||||
queue.push({quality(previous[left]), previous[left]});
|
||||
|
||||
--new_size;
|
||||
}
|
||||
|
||||
size_t left = 0;
|
||||
for (size_t right = 0; right < size; ++right)
|
||||
{
|
||||
if (active[right])
|
||||
{
|
||||
points[left] = points[right];
|
||||
++left;
|
||||
}
|
||||
}
|
||||
size = new_size;
|
||||
}
|
||||
|
||||
/**
* Delete points that are too close to each other from the histogram.
* Assumes that the points are sorted.
*/
|
||||
void unique()
|
||||
{
|
||||
if (size == 0)
|
||||
return;
|
||||
|
||||
size_t left = 0;
|
||||
|
||||
for (auto right = left + 1; right < size; ++right)
|
||||
{
|
||||
// Fuse points if their text representations differ only in last digit
|
||||
auto min_diff = 10 * (points[left].mean + points[right].mean) * std::numeric_limits<Mean>::epsilon();
|
||||
if (points[left].mean + std::fabs(min_diff) >= points[right].mean)
|
||||
{
|
||||
points[left] = points[left] + points[right];
|
||||
}
|
||||
else
|
||||
{
|
||||
++left;
|
||||
points[left] = points[right];
|
||||
}
|
||||
}
|
||||
size = static_cast<UInt32>(left + 1);
|
||||
}
|
||||
|
||||
public:
|
||||
AggregateFunctionHistogramData()
|
||||
: size(0)
|
||||
, lower_bound(std::numeric_limits<Mean>::max())
|
||||
, upper_bound(std::numeric_limits<Mean>::lowest())
|
||||
{
|
||||
static_assert(offsetof(AggregateFunctionHistogramData, points) == sizeof(AggregateFunctionHistogramData), "points should be last member");
|
||||
}
|
||||
|
||||
static size_t structSize(size_t max_bins)
|
||||
{
|
||||
return sizeof(AggregateFunctionHistogramData) + max_bins * 2 * sizeof(WeightedValue);
|
||||
}
|
||||
|
||||
void insertResultInto(ColumnVector<Mean> & to_lower, ColumnVector<Mean> & to_upper, ColumnVector<Weight> & to_weights, UInt32 max_bins)
|
||||
{
|
||||
compress(max_bins);
|
||||
unique();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
to_lower.insertValue((i == 0) ? lower_bound : (points[i].mean + points[i - 1].mean) / 2);
|
||||
to_upper.insertValue((i + 1 == size) ? upper_bound : (points[i].mean + points[i + 1].mean) / 2);
|
||||
|
||||
// linear density approximation
|
||||
Weight lower_weight = (i == 0) ? points[i].weight : ((points[i - 1].weight) + points[i].weight * 3) / 4;
|
||||
Weight upper_weight = (i + 1 == size) ? points[i].weight : (points[i + 1].weight + points[i].weight * 3) / 4;
|
||||
to_weights.insertValue((lower_weight + upper_weight) / 2);
|
||||
}
|
||||
}
|
||||
|
||||
void add(Mean value, Weight weight, UInt32 max_bins)
|
||||
{
|
||||
// nans break sort and compression
|
||||
// infs don't fit the bin partitioning method
|
||||
if (!isFinite(value))
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid value (inf or nan) for aggregation by 'histogram' function");
|
||||
|
||||
points[size] = {value, weight};
|
||||
++size;
|
||||
lower_bound = std::min(lower_bound, value);
|
||||
upper_bound = std::max(upper_bound, value);
|
||||
|
||||
if (size >= max_bins * 2)
|
||||
compress(max_bins);
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionHistogramData & other, UInt32 max_bins)
|
||||
{
|
||||
lower_bound = std::min(lower_bound, other.lower_bound);
|
||||
upper_bound = std::max(upper_bound, other.upper_bound);
|
||||
for (size_t i = 0; i < other.size; ++i)
|
||||
add(other.points[i].mean, other.points[i].weight, max_bins);
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinary(lower_bound, buf);
|
||||
writeBinary(upper_bound, buf);
|
||||
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(points), size * sizeof(WeightedValue));
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf, UInt32 max_bins)
|
||||
{
|
||||
readBinary(lower_bound, buf);
|
||||
readBinary(upper_bound, buf);
|
||||
|
||||
readVarUInt(size, buf);
|
||||
if (size > max_bins * 2)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too many bins");
|
||||
static constexpr size_t max_size = 1_GiB;
|
||||
if (size > max_size)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size in histogram (maximum: {})", max_size);
|
||||
|
||||
buf.readStrict(reinterpret_cast<char *>(points), size * sizeof(WeightedValue));
|
||||
}
|
||||
};
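The state above keeps at most 2 * max_bins weighted centroids; once add() fills the buffer, compress() repeatedly fuses the pair of adjacent centroids (by mean) with the smallest gap, following the streaming-histogram paper referenced above. The production code keeps this O(n log n) with a doubly-linked list plus a priority queue; the sketch below shows the same fusion rule in its simplest quadratic form (illustrative, assumes max_bins >= 1):

#include <algorithm>
#include <cstddef>
#include <vector>

struct Centroid
{
    double mean;
    double weight;
};

// Weighted average of two centroids, as in WeightedValue::operator+.
static Centroid fuse(const Centroid & a, const Centroid & b)
{
    return {a.mean + b.weight * (b.mean - a.mean) / (a.weight + b.weight), a.weight + b.weight};
}

// Naive O(n^2) version of compress(): fuse the closest neighbours until max_bins remain.
static void compress(std::vector<Centroid> & points, size_t max_bins)
{
    std::sort(points.begin(), points.end(),
              [](const Centroid & l, const Centroid & r) { return l.mean < r.mean; });

    while (points.size() > max_bins && points.size() >= 2)
    {
        size_t best = 0;
        for (size_t i = 1; i + 1 < points.size(); ++i)
            if (points[i + 1].mean - points[i].mean < points[best + 1].mean - points[best].mean)
                best = i;

        points[best] = fuse(points[best], points[best + 1]);
        points.erase(points.begin() + best + 1);
    }
}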
|
||||
|
||||
template <typename T>
|
||||
class AggregateFunctionHistogram final: public IAggregateFunctionDataHelper<AggregateFunctionHistogramData, AggregateFunctionHistogram<T>>
|
||||
{
|
||||
private:
|
||||
using Data = AggregateFunctionHistogramData;
|
||||
|
||||
const UInt32 max_bins;
|
||||
|
||||
public:
|
||||
AggregateFunctionHistogram(UInt32 max_bins_, const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionHistogramData, AggregateFunctionHistogram<T>>(arguments, params, createResultType())
|
||||
, max_bins(max_bins_)
|
||||
{
|
||||
}
|
||||
|
||||
size_t sizeOfData() const override
|
||||
{
|
||||
return Data::structSize(max_bins);
|
||||
}
|
||||
static DataTypePtr createResultType()
|
||||
{
|
||||
DataTypes types;
|
||||
auto mean = std::make_shared<DataTypeNumber<Data::Mean>>();
|
||||
auto weight = std::make_shared<DataTypeNumber<Data::Weight>>();
|
||||
|
||||
// lower bound
|
||||
types.emplace_back(mean);
|
||||
// upper bound
|
||||
types.emplace_back(mean);
|
||||
// weight
|
||||
types.emplace_back(weight);
|
||||
|
||||
auto tuple = std::make_shared<DataTypeTuple>(types);
|
||||
return std::make_shared<DataTypeArray>(tuple);
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
auto val = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
|
||||
this->data(place).add(static_cast<Data::Mean>(val), 1, max_bins);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs), max_bins);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
this->data(place).read(buf, max_bins);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
auto & data = this->data(place);
|
||||
|
||||
auto & to_array = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = to_array.getOffsets();
|
||||
auto & to_tuple = assert_cast<ColumnTuple &>(to_array.getData());
|
||||
|
||||
auto & to_lower = assert_cast<ColumnVector<Data::Mean> &>(to_tuple.getColumn(0));
|
||||
auto & to_upper = assert_cast<ColumnVector<Data::Mean> &>(to_tuple.getColumn(1));
|
||||
auto & to_weights = assert_cast<ColumnVector<Data::Weight> &>(to_tuple.getColumn(2));
|
||||
data.insertResultInto(to_lower, to_upper, to_weights, max_bins);
|
||||
|
||||
offsets_to.push_back(to_tuple.size());
|
||||
}
|
||||
|
||||
String getName() const override { return "histogram"; }
|
||||
};
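Note that the histogram state is a variable-size object: it ends with WeightedValue points[0], the static_assert pins that member to the end of the struct, and sizeOfData() reports structSize(max_bins), so a single contiguous block with room for 2 * max_bins points behind the header can be allocated. A hedged illustration of that layout trick with plain operator new (not the actual ClickHouse allocation path):

#include <cstddef>
#include <new>

struct Point { double mean; double weight; };

struct HistogramHeader
{
    unsigned size = 0;
    double lower_bound = 0;
    double upper_bound = 0;

    // Points live immediately behind the header, inside the same allocation.
    Point * points() { return reinterpret_cast<Point *>(this + 1); }
};

// Mirrors Data::structSize(max_bins): header plus room for 2 * max_bins points.
static std::size_t struct_size(std::size_t max_bins)
{
    return sizeof(HistogramHeader) + 2 * max_bins * sizeof(Point);
}

int main()
{
    const std::size_t max_bins = 16;
    void * memory = ::operator new(struct_size(max_bins));
    auto * state = new (memory) HistogramHeader{};        // roughly: create state on pre-sized memory

    new (state->points() + state->size) Point{1.0, 1.0};  // add() appends into the tail array
    ++state->size;

    ::operator delete(memory);
}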
|
||||
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionHistogram(const std::string & name, const DataTypes & arguments, const Array & params, const Settings *)
|
||||
{
|
||||
if (params.size() != 1)
|
||||
|
@ -1,382 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <Common/NaNUtils.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/VarInt.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
#include <math.h>
|
||||
#include <queue>
|
||||
#include <stddef.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
class Arena;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
extern const int INCORRECT_DATA;
|
||||
}
|
||||
|
||||
/**
|
||||
* distance compression algorithm implementation
|
||||
* http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf
|
||||
*/
|
||||
class AggregateFunctionHistogramData
|
||||
{
|
||||
public:
|
||||
using Mean = Float64;
|
||||
using Weight = Float64;
|
||||
|
||||
constexpr static size_t bins_count_limit = 250;
|
||||
|
||||
private:
|
||||
struct WeightedValue
|
||||
{
|
||||
Mean mean;
|
||||
Weight weight;
|
||||
|
||||
WeightedValue operator+(const WeightedValue & other) const
|
||||
{
|
||||
return {mean + other.weight * (other.mean - mean) / (other.weight + weight), other.weight + weight};
|
||||
}
|
||||
};
|
||||
|
||||
// quantity of stored weighted-values
|
||||
UInt32 size;
|
||||
|
||||
// calculated lower and upper bounds of seen points
|
||||
Mean lower_bound;
|
||||
Mean upper_bound;
|
||||
|
||||
// Weighted values representation of histogram.
|
||||
WeightedValue points[0];
|
||||
|
||||
void sort()
|
||||
{
|
||||
::sort(points, points + size,
|
||||
[](const WeightedValue & first, const WeightedValue & second)
|
||||
{
|
||||
return first.mean < second.mean;
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct PriorityQueueStorage
|
||||
{
|
||||
size_t size = 0;
|
||||
T * data_ptr;
|
||||
|
||||
explicit PriorityQueueStorage(T * value)
|
||||
: data_ptr(value)
|
||||
{
|
||||
}
|
||||
|
||||
void push_back(T val) /// NOLINT
|
||||
{
|
||||
data_ptr[size] = std::move(val);
|
||||
++size;
|
||||
}
|
||||
|
||||
void pop_back() { --size; } /// NOLINT
|
||||
T * begin() { return data_ptr; }
|
||||
T * end() const { return data_ptr + size; }
|
||||
bool empty() const { return size == 0; }
|
||||
T & front() { return *data_ptr; }
|
||||
const T & front() const { return *data_ptr; }
|
||||
|
||||
using value_type = T;
|
||||
using reference = T&;
|
||||
using const_reference = const T&;
|
||||
using size_type = size_t;
|
||||
};
|
||||
|
||||
/**
|
||||
* Repeatedly fuse most close values until max_bins bins left
|
||||
*/
|
||||
void compress(UInt32 max_bins)
|
||||
{
|
||||
sort();
|
||||
auto new_size = size;
|
||||
if (size <= max_bins)
|
||||
return;
|
||||
|
||||
// Maintain doubly-linked list of "active" points
|
||||
// and store neighbour pairs in priority queue by distance
|
||||
UInt32 previous[size + 1];
|
||||
UInt32 next[size + 1];
|
||||
bool active[size + 1];
|
||||
std::fill(active, active + size, true);
|
||||
active[size] = false;
|
||||
|
||||
auto delete_node = [&](UInt32 i)
|
||||
{
|
||||
previous[next[i]] = previous[i];
|
||||
next[previous[i]] = next[i];
|
||||
active[i] = false;
|
||||
};
|
||||
|
||||
for (size_t i = 0; i <= size; ++i)
|
||||
{
|
||||
previous[i] = static_cast<UInt32>(i - 1);
|
||||
next[i] = static_cast<UInt32>(i + 1);
|
||||
}
|
||||
|
||||
next[size] = 0;
|
||||
previous[0] = size;
|
||||
|
||||
using QueueItem = std::pair<Mean, UInt32>;
|
||||
|
||||
QueueItem storage[2 * size - max_bins];
|
||||
|
||||
std::priority_queue<
|
||||
QueueItem,
|
||||
PriorityQueueStorage<QueueItem>,
|
||||
std::greater<QueueItem>>
|
||||
queue{std::greater<QueueItem>(),
|
||||
PriorityQueueStorage<QueueItem>(storage)};
|
||||
|
||||
auto quality = [&](UInt32 i) { return points[next[i]].mean - points[i].mean; };
|
||||
|
||||
for (size_t i = 0; i + 1 < size; ++i)
|
||||
queue.push({quality(static_cast<UInt32>(i)), i});
|
||||
|
||||
while (new_size > max_bins && !queue.empty())
|
||||
{
|
||||
auto min_item = queue.top();
|
||||
queue.pop();
|
||||
auto left = min_item.second;
|
||||
auto right = next[left];
|
||||
|
||||
if (!active[left] || !active[right] || quality(left) > min_item.first)
|
||||
continue;
|
||||
|
||||
points[left] = points[left] + points[right];
|
||||
|
||||
delete_node(right);
|
||||
if (active[next[left]])
|
||||
queue.push({quality(left), left});
|
||||
if (active[previous[left]])
|
||||
queue.push({quality(previous[left]), previous[left]});
|
||||
|
||||
--new_size;
|
||||
}
|
||||
|
||||
size_t left = 0;
|
||||
for (size_t right = 0; right < size; ++right)
|
||||
{
|
||||
if (active[right])
|
||||
{
|
||||
points[left] = points[right];
|
||||
++left;
|
||||
}
|
||||
}
|
||||
size = new_size;
|
||||
}
|
||||
|
||||
/***
|
||||
* Delete too close points from histogram.
|
||||
* Assumes that points are sorted.
|
||||
*/
|
||||
void unique()
|
||||
{
|
||||
if (size == 0)
|
||||
return;
|
||||
|
||||
size_t left = 0;
|
||||
|
||||
for (auto right = left + 1; right < size; ++right)
|
||||
{
|
||||
// Fuse points if their text representations differ only in last digit
|
||||
auto min_diff = 10 * (points[left].mean + points[right].mean) * std::numeric_limits<Mean>::epsilon();
|
||||
if (points[left].mean + std::fabs(min_diff) >= points[right].mean)
|
||||
{
|
||||
points[left] = points[left] + points[right];
|
||||
}
|
||||
else
|
||||
{
|
||||
++left;
|
||||
points[left] = points[right];
|
||||
}
|
||||
}
|
||||
size = static_cast<UInt32>(left + 1);
|
||||
}
|
||||
|
||||
public:
|
||||
AggregateFunctionHistogramData()
|
||||
: size(0)
|
||||
, lower_bound(std::numeric_limits<Mean>::max())
|
||||
, upper_bound(std::numeric_limits<Mean>::lowest())
|
||||
{
|
||||
static_assert(offsetof(AggregateFunctionHistogramData, points) == sizeof(AggregateFunctionHistogramData), "points should be last member");
|
||||
}
|
||||
|
||||
static size_t structSize(size_t max_bins)
|
||||
{
|
||||
return sizeof(AggregateFunctionHistogramData) + max_bins * 2 * sizeof(WeightedValue);
|
||||
}
|
||||
|
||||
void insertResultInto(ColumnVector<Mean> & to_lower, ColumnVector<Mean> & to_upper, ColumnVector<Weight> & to_weights, UInt32 max_bins)
|
||||
{
|
||||
compress(max_bins);
|
||||
unique();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
to_lower.insertValue((i == 0) ? lower_bound : (points[i].mean + points[i - 1].mean) / 2);
|
||||
to_upper.insertValue((i + 1 == size) ? upper_bound : (points[i].mean + points[i + 1].mean) / 2);
|
||||
|
||||
// linear density approximation
|
||||
Weight lower_weight = (i == 0) ? points[i].weight : ((points[i - 1].weight) + points[i].weight * 3) / 4;
|
||||
Weight upper_weight = (i + 1 == size) ? points[i].weight : (points[i + 1].weight + points[i].weight * 3) / 4;
|
||||
to_weights.insertValue((lower_weight + upper_weight) / 2);
|
||||
}
|
||||
}
|
||||
|
||||
void add(Mean value, Weight weight, UInt32 max_bins)
|
||||
{
|
||||
// nans break sort and compression
|
||||
// infs don't fit in bins partition method
|
||||
if (!isFinite(value))
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid value (inf or nan) for aggregation by 'histogram' function");
|
||||
|
||||
points[size] = {value, weight};
|
||||
++size;
|
||||
lower_bound = std::min(lower_bound, value);
|
||||
upper_bound = std::max(upper_bound, value);
|
||||
|
||||
if (size >= max_bins * 2)
|
||||
compress(max_bins);
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionHistogramData & other, UInt32 max_bins)
|
||||
{
|
||||
lower_bound = std::min(lower_bound, other.lower_bound);
|
||||
upper_bound = std::max(upper_bound, other.upper_bound);
|
||||
for (size_t i = 0; i < other.size; ++i)
|
||||
add(other.points[i].mean, other.points[i].weight, max_bins);
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinary(lower_bound, buf);
|
||||
writeBinary(upper_bound, buf);
|
||||
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(points), size * sizeof(WeightedValue));
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf, UInt32 max_bins)
|
||||
{
|
||||
readBinary(lower_bound, buf);
|
||||
readBinary(upper_bound, buf);
|
||||
|
||||
readVarUInt(size, buf);
|
||||
if (size > max_bins * 2)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too many bins");
|
||||
static constexpr size_t max_size = 1_GiB;
|
||||
if (size > max_size)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size in histogram (maximum: {})", max_size);
|
||||
|
||||
buf.readStrict(reinterpret_cast<char *>(points), size * sizeof(WeightedValue));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class AggregateFunctionHistogram final: public IAggregateFunctionDataHelper<AggregateFunctionHistogramData, AggregateFunctionHistogram<T>>
|
||||
{
|
||||
private:
|
||||
using Data = AggregateFunctionHistogramData;
|
||||
|
||||
const UInt32 max_bins;
|
||||
|
||||
public:
|
||||
AggregateFunctionHistogram(UInt32 max_bins_, const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionHistogramData, AggregateFunctionHistogram<T>>(arguments, params, createResultType())
|
||||
, max_bins(max_bins_)
|
||||
{
|
||||
}
|
||||
|
||||
size_t sizeOfData() const override
|
||||
{
|
||||
return Data::structSize(max_bins);
|
||||
}
|
||||
static DataTypePtr createResultType()
|
||||
{
|
||||
DataTypes types;
|
||||
auto mean = std::make_shared<DataTypeNumber<Data::Mean>>();
|
||||
auto weight = std::make_shared<DataTypeNumber<Data::Weight>>();
|
||||
|
||||
// lower bound
|
||||
types.emplace_back(mean);
|
||||
// upper bound
|
||||
types.emplace_back(mean);
|
||||
// weight
|
||||
types.emplace_back(weight);
|
||||
|
||||
auto tuple = std::make_shared<DataTypeTuple>(types);
|
||||
return std::make_shared<DataTypeArray>(tuple);
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
auto val = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
|
||||
this->data(place).add(static_cast<Data::Mean>(val), 1, max_bins);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs), max_bins);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
this->data(place).read(buf, max_bins);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
auto & data = this->data(place);
|
||||
|
||||
auto & to_array = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = to_array.getOffsets();
|
||||
auto & to_tuple = assert_cast<ColumnTuple &>(to_array.getData());
|
||||
|
||||
auto & to_lower = assert_cast<ColumnVector<Data::Mean> &>(to_tuple.getColumn(0));
|
||||
auto & to_upper = assert_cast<ColumnVector<Data::Mean> &>(to_tuple.getColumn(1));
|
||||
auto & to_weights = assert_cast<ColumnVector<Data::Weight> &>(to_tuple.getColumn(2));
|
||||
data.insertResultInto(to_lower, to_upper, to_weights, max_bins);
|
||||
|
||||
offsets_to.push_back(to_tuple.size());
|
||||
}
|
||||
|
||||
String getName() const override { return "histogram"; }
|
||||
};
|
||||
|
||||
}
|
@ -1,57 +1,272 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionIntervalLengthSum.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
|
||||
#include <base/range.h>
|
||||
#include <unordered_set>
|
||||
|
||||
#include <AggregateFunctions/Combinators/AggregateFunctionNull.h>
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
|
||||
#include <Common/assert_cast.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
struct Settings;
|
||||
|
||||
namespace
|
||||
{
|
||||
template <template <typename> class Data>
|
||||
AggregateFunctionPtr
|
||||
createAggregateFunctionIntervalLengthSum(const std::string & name, const DataTypes & arguments, const Array &, const Settings *)
|
||||
|
||||
/** Calculate total length of intervals without intersections. Each interval is the pair of numbers [begin, end];
|
||||
* Returns UInt64 for integral types (UInt/Int*, Date/DateTime) and returns Float64 for Float*.
|
||||
*
|
||||
* Implementation simply stores intervals sorted by their beginning and sums the lengths at the end.
|
||||
*/
|
||||
template <typename T>
|
||||
struct AggregateFunctionIntervalLengthSumData
|
||||
{
|
||||
constexpr static size_t MAX_ARRAY_SIZE = 0xFFFFFF;
|
||||
|
||||
using Segment = std::pair<T, T>;
|
||||
using Segments = PODArrayWithStackMemory<Segment, 64>;
|
||||
|
||||
bool sorted = false;
|
||||
|
||||
Segments segments;
|
||||
|
||||
void add(T begin, T end)
{
/// Reversed intervals are counted by absolute value of their length.
if (unlikely(end < begin))
std::swap(begin, end);
else if (unlikely(begin == end))
return;

if (sorted && !segments.empty())
sorted = segments.back().first <= begin;
segments.emplace_back(begin, end);
}

void merge(const AggregateFunctionIntervalLengthSumData & other)
{
if (other.segments.empty())
return;

const auto size = segments.size();

segments.insert(std::begin(other.segments), std::end(other.segments));

/// either sort whole container or do so partially merging ranges afterwards
if (!sorted && !other.sorted)
{
::sort(std::begin(segments), std::end(segments));
}
else
{
const auto begin = std::begin(segments);
const auto middle = std::next(begin, size);
const auto end = std::end(segments);

if (!sorted)
::sort(begin, middle);

if (!other.sorted)
::sort(middle, end);

std::inplace_merge(begin, middle, end);
}

sorted = true;
}
|
||||
void sort()
|
||||
{
|
||||
if (sorted)
|
||||
return;
|
||||
|
||||
::sort(std::begin(segments), std::end(segments));
|
||||
sorted = true;
|
||||
}
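merge() above avoids re-sorting everything when at least one side is already ordered: it sorts only the unsorted sub-range(s) and then stitches the two ordered halves together with std::inplace_merge. The same idea in isolation (illustrative types; both halves live in one vector, as they do after segments.insert):

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

using Segment = std::pair<int, int>;

// Append `other` to `mine` and restore global order cheaply when the parts are sorted.
static void merge_sorted_parts(std::vector<Segment> & mine, const std::vector<Segment> & other)
{
    const auto old_size = mine.size();
    mine.insert(mine.end(), other.begin(), other.end());

    auto begin = mine.begin();
    auto middle = begin + static_cast<std::ptrdiff_t>(old_size);

    std::sort(begin, middle);          // the real code skips whichever half is already sorted
    std::sort(middle, mine.end());

    std::inplace_merge(begin, middle, mine.end());   // merge the two ordered halves in place
}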
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinary(sorted, buf);
|
||||
writeBinary(segments.size(), buf);
|
||||
|
||||
for (const auto & time_gap : segments)
|
||||
{
|
||||
writeBinary(time_gap.first, buf);
|
||||
writeBinary(time_gap.second, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
readBinary(sorted, buf);
|
||||
|
||||
size_t size;
|
||||
readBinary(size, buf);
|
||||
|
||||
if (unlikely(size > MAX_ARRAY_SIZE))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {})", MAX_ARRAY_SIZE);
|
||||
|
||||
segments.clear();
|
||||
segments.reserve(size);
|
||||
|
||||
Segment segment;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
readBinary(segment.first, buf);
|
||||
readBinary(segment.second, buf);
|
||||
segments.emplace_back(segment);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename Data>
|
||||
class AggregateFunctionIntervalLengthSum final : public IAggregateFunctionDataHelper<Data, AggregateFunctionIntervalLengthSum<T, Data>>
|
||||
{
|
||||
private:
|
||||
static auto NO_SANITIZE_UNDEFINED length(typename Data::Segment segment)
|
||||
{
|
||||
return segment.second - segment.first;
|
||||
}
|
||||
|
||||
template <typename TResult>
|
||||
TResult getIntervalLengthSum(Data & data) const
|
||||
{
|
||||
if (data.segments.empty())
|
||||
return 0;
|
||||
|
||||
data.sort();
|
||||
|
||||
TResult res = 0;
|
||||
|
||||
typename Data::Segment curr_segment = data.segments[0];
|
||||
|
||||
for (size_t i = 1, size = data.segments.size(); i < size; ++i)
|
||||
{
|
||||
const typename Data::Segment & next_segment = data.segments[i];
|
||||
|
||||
/// If the current interval does not intersect the next one, add its length and move on; otherwise extend the current interval's end.
|
||||
if (curr_segment.second < next_segment.first)
|
||||
{
|
||||
res += length(curr_segment);
|
||||
curr_segment = next_segment;
|
||||
}
|
||||
else if (next_segment.second > curr_segment.second)
|
||||
{
|
||||
curr_segment.second = next_segment.second;
|
||||
}
|
||||
}
|
||||
res += length(curr_segment);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
public:
|
||||
String getName() const override { return "intervalLengthSum"; }
|
||||
|
||||
explicit AggregateFunctionIntervalLengthSum(const DataTypes & arguments)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionIntervalLengthSum<T, Data>>(arguments, {}, createResultType())
|
||||
{
|
||||
}
|
||||
|
||||
static DataTypePtr createResultType()
|
||||
{
|
||||
if constexpr (std::is_floating_point_v<T>)
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
AggregateFunctionPtr getOwnNullAdapter(
|
||||
const AggregateFunctionPtr & nested_function,
|
||||
const DataTypes & arguments,
|
||||
const Array & params,
|
||||
const AggregateFunctionProperties & /*properties*/) const override
|
||||
{
|
||||
return std::make_shared<AggregateFunctionNullVariadic<false, false>>(nested_function, arguments, params);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override
|
||||
{
|
||||
auto begin = assert_cast<const ColumnVector<T> *>(columns[0])->getData()[row_num];
|
||||
auto end = assert_cast<const ColumnVector<T> *>(columns[1])->getData()[row_num];
|
||||
this->data(place).add(begin, end);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).serialize(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
this->data(place).deserialize(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
if constexpr (std::is_floating_point_v<T>)
|
||||
assert_cast<ColumnFloat64 &>(to).getData().push_back(getIntervalLengthSum<Float64>(this->data(place)));
|
||||
else
|
||||
assert_cast<ColumnUInt64 &>(to).getData().push_back(getIntervalLengthSum<UInt64>(this->data(place)));
|
||||
}
|
||||
};
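getIntervalLengthSum() is a classic sweep over intervals sorted by their left end: keep one running segment, bank its length whenever the next interval starts past its right end, otherwise just extend the right end. A self-contained version of the same sweep (illustrative types):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Total length covered by the union of [first, second) intervals.
static uint64_t interval_union_length(std::vector<std::pair<uint64_t, uint64_t>> segments)
{
    if (segments.empty())
        return 0;

    std::sort(segments.begin(), segments.end());

    uint64_t total = 0;
    auto current = segments.front();

    for (std::size_t i = 1; i < segments.size(); ++i)
    {
        const auto & next = segments[i];
        if (current.second < next.first)          // disjoint: bank the current length
        {
            total += current.second - current.first;
            current = next;
        }
        else if (next.second > current.second)    // overlapping: extend the right end
            current.second = next.second;
    }
    return total + (current.second - current.first);
}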
|
||||
|
||||
|
||||
template <template <typename> class Data>
|
||||
AggregateFunctionPtr
|
||||
createAggregateFunctionIntervalLengthSum(const std::string & name, const DataTypes & arguments, const Array &, const Settings *)
|
||||
{
|
||||
if (arguments.size() != 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Aggregate function {} requires two timestamps argument.", name);
|
||||
|
||||
auto args = {arguments[0].get(), arguments[1].get()};
|
||||
|
||||
if (WhichDataType{args.begin()[0]}.idx != WhichDataType{args.begin()[1]}.idx)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal types {} and {} of arguments "
"of aggregate function {}, both arguments should have same data type",
args.begin()[0]->getName(), args.begin()[1]->getName(), name);
|
||||
|
||||
for (const auto & arg : args)
|
||||
{
|
||||
if (!isNativeNumber(arg) && !isDate(arg) && !isDateTime(arg))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of argument of aggregate function {}, must "
|
||||
"be native integral type, Date/DateTime or Float", arg->getName(), name);
|
||||
}
|
||||
|
||||
AggregateFunctionPtr res(createWithBasicNumberOrDateOrDateTime<AggregateFunctionIntervalLengthSum, Data>(*arguments[0], arguments));
|
||||
|
||||
if (res)
|
||||
return res;
|
||||
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of argument of aggregate function {}, must "
|
||||
"be native integral type, Date/DateTime or Float", arguments.front().get()->getName(), name);
|
||||
}
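createWithBasicNumberOrDateOrDateTime dispatches on the runtime type of the first argument and instantiates the matching AggregateFunctionIntervalLengthSum specialisation, returning nullptr for unsupported types, which is why the factory falls through to the final ILLEGAL_TYPE_OF_ARGUMENT throw above. A toy version of that dispatch pattern (hypothetical names, not the real helper):

#include <memory>

struct IAggregateFunction { virtual ~IAggregateFunction() = default; };

template <typename T>
struct IntervalLengthSumImpl : IAggregateFunction {};

enum class TypeIndex { UInt64, Float64, DateTime, Other };

// Toy stand-in for createWithBasicNumberOrDateOrDateTime: nullptr means "unsupported type".
static std::shared_ptr<IAggregateFunction> create_for_type(TypeIndex idx)
{
    switch (idx)
    {
        case TypeIndex::UInt64:   return std::make_shared<IntervalLengthSumImpl<unsigned long long>>();
        case TypeIndex::Float64:  return std::make_shared<IntervalLengthSumImpl<double>>();
        case TypeIndex::DateTime: return std::make_shared<IntervalLengthSumImpl<unsigned int>>();
        default:                  return nullptr;
    }
}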
|
||||
|
||||
}
|
||||
|
@ -1,232 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
#include <AggregateFunctions/Combinators/AggregateFunctionNull.h>
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
|
||||
#include <Common/assert_cast.h>
|
||||
#include <base/arithmeticOverflow.h>
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
/** Calculate total length of intervals without intersections. Each interval is the pair of numbers [begin, end];
|
||||
* Returns UInt64 for integral types (UInt/Int*, Date/DateTime) and returns Float64 for Float*.
|
||||
*
|
||||
* Implementation simply stores intervals sorted by beginning and sums lengths at final.
|
||||
*/
|
||||
template <typename T>
|
||||
struct AggregateFunctionIntervalLengthSumData
|
||||
{
|
||||
constexpr static size_t MAX_ARRAY_SIZE = 0xFFFFFF;
|
||||
|
||||
using Segment = std::pair<T, T>;
|
||||
using Segments = PODArrayWithStackMemory<Segment, 64>;
|
||||
|
||||
bool sorted = false;
|
||||
|
||||
Segments segments;
|
||||
|
||||
void add(T begin, T end)
|
||||
{
|
||||
/// Reversed intervals are counted by absolute value of their length.
|
||||
if (unlikely(end < begin))
|
||||
std::swap(begin, end);
|
||||
else if (unlikely(begin == end))
|
||||
return;
|
||||
|
||||
if (sorted && !segments.empty())
|
||||
sorted = segments.back().first <= begin;
|
||||
segments.emplace_back(begin, end);
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionIntervalLengthSumData & other)
|
||||
{
|
||||
if (other.segments.empty())
|
||||
return;
|
||||
|
||||
const auto size = segments.size();
|
||||
|
||||
segments.insert(std::begin(other.segments), std::end(other.segments));
|
||||
|
||||
/// either sort whole container or do so partially merging ranges afterwards
|
||||
if (!sorted && !other.sorted)
|
||||
{
|
||||
::sort(std::begin(segments), std::end(segments));
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto begin = std::begin(segments);
|
||||
const auto middle = std::next(begin, size);
|
||||
const auto end = std::end(segments);
|
||||
|
||||
if (!sorted)
|
||||
::sort(begin, middle);
|
||||
|
||||
if (!other.sorted)
|
||||
::sort(middle, end);
|
||||
|
||||
std::inplace_merge(begin, middle, end);
|
||||
}
|
||||
|
||||
sorted = true;
|
||||
}
|
||||
|
||||
void sort()
|
||||
{
|
||||
if (sorted)
|
||||
return;
|
||||
|
||||
::sort(std::begin(segments), std::end(segments));
|
||||
sorted = true;
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinary(sorted, buf);
|
||||
writeBinary(segments.size(), buf);
|
||||
|
||||
for (const auto & time_gap : segments)
|
||||
{
|
||||
writeBinary(time_gap.first, buf);
|
||||
writeBinary(time_gap.second, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
readBinary(sorted, buf);
|
||||
|
||||
size_t size;
|
||||
readBinary(size, buf);
|
||||
|
||||
if (unlikely(size > MAX_ARRAY_SIZE))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {})", MAX_ARRAY_SIZE);
|
||||
|
||||
segments.clear();
|
||||
segments.reserve(size);
|
||||
|
||||
Segment segment;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
readBinary(segment.first, buf);
|
||||
readBinary(segment.second, buf);
|
||||
segments.emplace_back(segment);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename Data>
|
||||
class AggregateFunctionIntervalLengthSum final : public IAggregateFunctionDataHelper<Data, AggregateFunctionIntervalLengthSum<T, Data>>
|
||||
{
|
||||
private:
|
||||
static auto NO_SANITIZE_UNDEFINED length(typename Data::Segment segment)
|
||||
{
|
||||
return segment.second - segment.first;
|
||||
}
|
||||
|
||||
template <typename TResult>
|
||||
TResult getIntervalLengthSum(Data & data) const
|
||||
{
|
||||
if (data.segments.empty())
|
||||
return 0;
|
||||
|
||||
data.sort();
|
||||
|
||||
TResult res = 0;
|
||||
|
||||
typename Data::Segment curr_segment = data.segments[0];
|
||||
|
||||
for (size_t i = 1, size = data.segments.size(); i < size; ++i)
|
||||
{
|
||||
const typename Data::Segment & next_segment = data.segments[i];
|
||||
|
||||
/// Check if current interval intersects with next one then add length, otherwise advance interval end.
|
||||
if (curr_segment.second < next_segment.first)
|
||||
{
|
||||
res += length(curr_segment);
|
||||
curr_segment = next_segment;
|
||||
}
|
||||
else if (next_segment.second > curr_segment.second)
|
||||
{
|
||||
curr_segment.second = next_segment.second;
|
||||
}
|
||||
}
|
||||
res += length(curr_segment);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
public:
|
||||
String getName() const override { return "intervalLengthSum"; }
|
||||
|
||||
explicit AggregateFunctionIntervalLengthSum(const DataTypes & arguments)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionIntervalLengthSum<T, Data>>(arguments, {}, createResultType())
|
||||
{
|
||||
}
|
||||
|
||||
static DataTypePtr createResultType()
|
||||
{
|
||||
if constexpr (std::is_floating_point_v<T>)
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
AggregateFunctionPtr getOwnNullAdapter(
|
||||
const AggregateFunctionPtr & nested_function,
|
||||
const DataTypes & arguments,
|
||||
const Array & params,
|
||||
const AggregateFunctionProperties & /*properties*/) const override
|
||||
{
|
||||
return std::make_shared<AggregateFunctionNullVariadic<false, false>>(nested_function, arguments, params);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override
|
||||
{
|
||||
auto begin = assert_cast<const ColumnVector<T> *>(columns[0])->getData()[row_num];
|
||||
auto end = assert_cast<const ColumnVector<T> *>(columns[1])->getData()[row_num];
|
||||
this->data(place).add(begin, end);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).serialize(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
this->data(place).deserialize(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
if constexpr (std::is_floating_point_v<T>)
|
||||
assert_cast<ColumnFloat64 &>(to).getData().push_back(getIntervalLengthSum<Float64>(this->data(place)));
|
||||
else
|
||||
assert_cast<ColumnUInt64 &>(to).getData().push_back(getIntervalLengthSum<UInt64>(this->data(place)));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -1,19 +1,339 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct Settings;
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct KolmogorovSmirnov : public StatisticalSample<Float64, Float64>
|
||||
{
|
||||
enum class Alternative
|
||||
{
|
||||
TwoSided,
|
||||
Less,
|
||||
Greater
|
||||
};
|
||||
|
||||
std::pair<Float64, Float64> getResult(Alternative alternative, String method)
|
||||
{
|
||||
::sort(x.begin(), x.end());
|
||||
::sort(y.begin(), y.end());
|
||||
|
||||
Float64 max_s = std::numeric_limits<Float64>::min();
|
||||
Float64 min_s = std::numeric_limits<Float64>::max();
|
||||
Float64 now_s = 0;
|
||||
UInt64 pos_x = 0;
|
||||
UInt64 pos_y = 0;
|
||||
UInt64 pos_tmp;
|
||||
UInt64 n1 = x.size();
|
||||
UInt64 n2 = y.size();
|
||||
|
||||
const Float64 n1_d = 1. / n1;
|
||||
const Float64 n2_d = 1. / n2;
|
||||
const Float64 tol = 1e-7;
|
||||
|
||||
// reference: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
|
||||
while (pos_x < x.size() && pos_y < y.size())
|
||||
{
|
||||
if (likely(fabs(x[pos_x] - y[pos_y]) >= tol))
|
||||
{
|
||||
if (x[pos_x] < y[pos_y])
|
||||
{
|
||||
now_s += n1_d;
|
||||
++pos_x;
|
||||
}
|
||||
else
|
||||
{
|
||||
now_s -= n2_d;
|
||||
++pos_y;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pos_tmp = pos_x + 1;
|
||||
while (pos_tmp < x.size() && unlikely(fabs(x[pos_tmp] - x[pos_x]) <= tol))
|
||||
pos_tmp++;
|
||||
now_s += n1_d * (pos_tmp - pos_x);
|
||||
pos_x = pos_tmp;
|
||||
pos_tmp = pos_y + 1;
|
||||
while (pos_tmp < y.size() && unlikely(fabs(y[pos_tmp] - y[pos_y]) <= tol))
|
||||
pos_tmp++;
|
||||
now_s -= n2_d * (pos_tmp - pos_y);
|
||||
pos_y = pos_tmp;
|
||||
}
|
||||
max_s = std::max(max_s, now_s);
|
||||
min_s = std::min(min_s, now_s);
|
||||
}
|
||||
now_s += n1_d * (x.size() - pos_x) - n2_d * (y.size() - pos_y);
|
||||
min_s = std::min(min_s, now_s);
|
||||
max_s = std::max(max_s, now_s);
|
||||
|
||||
Float64 d = 0;
|
||||
if (alternative == Alternative::TwoSided)
|
||||
d = std::max(std::abs(max_s), std::abs(min_s));
|
||||
else if (alternative == Alternative::Less)
|
||||
d = -min_s;
|
||||
else if (alternative == Alternative::Greater)
|
||||
d = max_s;
|
||||
|
||||
UInt64 g = std::__gcd(n1, n2);
|
||||
UInt64 nx_g = n1 / g;
|
||||
UInt64 ny_g = n2 / g;
|
||||
|
||||
if (method == "auto")
|
||||
method = std::max(n1, n2) <= 10000 ? "exact" : "asymptotic";
|
||||
else if (method == "exact" && nx_g >= std::numeric_limits<Int32>::max() / ny_g)
|
||||
method = "asymptotic";
|
||||
|
||||
Float64 p_value = std::numeric_limits<Float64>::infinity();
|
||||
|
||||
if (method == "exact")
|
||||
{
|
||||
/* reference:
|
||||
* Gunar Schröer and Dietrich Trenkler
|
||||
* Exact and Randomization Distributions of Kolmogorov-Smirnov, Tests for Two or Three Samples
|
||||
*
|
||||
* and
|
||||
*
|
||||
* Thomas Viehmann
|
||||
* Numerically more stable computation of the p-values for the two-sample Kolmogorov-Smirnov test
|
||||
*/
|
||||
if (n2 > n1)
|
||||
std::swap(n1, n2);
|
||||
|
||||
const Float64 f_n1 = static_cast<Float64>(n1);
|
||||
const Float64 f_n2 = static_cast<Float64>(n2);
|
||||
const Float64 k_d = (0.5 + floor(d * f_n2 * f_n1 - tol)) / (f_n2 * f_n1);
|
||||
PaddedPODArray<Float64> c(n1 + 1);
|
||||
|
||||
auto check = alternative == Alternative::TwoSided ?
|
||||
[](const Float64 & q, const Float64 & r, const Float64 & s) { return fabs(r - s) >= q; }
|
||||
: [](const Float64 & q, const Float64 & r, const Float64 & s) { return r - s >= q; };
|
||||
|
||||
c[0] = 0;
|
||||
for (UInt64 j = 1; j <= n1; j++)
|
||||
if (check(k_d, 0., j / f_n1))
|
||||
c[j] = 1.;
|
||||
else
|
||||
c[j] = c[j - 1];
|
||||
|
||||
for (UInt64 i = 1; i <= n2; i++)
|
||||
{
|
||||
if (check(k_d, i / f_n2, 0.))
|
||||
c[0] = 1.;
|
||||
for (UInt64 j = 1; j <= n1; j++)
|
||||
if (check(k_d, i / f_n2, j / f_n1))
|
||||
c[j] = 1.;
|
||||
else
|
||||
{
|
||||
Float64 v = i / static_cast<Float64>(i + j);
|
||||
Float64 w = j / static_cast<Float64>(i + j);
|
||||
c[j] = v * c[j] + w * c[j - 1];
|
||||
}
|
||||
}
|
||||
p_value = c[n1];
|
||||
}
|
||||
else if (method == "asymp" || method == "asymptotic")
|
||||
{
|
||||
Float64 n = std::min(n1, n2);
|
||||
Float64 m = std::max(n1, n2);
|
||||
Float64 p = sqrt((n * m) / (n + m)) * d;
|
||||
|
||||
if (alternative == Alternative::TwoSided)
|
||||
{
|
||||
/* reference:
|
||||
* J.DURBIN
|
||||
* Distribution theory for tests based on the sample distribution function
|
||||
*/
|
||||
Float64 new_val, old_val, s, w, z;
|
||||
UInt64 k_max = static_cast<UInt64>(sqrt(2 - log(tol)));
|
||||
|
||||
if (p < 1)
|
||||
{
|
||||
z = - (M_PI_2 * M_PI_4) / (p * p);
|
||||
w = log(p);
|
||||
s = 0;
|
||||
for (UInt64 k = 1; k < k_max; k += 2)
|
||||
s += exp(k * k * z - w);
|
||||
p = s / 0.398942280401432677939946059934;
|
||||
}
|
||||
else
|
||||
{
|
||||
z = -2 * p * p;
|
||||
s = -1;
|
||||
UInt64 k = 1;
|
||||
old_val = 0;
|
||||
new_val = 1;
|
||||
while (fabs(old_val - new_val) > tol)
|
||||
{
|
||||
old_val = new_val;
|
||||
new_val += 2 * s * exp(z * k * k);
|
||||
s *= -1;
|
||||
k++;
|
||||
}
|
||||
p = new_val;
|
||||
}
|
||||
p_value = 1 - p;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* reference:
|
||||
* J. L. HODGES, Jr
|
||||
* The significance probability of the Smirnov two-sample test
|
||||
*/
|
||||
|
||||
// Use Hodges' suggested approximation Eqn 5.3
|
||||
// Requires m to be the larger of (n1, n2)
|
||||
Float64 expt = -2 * p * p - 2 * p * (m + 2 * n) / sqrt(m * n * (m + n)) / 3.0;
|
||||
p_value = exp(expt);
|
||||
}
|
||||
}
|
||||
return {d, p_value};
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class AggregateFunctionKolmogorovSmirnov final:
|
||||
public IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov>
|
||||
{
|
||||
private:
|
||||
using Alternative = typename KolmogorovSmirnov::Alternative;
|
||||
Alternative alternative = Alternative::TwoSided;
|
||||
String method = "auto";
|
||||
|
||||
public:
|
||||
explicit AggregateFunctionKolmogorovSmirnov(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov> ({arguments}, {}, createResultType())
|
||||
{
|
||||
if (params.size() > 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require two parameter or less", getName());
|
||||
|
||||
if (params.empty())
|
||||
return;
|
||||
|
||||
if (params[0].getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName());
|
||||
|
||||
const auto & param = params[0].get<String>();
|
||||
if (param == "two-sided")
|
||||
alternative = Alternative::TwoSided;
|
||||
else if (param == "less")
|
||||
alternative = Alternative::Less;
|
||||
else if (param == "greater")
|
||||
alternative = Alternative::Greater;
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown parameter in aggregate function {}. "
|
||||
"It must be one of: 'two-sided', 'less', 'greater'", getName());
|
||||
|
||||
if (params.size() != 2)
|
||||
return;
|
||||
|
||||
if (params[1].getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a String", getName());
|
||||
|
||||
method = params[1].get<String>();
|
||||
if (method != "auto" && method != "exact" && method != "asymp" && method != "asymptotic")
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. "
|
||||
"It must be one of: 'auto', 'exact', 'asymp' (or 'asymptotic')", getName());
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return "kolmogorovSmirnovTest";
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
|
||||
static DataTypePtr createResultType()
|
||||
{
|
||||
DataTypes types
|
||||
{
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
};
|
||||
|
||||
Strings names
|
||||
{
|
||||
"d_statistic",
|
||||
"p_value"
|
||||
};
|
||||
|
||||
return std::make_shared<DataTypeTuple>(
|
||||
std::move(types),
|
||||
std::move(names)
|
||||
);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
Float64 value = columns[0]->getFloat64(row_num);
|
||||
UInt8 is_second = columns[1]->getUInt(row_num);
|
||||
if (is_second)
|
||||
this->data(place).addY(value, arena);
|
||||
else
|
||||
this->data(place).addX(value, arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs), arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
this->data(place).read(buf, arena);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
if (!this->data(place).size_x || !this->data(place).size_y)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName());
|
||||
|
||||
auto [d_statistic, p_value] = this->data(place).getResult(alternative, method);
|
||||
|
||||
/// Because p-value is a probability.
|
||||
p_value = std::min(1.0, std::max(0.0, p_value));
|
||||
|
||||
auto & column_tuple = assert_cast<ColumnTuple &>(to);
|
||||
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
|
||||
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
|
||||
|
||||
column_stat.getData().push_back(d_statistic);
|
||||
column_value.getData().push_back(p_value);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionKolmogorovSmirnovTest(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
|
@ -1,331 +0,0 @@
|
||||
#pragma once
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
#include <base/types.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
struct KolmogorovSmirnov : public StatisticalSample<Float64, Float64>
|
||||
{
|
||||
enum class Alternative
|
||||
{
|
||||
TwoSided,
|
||||
Less,
|
||||
Greater
|
||||
};
|
||||
|
||||
std::pair<Float64, Float64> getResult(Alternative alternative, String method)
|
||||
{
|
||||
::sort(x.begin(), x.end());
|
||||
::sort(y.begin(), y.end());
|
||||
|
||||
Float64 max_s = std::numeric_limits<Float64>::min();
|
||||
Float64 min_s = std::numeric_limits<Float64>::max();
|
||||
Float64 now_s = 0;
|
||||
UInt64 pos_x = 0;
|
||||
UInt64 pos_y = 0;
|
||||
UInt64 pos_tmp;
|
||||
UInt64 n1 = x.size();
|
||||
UInt64 n2 = y.size();
|
||||
|
||||
const Float64 n1_d = 1. / n1;
|
||||
const Float64 n2_d = 1. / n2;
|
||||
const Float64 tol = 1e-7;
|
||||
|
||||
// reference: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
|
||||
while (pos_x < x.size() && pos_y < y.size())
|
||||
{
|
||||
if (likely(fabs(x[pos_x] - y[pos_y]) >= tol))
|
||||
{
|
||||
if (x[pos_x] < y[pos_y])
|
||||
{
|
||||
now_s += n1_d;
|
||||
++pos_x;
|
||||
}
|
||||
else
|
||||
{
|
||||
now_s -= n2_d;
|
||||
++pos_y;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pos_tmp = pos_x + 1;
|
||||
while (pos_tmp < x.size() && unlikely(fabs(x[pos_tmp] - x[pos_x]) <= tol))
|
||||
pos_tmp++;
|
||||
now_s += n1_d * (pos_tmp - pos_x);
|
||||
pos_x = pos_tmp;
|
||||
pos_tmp = pos_y + 1;
|
||||
while (pos_tmp < y.size() && unlikely(fabs(y[pos_tmp] - y[pos_y]) <= tol))
|
||||
pos_tmp++;
|
||||
now_s -= n2_d * (pos_tmp - pos_y);
|
||||
pos_y = pos_tmp;
|
||||
}
|
||||
max_s = std::max(max_s, now_s);
|
||||
min_s = std::min(min_s, now_s);
|
||||
}
|
||||
now_s += n1_d * (x.size() - pos_x) - n2_d * (y.size() - pos_y);
|
||||
min_s = std::min(min_s, now_s);
|
||||
max_s = std::max(max_s, now_s);
|
||||
|
||||
Float64 d = 0;
|
||||
if (alternative == Alternative::TwoSided)
|
||||
d = std::max(std::abs(max_s), std::abs(min_s));
|
||||
else if (alternative == Alternative::Less)
|
||||
d = -min_s;
|
||||
else if (alternative == Alternative::Greater)
|
||||
d = max_s;
|
||||
|
||||
UInt64 g = std::__gcd(n1, n2);
|
||||
UInt64 nx_g = n1 / g;
|
||||
UInt64 ny_g = n2 / g;
|
||||
|
||||
if (method == "auto")
|
||||
method = std::max(n1, n2) <= 10000 ? "exact" : "asymptotic";
|
||||
else if (method == "exact" && nx_g >= std::numeric_limits<Int32>::max() / ny_g)
|
||||
method = "asymptotic";
|
||||
|
||||
Float64 p_value = std::numeric_limits<Float64>::infinity();
|
||||
|
||||
if (method == "exact")
|
||||
{
|
||||
/* reference:
|
||||
* Gunar Schröer and Dietrich Trenkler
|
||||
* Exact and Randomization Distributions of Kolmogorov-Smirnov, Tests for Two or Three Samples
|
||||
*
|
||||
* and
|
||||
*
|
||||
* Thomas Viehmann
|
||||
* Numerically more stable computation of the p-values for the two-sample Kolmogorov-Smirnov test
|
||||
*/
|
||||
if (n2 > n1)
|
||||
std::swap(n1, n2);
|
||||
|
||||
const Float64 f_n1 = static_cast<Float64>(n1);
|
||||
const Float64 f_n2 = static_cast<Float64>(n2);
|
||||
const Float64 k_d = (0.5 + floor(d * f_n2 * f_n1 - tol)) / (f_n2 * f_n1);
|
||||
PaddedPODArray<Float64> c(n1 + 1);
|
||||
|
||||
auto check = alternative == Alternative::TwoSided ?
|
||||
[](const Float64 & q, const Float64 & r, const Float64 & s) { return fabs(r - s) >= q; }
|
||||
: [](const Float64 & q, const Float64 & r, const Float64 & s) { return r - s >= q; };
|
||||
|
||||
c[0] = 0;
|
||||
for (UInt64 j = 1; j <= n1; j++)
|
||||
if (check(k_d, 0., j / f_n1))
|
||||
c[j] = 1.;
|
||||
else
|
||||
c[j] = c[j - 1];
|
||||
|
||||
for (UInt64 i = 1; i <= n2; i++)
|
||||
{
|
||||
if (check(k_d, i / f_n2, 0.))
|
||||
c[0] = 1.;
|
||||
for (UInt64 j = 1; j <= n1; j++)
|
||||
if (check(k_d, i / f_n2, j / f_n1))
|
||||
c[j] = 1.;
|
||||
else
|
||||
{
|
||||
Float64 v = i / static_cast<Float64>(i + j);
|
||||
Float64 w = j / static_cast<Float64>(i + j);
|
||||
c[j] = v * c[j] + w * c[j - 1];
|
||||
}
|
||||
}
|
||||
p_value = c[n1];
|
||||
}
|
||||
else if (method == "asymp" || method == "asymptotic")
|
||||
{
|
||||
Float64 n = std::min(n1, n2);
|
||||
Float64 m = std::max(n1, n2);
|
||||
Float64 p = sqrt((n * m) / (n + m)) * d;
|
||||
|
||||
if (alternative == Alternative::TwoSided)
|
||||
{
|
||||
/* reference:
|
||||
* J.DURBIN
|
||||
* Distribution theory for tests based on the sample distribution function
|
||||
*/
|
||||
Float64 new_val, old_val, s, w, z;
|
||||
UInt64 k_max = static_cast<UInt64>(sqrt(2 - log(tol)));
|
||||
|
||||
if (p < 1)
|
||||
{
|
||||
z = - (M_PI_2 * M_PI_4) / (p * p);
|
||||
w = log(p);
|
||||
s = 0;
|
||||
for (UInt64 k = 1; k < k_max; k += 2)
|
||||
s += exp(k * k * z - w);
|
||||
p = s / 0.398942280401432677939946059934;
|
||||
}
|
||||
else
|
||||
{
|
||||
z = -2 * p * p;
|
||||
s = -1;
|
||||
UInt64 k = 1;
|
||||
old_val = 0;
|
||||
new_val = 1;
|
||||
while (fabs(old_val - new_val) > tol)
|
||||
{
|
||||
old_val = new_val;
|
||||
new_val += 2 * s * exp(z * k * k);
|
||||
s *= -1;
|
||||
k++;
|
||||
}
|
||||
p = new_val;
|
||||
}
|
||||
p_value = 1 - p;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* reference:
|
||||
* J. L. HODGES, Jr
|
||||
* The significance probability of the Smirnov two-sample test
|
||||
*/
|
||||
|
||||
// Use Hodges' suggested approximation Eqn 5.3
|
||||
// Requires m to be the larger of (n1, n2)
|
||||
Float64 expt = -2 * p * p - 2 * p * (m + 2 * n) / sqrt(m * n * (m + n)) / 3.0;
|
||||
p_value = exp(expt);
|
||||
}
|
||||
}
|
||||
return {d, p_value};
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class AggregateFunctionKolmogorovSmirnov final:
|
||||
public IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov>
|
||||
{
|
||||
private:
|
||||
using Alternative = typename KolmogorovSmirnov::Alternative;
|
||||
Alternative alternative = Alternative::TwoSided;
|
||||
String method = "auto";
|
||||
|
||||
public:
|
||||
explicit AggregateFunctionKolmogorovSmirnov(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov> ({arguments}, {}, createResultType())
|
||||
{
|
||||
if (params.size() > 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require two parameter or less", getName());
|
||||
|
||||
if (params.empty())
|
||||
return;
|
||||
|
||||
if (params[0].getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName());
|
||||
|
||||
const auto & param = params[0].get<String>();
|
||||
if (param == "two-sided")
|
||||
alternative = Alternative::TwoSided;
|
||||
else if (param == "less")
|
||||
alternative = Alternative::Less;
|
||||
else if (param == "greater")
|
||||
alternative = Alternative::Greater;
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown parameter in aggregate function {}. "
|
||||
"It must be one of: 'two-sided', 'less', 'greater'", getName());
|
||||
|
||||
if (params.size() != 2)
|
||||
return;
|
||||
|
||||
if (params[1].getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a String", getName());
|
||||
|
||||
method = params[1].get<String>();
|
||||
if (method != "auto" && method != "exact" && method != "asymp" && method != "asymptotic")
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. "
|
||||
"It must be one of: 'auto', 'exact', 'asymp' (or 'asymptotic')", getName());
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return "kolmogorovSmirnovTest";
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
|
||||
static DataTypePtr createResultType()
|
||||
{
|
||||
DataTypes types
|
||||
{
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
};
|
||||
|
||||
Strings names
|
||||
{
|
||||
"d_statistic",
|
||||
"p_value"
|
||||
};
|
||||
|
||||
return std::make_shared<DataTypeTuple>(
|
||||
std::move(types),
|
||||
std::move(names)
|
||||
);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
Float64 value = columns[0]->getFloat64(row_num);
|
||||
UInt8 is_second = columns[1]->getUInt(row_num);
|
||||
if (is_second)
|
||||
this->data(place).addY(value, arena);
|
||||
else
|
||||
this->data(place).addX(value, arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs), arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
this->data(place).read(buf, arena);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
if (!this->data(place).size_x || !this->data(place).size_y)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName());
|
||||
|
||||
auto [d_statistic, p_value] = this->data(place).getResult(alternative, method);
|
||||
|
||||
/// Because p-value is a probability.
|
||||
p_value = std::min(1.0, std::max(0.0, p_value));
|
||||
|
||||
auto & column_tuple = assert_cast<ColumnTuple &>(to);
|
||||
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
|
||||
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
|
||||
|
||||
column_stat.getData().push_back(d_statistic);
|
||||
column_value.getData().push_back(p_value);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -1,12 +1,30 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
|
||||
#include <numeric>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnsDateTime.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <boost/math/distributions/normal.hpp>
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
@ -16,29 +34,320 @@ struct Settings;
|
||||
namespace
|
||||
{
|
||||
|
||||
AggregateFunctionPtr
|
||||
createAggregateFunctionLargestTriangleThreeBuckets(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
struct LargestTriangleThreeBucketsData : public StatisticalSample<Float64, Float64>
|
||||
{
|
||||
void add(const Float64 xval, const Float64 yval, Arena * arena)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
|
||||
|
||||
if (!(isNumber(argument_types[0]) || isDateOrDate32(argument_types[0]) || isDateTime(argument_types[0])
|
||||
|| isDateTime64(argument_types[0])))
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED,
|
||||
"Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the first argument",
|
||||
name);
|
||||
|
||||
if (!(isNumber(argument_types[1]) || isDateOrDate32(argument_types[1]) || isDateTime(argument_types[1])
|
||||
|| isDateTime64(argument_types[1])))
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED,
|
||||
"Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the second argument",
|
||||
name);
|
||||
|
||||
return std::make_shared<AggregateFunctionLargestTriangleThreeBuckets>(argument_types, parameters);
|
||||
this->addX(xval, arena);
|
||||
this->addY(yval, arena);
|
||||
}
|
||||
|
||||
void sort(Arena * arena)
|
||||
{
|
||||
// sort the this->x and this->y in ascending order of this->x using index
|
||||
std::vector<size_t> index(this->x.size());
|
||||
|
||||
std::iota(index.begin(), index.end(), 0);
|
||||
::sort(index.begin(), index.end(), [&](size_t i1, size_t i2) { return this->x[i1] < this->x[i2]; });
|
||||
|
||||
SampleX temp_x{};
|
||||
SampleY temp_y{};
|
||||
|
||||
for (size_t i = 0; i < this->x.size(); ++i)
|
||||
{
|
||||
temp_x.push_back(this->x[index[i]], arena);
|
||||
temp_y.push_back(this->y[index[i]], arena);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < this->x.size(); ++i)
|
||||
{
|
||||
this->x[i] = temp_x[i];
|
||||
this->y[i] = temp_y[i];
|
||||
}
|
||||
}
|
||||
|
||||
PODArray<std::pair<Float64, Float64>> getResult(size_t total_buckets, Arena * arena)
|
||||
{
|
||||
// Sort the data
|
||||
this->sort(arena);
|
||||
|
||||
PODArray<std::pair<Float64, Float64>> result;
|
||||
|
||||
// Handle special cases for small data list
|
||||
if (this->x.size() <= total_buckets)
|
||||
{
|
||||
for (size_t i = 0; i < this->x.size(); ++i)
|
||||
{
|
||||
result.emplace_back(std::make_pair(this->x[i], this->y[i]));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Handle special cases for 0 or 1 or 2 buckets
|
||||
if (total_buckets == 0)
|
||||
return result;
|
||||
if (total_buckets == 1)
|
||||
{
|
||||
result.emplace_back(std::make_pair(this->x.front(), this->y.front()));
|
||||
return result;
|
||||
}
|
||||
if (total_buckets == 2)
|
||||
{
|
||||
result.emplace_back(std::make_pair(this->x.front(), this->y.front()));
|
||||
result.emplace_back(std::make_pair(this->x.back(), this->y.back()));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Find the size of each bucket
|
||||
size_t single_bucket_size = this->x.size() / total_buckets;
|
||||
|
||||
// Include the first data point
|
||||
result.emplace_back(std::make_pair(this->x[0], this->y[0]));
|
||||
|
||||
for (size_t i = 1; i < total_buckets - 1; ++i) // Skip the first and last bucket
|
||||
{
|
||||
size_t start_index = i * single_bucket_size;
|
||||
size_t end_index = (i + 1) * single_bucket_size;
|
||||
|
||||
// Compute the average point in the next bucket
|
||||
Float64 avg_x = 0;
|
||||
Float64 avg_y = 0;
|
||||
for (size_t j = end_index; j < (i + 2) * single_bucket_size; ++j)
|
||||
{
|
||||
avg_x += this->x[j];
|
||||
avg_y += this->y[j];
|
||||
}
|
||||
avg_x /= single_bucket_size;
|
||||
avg_y /= single_bucket_size;
|
||||
|
||||
// Find the point in the current bucket that forms the largest triangle
|
||||
size_t max_index = start_index;
|
||||
Float64 max_area = 0.0;
|
||||
for (size_t j = start_index; j < end_index; ++j)
|
||||
{
|
||||
Float64 area = std::abs(
|
||||
0.5
|
||||
* (result.back().first * this->y[j] + this->x[j] * avg_y + avg_x * result.back().second - result.back().first * avg_y
|
||||
- this->x[j] * result.back().second - avg_x * this->y[j]));
|
||||
if (area > max_area)
|
||||
{
|
||||
max_area = area;
|
||||
max_index = j;
|
||||
}
|
||||
}
|
||||
|
||||
// Include the selected point
|
||||
result.emplace_back(std::make_pair(this->x[max_index], this->y[max_index]));
|
||||
}
|
||||
|
||||
// Include the last data point
|
||||
result.emplace_back(std::make_pair(this->x.back(), this->y.back()));
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
class AggregateFunctionLargestTriangleThreeBuckets final : public IAggregateFunctionDataHelper<LargestTriangleThreeBucketsData, AggregateFunctionLargestTriangleThreeBuckets>
|
||||
{
|
||||
private:
|
||||
UInt64 total_buckets{0};
|
||||
TypeIndex x_type;
|
||||
TypeIndex y_type;
|
||||
|
||||
public:
|
||||
explicit AggregateFunctionLargestTriangleThreeBuckets(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<LargestTriangleThreeBucketsData, AggregateFunctionLargestTriangleThreeBuckets>({arguments}, {}, createResultType(arguments))
|
||||
{
|
||||
if (params.size() != 1)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require one parameter", getName());
|
||||
|
||||
if (params[0].getType() != Field::Types::UInt64)
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a UInt64", getName());
|
||||
|
||||
total_buckets = params[0].get<UInt64>();
|
||||
|
||||
this->x_type = WhichDataType(arguments[0]).idx;
|
||||
this->y_type = WhichDataType(arguments[1]).idx;
|
||||
}
|
||||
|
||||
static constexpr auto name = "largestTriangleThreeBuckets";
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
|
||||
static DataTypePtr createResultType(const DataTypes & arguments)
|
||||
{
|
||||
TypeIndex x_type = arguments[0]->getTypeId();
|
||||
TypeIndex y_type = arguments[1]->getTypeId();
|
||||
|
||||
UInt32 x_scale = 0;
|
||||
UInt32 y_scale = 0;
|
||||
|
||||
if (const auto * datetime64_type = typeid_cast<const DataTypeDateTime64 *>(arguments[0].get()))
|
||||
{
|
||||
x_scale = datetime64_type->getScale();
|
||||
}
|
||||
|
||||
if (const auto * datetime64_type = typeid_cast<const DataTypeDateTime64 *>(arguments[1].get()))
|
||||
{
|
||||
y_scale = datetime64_type->getScale();
|
||||
}
|
||||
|
||||
DataTypes types = {getDataTypeFromTypeIndex(x_type, x_scale), getDataTypeFromTypeIndex(y_type, y_scale)};
|
||||
|
||||
auto tuple = std::make_shared<DataTypeTuple>(std::move(types));
|
||||
|
||||
return std::make_shared<DataTypeArray>(tuple);
|
||||
}
|
||||
|
||||
static DataTypePtr getDataTypeFromTypeIndex(TypeIndex type_index, UInt32 scale)
|
||||
{
|
||||
DataTypePtr data_type;
|
||||
switch (type_index)
|
||||
{
|
||||
case TypeIndex::Date:
|
||||
data_type = std::make_shared<DataTypeDate>();
|
||||
break;
|
||||
case TypeIndex::Date32:
|
||||
data_type = std::make_shared<DataTypeDate32>();
|
||||
break;
|
||||
case TypeIndex::DateTime:
|
||||
data_type = std::make_shared<DataTypeDateTime>();
|
||||
break;
|
||||
case TypeIndex::DateTime64:
|
||||
data_type = std::make_shared<DataTypeDateTime64>(scale);
|
||||
break;
|
||||
default:
|
||||
data_type = std::make_shared<DataTypeNumber<Float64>>();
|
||||
}
|
||||
return data_type;
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
Float64 x = getFloat64DataFromColumn(columns[0], row_num, this->x_type);
|
||||
Float64 y = getFloat64DataFromColumn(columns[1], row_num, this->y_type);
|
||||
this->data(place).add(x, y, arena);
|
||||
}
|
||||
|
||||
Float64 getFloat64DataFromColumn(const IColumn * column, size_t row_num, TypeIndex type_index) const
|
||||
{
|
||||
switch (type_index)
|
||||
{
|
||||
case TypeIndex::Date:
|
||||
return static_cast<const ColumnDate &>(*column).getData()[row_num];
|
||||
case TypeIndex::Date32:
|
||||
return static_cast<const ColumnDate32 &>(*column).getData()[row_num];
|
||||
case TypeIndex::DateTime:
|
||||
return static_cast<const ColumnDateTime &>(*column).getData()[row_num];
|
||||
case TypeIndex::DateTime64:
|
||||
return static_cast<const ColumnDateTime64 &>(*column).getData()[row_num];
|
||||
default:
|
||||
return column->getFloat64(row_num);
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & a = this->data(place);
|
||||
const auto & b = this->data(rhs);
|
||||
|
||||
a.merge(b, arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
this->data(place).read(buf, arena);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
|
||||
{
|
||||
auto res = this->data(place).getResult(total_buckets, arena);
|
||||
|
||||
auto & col = assert_cast<ColumnArray &>(to);
|
||||
auto & col_offsets = assert_cast<ColumnArray::ColumnOffsets &>(col.getOffsetsColumn());
|
||||
|
||||
auto column_x_adder_func = getColumnAdderFunc(x_type);
|
||||
auto column_y_adder_func = getColumnAdderFunc(y_type);
|
||||
|
||||
for (const auto & elem : res)
|
||||
{
|
||||
auto & column_tuple = assert_cast<ColumnTuple &>(col.getData());
|
||||
column_x_adder_func(column_tuple.getColumn(0), elem.first);
|
||||
column_y_adder_func(column_tuple.getColumn(1), elem.second);
|
||||
}
|
||||
|
||||
col_offsets.getData().push_back(col.getData().size());
|
||||
}
|
||||
|
||||
std::function<void(IColumn &, Float64)> getColumnAdderFunc(TypeIndex type_index) const
|
||||
{
|
||||
switch (type_index)
|
||||
{
|
||||
case TypeIndex::Date:
|
||||
return [](IColumn & column, Float64 value)
|
||||
{
|
||||
auto & col = assert_cast<ColumnDate &>(column);
|
||||
col.getData().push_back(static_cast<UInt16>(value));
|
||||
};
|
||||
case TypeIndex::Date32:
|
||||
return [](IColumn & column, Float64 value)
|
||||
{
|
||||
auto & col = assert_cast<ColumnDate32 &>(column);
|
||||
col.getData().push_back(static_cast<UInt32>(value));
|
||||
};
|
||||
case TypeIndex::DateTime:
|
||||
return [](IColumn & column, Float64 value)
|
||||
{
|
||||
auto & col = assert_cast<ColumnDateTime &>(column);
|
||||
col.getData().push_back(static_cast<UInt32>(value));
|
||||
};
|
||||
case TypeIndex::DateTime64:
|
||||
return [](IColumn & column, Float64 value)
|
||||
{
|
||||
auto & col = assert_cast<ColumnDateTime64 &>(column);
|
||||
col.getData().push_back(static_cast<UInt64>(value));
|
||||
};
|
||||
default:
|
||||
return [](IColumn & column, Float64 value)
|
||||
{
|
||||
auto & col = assert_cast<ColumnFloat64 &>(column);
|
||||
col.getData().push_back(value);
|
||||
};
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
AggregateFunctionPtr
|
||||
createAggregateFunctionLargestTriangleThreeBuckets(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
|
||||
if (!(isNumber(argument_types[0]) || isDateOrDate32(argument_types[0]) || isDateTime(argument_types[0])
|
||||
|| isDateTime64(argument_types[0])))
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED,
|
||||
"Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the first argument",
|
||||
name);
|
||||
|
||||
if (!(isNumber(argument_types[1]) || isDateOrDate32(argument_types[1]) || isDateTime(argument_types[1])
|
||||
|| isDateTime64(argument_types[1])))
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED,
|
||||
"Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the second argument",
|
||||
name);
|
||||
|
||||
return std::make_shared<AggregateFunctionLargestTriangleThreeBuckets>(argument_types, parameters);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,327 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnsDateTime.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <base/types.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <boost/math/distributions/normal.hpp>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
struct LargestTriangleThreeBucketsData : public StatisticalSample<Float64, Float64>
|
||||
{
|
||||
void add(const Float64 xval, const Float64 yval, Arena * arena)
|
||||
{
|
||||
this->addX(xval, arena);
|
||||
this->addY(yval, arena);
|
||||
}
|
||||
|
||||
void sort(Arena * arena)
|
||||
{
|
||||
// sort the this->x and this->y in ascending order of this->x using index
|
||||
std::vector<size_t> index(this->x.size());
|
||||
|
||||
std::iota(index.begin(), index.end(), 0);
|
||||
::sort(index.begin(), index.end(), [&](size_t i1, size_t i2) { return this->x[i1] < this->x[i2]; });
|
||||
|
||||
SampleX temp_x{};
|
||||
SampleY temp_y{};
|
||||
|
||||
for (size_t i = 0; i < this->x.size(); ++i)
|
||||
{
|
||||
temp_x.push_back(this->x[index[i]], arena);
|
||||
temp_y.push_back(this->y[index[i]], arena);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < this->x.size(); ++i)
|
||||
{
|
||||
this->x[i] = temp_x[i];
|
||||
this->y[i] = temp_y[i];
|
||||
}
|
||||
}
|
||||
|
||||
PODArray<std::pair<Float64, Float64>> getResult(size_t total_buckets, Arena * arena)
|
||||
{
|
||||
// Sort the data
|
||||
this->sort(arena);
|
||||
|
||||
PODArray<std::pair<Float64, Float64>> result;
|
||||
|
||||
// Handle special cases for small data list
|
||||
if (this->x.size() <= total_buckets)
|
||||
{
|
||||
for (size_t i = 0; i < this->x.size(); ++i)
|
||||
{
|
||||
result.emplace_back(std::make_pair(this->x[i], this->y[i]));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Handle special cases for 0 or 1 or 2 buckets
|
||||
if (total_buckets == 0)
|
||||
return result;
|
||||
if (total_buckets == 1)
|
||||
{
|
||||
result.emplace_back(std::make_pair(this->x.front(), this->y.front()));
|
||||
return result;
|
||||
}
|
||||
if (total_buckets == 2)
|
||||
{
|
||||
result.emplace_back(std::make_pair(this->x.front(), this->y.front()));
|
||||
result.emplace_back(std::make_pair(this->x.back(), this->y.back()));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Find the size of each bucket
|
||||
size_t single_bucket_size = this->x.size() / total_buckets;
|
||||
|
||||
// Include the first data point
|
||||
result.emplace_back(std::make_pair(this->x[0], this->y[0]));
|
||||
|
||||
for (size_t i = 1; i < total_buckets - 1; ++i) // Skip the first and last bucket
|
||||
{
|
||||
size_t start_index = i * single_bucket_size;
|
||||
size_t end_index = (i + 1) * single_bucket_size;
|
||||
|
||||
// Compute the average point in the next bucket
|
||||
Float64 avg_x = 0;
|
||||
Float64 avg_y = 0;
|
||||
for (size_t j = end_index; j < (i + 2) * single_bucket_size; ++j)
|
||||
{
|
||||
avg_x += this->x[j];
|
||||
avg_y += this->y[j];
|
||||
}
|
||||
avg_x /= single_bucket_size;
|
||||
avg_y /= single_bucket_size;
|
||||
|
||||
// Find the point in the current bucket that forms the largest triangle
|
||||
size_t max_index = start_index;
|
||||
Float64 max_area = 0.0;
|
||||
for (size_t j = start_index; j < end_index; ++j)
|
||||
{
|
||||
Float64 area = std::abs(
|
||||
0.5
|
||||
* (result.back().first * this->y[j] + this->x[j] * avg_y + avg_x * result.back().second - result.back().first * avg_y
|
||||
- this->x[j] * result.back().second - avg_x * this->y[j]));
|
||||
if (area > max_area)
|
||||
{
|
||||
max_area = area;
|
||||
max_index = j;
|
||||
}
|
||||
}
|
||||
|
||||
// Include the selected point
|
||||
result.emplace_back(std::make_pair(this->x[max_index], this->y[max_index]));
|
||||
}
|
||||
|
||||
// Include the last data point
|
||||
result.emplace_back(std::make_pair(this->x.back(), this->y.back()));
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
class AggregateFunctionLargestTriangleThreeBuckets final : public IAggregateFunctionDataHelper<LargestTriangleThreeBucketsData, AggregateFunctionLargestTriangleThreeBuckets>
|
||||
{
|
||||
private:
|
||||
UInt64 total_buckets{0};
|
||||
TypeIndex x_type;
|
||||
TypeIndex y_type;
|
||||
|
||||
public:
|
||||
explicit AggregateFunctionLargestTriangleThreeBuckets(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<LargestTriangleThreeBucketsData, AggregateFunctionLargestTriangleThreeBuckets>({arguments}, {}, createResultType(arguments))
|
||||
{
|
||||
if (params.size() != 1)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require one parameter", getName());
|
||||
|
||||
if (params[0].getType() != Field::Types::UInt64)
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a UInt64", getName());
|
||||
|
||||
total_buckets = params[0].get<UInt64>();
|
||||
|
||||
this->x_type = WhichDataType(arguments[0]).idx;
|
||||
this->y_type = WhichDataType(arguments[1]).idx;
|
||||
}
|
||||
|
||||
static constexpr auto name = "largestTriangleThreeBuckets";
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
|
||||
static DataTypePtr createResultType(const DataTypes & arguments)
|
||||
{
|
||||
TypeIndex x_type = arguments[0]->getTypeId();
|
||||
TypeIndex y_type = arguments[1]->getTypeId();
|
||||
|
||||
UInt32 x_scale = 0;
|
||||
UInt32 y_scale = 0;
|
||||
|
||||
if (const auto * datetime64_type = typeid_cast<const DataTypeDateTime64 *>(arguments[0].get()))
|
||||
{
|
||||
x_scale = datetime64_type->getScale();
|
||||
}
|
||||
|
||||
if (const auto * datetime64_type = typeid_cast<const DataTypeDateTime64 *>(arguments[1].get()))
|
||||
{
|
||||
y_scale = datetime64_type->getScale();
|
||||
}
|
||||
|
||||
DataTypes types = {getDataTypeFromTypeIndex(x_type, x_scale), getDataTypeFromTypeIndex(y_type, y_scale)};
|
||||
|
||||
auto tuple = std::make_shared<DataTypeTuple>(std::move(types));
|
||||
|
||||
return std::make_shared<DataTypeArray>(tuple);
|
||||
}
|
||||
|
||||
static DataTypePtr getDataTypeFromTypeIndex(TypeIndex type_index, UInt32 scale)
|
||||
{
|
||||
DataTypePtr data_type;
|
||||
switch (type_index)
|
||||
{
|
||||
case TypeIndex::Date:
|
||||
data_type = std::make_shared<DataTypeDate>();
|
||||
break;
|
||||
case TypeIndex::Date32:
|
||||
data_type = std::make_shared<DataTypeDate32>();
|
||||
break;
|
||||
case TypeIndex::DateTime:
|
||||
data_type = std::make_shared<DataTypeDateTime>();
|
||||
break;
|
||||
case TypeIndex::DateTime64:
|
||||
data_type = std::make_shared<DataTypeDateTime64>(scale);
|
||||
break;
|
||||
default:
|
||||
data_type = std::make_shared<DataTypeNumber<Float64>>();
|
||||
}
|
||||
return data_type;
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
Float64 x = getFloat64DataFromColumn(columns[0], row_num, this->x_type);
|
||||
Float64 y = getFloat64DataFromColumn(columns[1], row_num, this->y_type);
|
||||
this->data(place).add(x, y, arena);
|
||||
}
|
||||
|
||||
Float64 getFloat64DataFromColumn(const IColumn * column, size_t row_num, TypeIndex type_index) const
|
||||
{
|
||||
switch (type_index)
|
||||
{
|
||||
case TypeIndex::Date:
|
||||
return static_cast<const ColumnDate &>(*column).getData()[row_num];
|
||||
case TypeIndex::Date32:
|
||||
return static_cast<const ColumnDate32 &>(*column).getData()[row_num];
|
||||
case TypeIndex::DateTime:
|
||||
return static_cast<const ColumnDateTime &>(*column).getData()[row_num];
|
||||
case TypeIndex::DateTime64:
|
||||
return static_cast<const ColumnDateTime64 &>(*column).getData()[row_num];
|
||||
default:
|
||||
return column->getFloat64(row_num);
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & a = this->data(place);
|
||||
const auto & b = this->data(rhs);
|
||||
|
||||
a.merge(b, arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
this->data(place).read(buf, arena);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
|
||||
{
|
||||
auto res = this->data(place).getResult(total_buckets, arena);
|
||||
|
||||
auto & col = assert_cast<ColumnArray &>(to);
|
||||
auto & col_offsets = assert_cast<ColumnArray::ColumnOffsets &>(col.getOffsetsColumn());
|
||||
|
||||
auto column_x_adder_func = getColumnAdderFunc(x_type);
|
||||
auto column_y_adder_func = getColumnAdderFunc(y_type);
|
||||
|
||||
for (size_t i = 0; i < res.size(); ++i)
|
||||
{
|
||||
auto & column_tuple = assert_cast<ColumnTuple &>(col.getData());
|
||||
column_x_adder_func(column_tuple.getColumn(0), res[i].first);
|
||||
column_y_adder_func(column_tuple.getColumn(1), res[i].second);
|
||||
}
|
||||
|
||||
col_offsets.getData().push_back(col.getData().size());
|
||||
}
|
||||
|
||||
std::function<void(IColumn &, Float64)> getColumnAdderFunc(TypeIndex type_index) const
|
||||
{
|
||||
switch (type_index)
|
||||
{
|
||||
case TypeIndex::Date:
|
||||
return [](IColumn & column, Float64 value)
|
||||
{
|
||||
auto & col = assert_cast<ColumnDate &>(column);
|
||||
col.getData().push_back(static_cast<UInt16>(value));
|
||||
};
|
||||
case TypeIndex::Date32:
|
||||
return [](IColumn & column, Float64 value)
|
||||
{
|
||||
auto & col = assert_cast<ColumnDate32 &>(column);
|
||||
col.getData().push_back(static_cast<UInt32>(value));
|
||||
};
|
||||
case TypeIndex::DateTime:
|
||||
return [](IColumn & column, Float64 value)
|
||||
{
|
||||
auto & col = assert_cast<ColumnDateTime &>(column);
|
||||
col.getData().push_back(static_cast<UInt32>(value));
|
||||
};
|
||||
case TypeIndex::DateTime64:
|
||||
return [](IColumn & column, Float64 value)
|
||||
{
|
||||
auto & col = assert_cast<ColumnDateTime64 &>(column);
|
||||
col.getData().push_back(static_cast<UInt64>(value));
|
||||
};
|
||||
default:
|
||||
return [](IColumn & column, Float64 value)
|
||||
{
|
||||
auto & col = assert_cast<ColumnFloat64 &>(column);
|
||||
col.getData().push_back(value);
|
||||
};
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -1,21 +1,254 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionMannWhitney.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <limits>
|
||||
|
||||
#include <boost/math/distributions/normal.hpp>
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct Settings;
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct MannWhitneyData : public StatisticalSample<Float64, Float64>
|
||||
{
|
||||
/*Since null hypothesis is "for randomly selected values X and Y from two populations,
|
||||
*the probability of X being greater than Y is equal to the probability of Y being greater than X".
|
||||
*Or "the distribution F of first sample equals to the distribution G of second sample".
|
||||
*Then alternative for this hypothesis (H1) is "two-sided"(F != G), "less"(F < G), "greater" (F > G). */
|
||||
enum class Alternative
|
||||
{
|
||||
TwoSided,
|
||||
Less,
|
||||
Greater
|
||||
};
|
||||
|
||||
/// The behaviour equals to the similar function from scipy.
|
||||
/// https://github.com/scipy/scipy/blob/ab9e9f17e0b7b2d618c4d4d8402cd4c0c200d6c0/scipy/stats/stats.py#L6978
|
||||
std::pair<Float64, Float64> getResult(Alternative alternative, bool continuity_correction)
|
||||
{
|
||||
ConcatenatedSamples both(this->x, this->y);
|
||||
RanksArray ranks;
|
||||
Float64 tie_correction;
|
||||
|
||||
/// Compute ranks according to both samples.
|
||||
std::tie(ranks, tie_correction) = computeRanksAndTieCorrection(both);
|
||||
|
||||
const Float64 n1 = this->size_x;
|
||||
const Float64 n2 = this->size_y;
|
||||
|
||||
Float64 r1 = 0;
|
||||
for (size_t i = 0; i < n1; ++i)
|
||||
r1 += ranks[i];
|
||||
|
||||
const Float64 u1 = n1 * n2 + (n1 * (n1 + 1.)) / 2. - r1;
|
||||
const Float64 u2 = n1 * n2 - u1;
|
||||
|
||||
/// The distribution of U-statistic under null hypothesis H0 is symmetric with respect to meanrank.
|
||||
const Float64 meanrank = n1 * n2 /2. + 0.5 * continuity_correction;
|
||||
const Float64 sd = std::sqrt(tie_correction * n1 * n2 * (n1 + n2 + 1) / 12.0);
|
||||
|
||||
Float64 u = 0;
|
||||
if (alternative == Alternative::TwoSided)
|
||||
/// There is no difference which u_i to take as u, because z will be differ only in sign and we take std::abs() from it.
|
||||
u = std::max(u1, u2);
|
||||
else if (alternative == Alternative::Less)
|
||||
u = u1;
|
||||
else if (alternative == Alternative::Greater)
|
||||
u = u2;
|
||||
|
||||
Float64 z = (u - meanrank) / sd;
|
||||
|
||||
if (unlikely(!std::isfinite(z)))
|
||||
return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()};
|
||||
|
||||
if (alternative == Alternative::TwoSided)
|
||||
z = std::abs(z);
|
||||
|
||||
auto standard_normal_distribution = boost::math::normal_distribution<Float64>();
|
||||
auto cdf = boost::math::cdf(standard_normal_distribution, z);
|
||||
|
||||
Float64 p_value = 0;
|
||||
if (alternative == Alternative::TwoSided)
|
||||
p_value = 2 - 2 * cdf;
|
||||
else
|
||||
p_value = 1 - cdf;
|
||||
|
||||
return {u2, p_value};
|
||||
}
|
||||
|
||||
private:
|
||||
using Sample = typename StatisticalSample<Float64, Float64>::SampleX;
|
||||
|
||||
/// We need to compute ranks according to all samples. Use this class to avoid extra copy and memory allocation.
|
||||
class ConcatenatedSamples
|
||||
{
|
||||
public:
|
||||
ConcatenatedSamples(const Sample & first_, const Sample & second_)
|
||||
: first(first_), second(second_) {}
|
||||
|
||||
const Float64 & operator[](size_t ind) const
|
||||
{
|
||||
if (ind < first.size())
|
||||
return first[ind];
|
||||
return second[ind % first.size()];
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return first.size() + second.size();
|
||||
}
|
||||
|
||||
private:
|
||||
const Sample & first;
|
||||
const Sample & second;
|
||||
};
|
||||
};
|
||||
|
||||
class AggregateFunctionMannWhitney final:
|
||||
public IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney>
|
||||
{
|
||||
private:
|
||||
using Alternative = typename MannWhitneyData::Alternative;
|
||||
Alternative alternative;
|
||||
bool continuity_correction{true};
|
||||
|
||||
public:
|
||||
explicit AggregateFunctionMannWhitney(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney> ({arguments}, {}, createResultType())
|
||||
{
|
||||
if (params.size() > 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require two parameter or less", getName());
|
||||
|
||||
if (params.empty())
|
||||
{
|
||||
alternative = Alternative::TwoSided;
|
||||
return;
|
||||
}
|
||||
|
||||
if (params[0].getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName());
|
||||
|
||||
const auto & param = params[0].get<String>();
|
||||
if (param == "two-sided")
|
||||
alternative = Alternative::TwoSided;
|
||||
else if (param == "less")
|
||||
alternative = Alternative::Less;
|
||||
else if (param == "greater")
|
||||
alternative = Alternative::Greater;
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown parameter in aggregate function {}. "
|
||||
"It must be one of: 'two-sided', 'less', 'greater'", getName());
|
||||
|
||||
if (params.size() != 2)
|
||||
return;
|
||||
|
||||
if (params[1].getType() != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a UInt64", getName());
|
||||
|
||||
continuity_correction = static_cast<bool>(params[1].get<UInt64>());
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return "mannWhitneyUTest";
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
|
||||
static DataTypePtr createResultType()
|
||||
{
|
||||
DataTypes types
|
||||
{
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
};
|
||||
|
||||
Strings names
|
||||
{
|
||||
"u_statistic",
|
||||
"p_value"
|
||||
};
|
||||
|
||||
return std::make_shared<DataTypeTuple>(
|
||||
std::move(types),
|
||||
std::move(names)
|
||||
);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
Float64 value = columns[0]->getFloat64(row_num);
|
||||
UInt8 is_second = columns[1]->getUInt(row_num);
|
||||
|
||||
if (is_second)
|
||||
this->data(place).addY(value, arena);
|
||||
else
|
||||
this->data(place).addX(value, arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & a = this->data(place);
|
||||
const auto & b = this->data(rhs);
|
||||
|
||||
a.merge(b, arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
this->data(place).read(buf, arena);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
if (!this->data(place).size_x || !this->data(place).size_y)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName());
|
||||
|
||||
auto [u_statistic, p_value] = this->data(place).getResult(alternative, continuity_correction);
|
||||
|
||||
/// Because p-value is a probability.
|
||||
p_value = std::min(1.0, std::max(0.0, p_value));
|
||||
|
||||
auto & column_tuple = assert_cast<ColumnTuple &>(to);
|
||||
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
|
||||
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
|
||||
|
||||
column_stat.getData().push_back(u_statistic);
|
||||
column_value.getData().push_back(p_value);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionMannWhitneyUTest(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
|
@ -1,249 +0,0 @@
#pragma once

#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/StatCommon.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <Common/assert_cast.h>
#include <Common/PODArray_fwd.h>
#include <base/types.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <limits>

#include <boost/math/distributions/normal.hpp>


namespace DB
{
struct Settings;

namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int BAD_ARGUMENTS;
}


struct MannWhitneyData : public StatisticalSample<Float64, Float64>
{
    /* The null hypothesis is: "for randomly selected values X and Y from two populations,
     * the probability of X being greater than Y is equal to the probability of Y being greater than X".
     * Equivalently: "the distribution F of the first sample equals the distribution G of the second sample".
     * The alternative hypothesis (H1) is then "two-sided" (F != G), "less" (F < G) or "greater" (F > G). */
    enum class Alternative
    {
        TwoSided,
        Less,
        Greater
    };

    /// The behaviour matches the corresponding function from scipy:
    /// https://github.com/scipy/scipy/blob/ab9e9f17e0b7b2d618c4d4d8402cd4c0c200d6c0/scipy/stats/stats.py#L6978
    std::pair<Float64, Float64> getResult(Alternative alternative, bool continuity_correction)
    {
        ConcatenatedSamples both(this->x, this->y);
        RanksArray ranks;
        Float64 tie_correction;

        /// Compute ranks according to both samples.
        std::tie(ranks, tie_correction) = computeRanksAndTieCorrection(both);

        const Float64 n1 = this->size_x;
        const Float64 n2 = this->size_y;

        Float64 r1 = 0;
        for (size_t i = 0; i < n1; ++i)
            r1 += ranks[i];

        const Float64 u1 = n1 * n2 + (n1 * (n1 + 1.)) / 2. - r1;
        const Float64 u2 = n1 * n2 - u1;

        /// The distribution of the U-statistic under the null hypothesis H0 is symmetric with respect to meanrank.
        const Float64 meanrank = n1 * n2 / 2. + 0.5 * continuity_correction;
        const Float64 sd = std::sqrt(tie_correction * n1 * n2 * (n1 + n2 + 1) / 12.0);

        Float64 u = 0;
        if (alternative == Alternative::TwoSided)
            /// It does not matter which u_i is taken as u, because z will differ only in sign and we take std::abs() of it.
            u = std::max(u1, u2);
        else if (alternative == Alternative::Less)
            u = u1;
        else if (alternative == Alternative::Greater)
            u = u2;

        Float64 z = (u - meanrank) / sd;

        if (unlikely(!std::isfinite(z)))
            return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()};

        if (alternative == Alternative::TwoSided)
            z = std::abs(z);

        auto standard_normal_distribution = boost::math::normal_distribution<Float64>();
        auto cdf = boost::math::cdf(standard_normal_distribution, z);

        Float64 p_value = 0;
        if (alternative == Alternative::TwoSided)
            p_value = 2 - 2 * cdf;
        else
            p_value = 1 - cdf;

        return {u2, p_value};
    }

private:
    using Sample = typename StatisticalSample<Float64, Float64>::SampleX;

    /// We need to compute ranks over all samples at once. This class avoids an extra copy and memory allocation.
    class ConcatenatedSamples
    {
    public:
        ConcatenatedSamples(const Sample & first_, const Sample & second_)
            : first(first_), second(second_) {}

        const Float64 & operator[](size_t ind) const
        {
            if (ind < first.size())
                return first[ind];
            return second[ind % first.size()];
        }

        size_t size() const
        {
            return first.size() + second.size();
        }

    private:
        const Sample & first;
        const Sample & second;
    };
};
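For reference, the normal approximation implemented by getResult above can be written out explicitly. This is only a restatement of the code, not part of the source; here n_1 = size_x, n_2 = size_y, R_1 is the rank sum of the first sample and t is the tie correction:

    U_1 = n_1 n_2 + \frac{n_1 (n_1 + 1)}{2} - R_1, \qquad U_2 = n_1 n_2 - U_1

    \mu = \frac{n_1 n_2}{2} + \frac{1}{2} \cdot [\text{continuity correction}], \qquad
    \sigma = \sqrt{ t \cdot \frac{n_1 n_2 (n_1 + n_2 + 1)}{12} }

    z = \frac{U - \mu}{\sigma}, \qquad
    p = \begin{cases} 2\,(1 - \Phi(|z|)) & \text{'two-sided'} \\ 1 - \Phi(z) & \text{'less' / 'greater'} \end{cases}

where U is max(U_1, U_2) for 'two-sided', U_1 for 'less', U_2 for 'greater', and \Phi is the standard normal CDF.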

class AggregateFunctionMannWhitney final:
    public IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney>
{
private:
    using Alternative = typename MannWhitneyData::Alternative;
    Alternative alternative;
    bool continuity_correction{true};

public:
    explicit AggregateFunctionMannWhitney(const DataTypes & arguments, const Array & params)
        : IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney> ({arguments}, {}, createResultType())
    {
        if (params.size() > 2)
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at most two parameters", getName());

        if (params.empty())
        {
            alternative = Alternative::TwoSided;
            return;
        }

        if (params[0].getType() != Field::Types::String)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires the first parameter to be a String", getName());

        const auto & param = params[0].get<String>();
        if (param == "two-sided")
            alternative = Alternative::TwoSided;
        else if (param == "less")
            alternative = Alternative::Less;
        else if (param == "greater")
            alternative = Alternative::Greater;
        else
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown parameter in aggregate function {}. "
                "It must be one of: 'two-sided', 'less', 'greater'", getName());

        if (params.size() != 2)
            return;

        if (params[1].getType() != Field::Types::UInt64)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires the second parameter to be a UInt64", getName());

        continuity_correction = static_cast<bool>(params[1].get<UInt64>());
    }

    String getName() const override
    {
        return "mannWhitneyUTest";
    }

    bool allocatesMemoryInArena() const override { return true; }

    static DataTypePtr createResultType()
    {
        DataTypes types
        {
            std::make_shared<DataTypeNumber<Float64>>(),
            std::make_shared<DataTypeNumber<Float64>>(),
        };

        Strings names
        {
            "u_statistic",
            "p_value"
        };

        return std::make_shared<DataTypeTuple>(
            std::move(types),
            std::move(names)
        );
    }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        Float64 value = columns[0]->getFloat64(row_num);
        UInt8 is_second = columns[1]->getUInt(row_num);

        if (is_second)
            this->data(place).addY(value, arena);
        else
            this->data(place).addX(value, arena);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & a = this->data(place);
        const auto & b = this->data(rhs);

        a.merge(b, arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).write(buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).read(buf, arena);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        if (!this->data(place).size_x || !this->data(place).size_y)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} requires both samples to be non-empty", getName());

        auto [u_statistic, p_value] = this->data(place).getResult(alternative, continuity_correction);

        /// Clamp to [0, 1] because the p-value is a probability.
        p_value = std::min(1.0, std::max(0.0, p_value));

        auto & column_tuple = assert_cast<ColumnTuple &>(to);
        auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
        auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));

        column_stat.getData().push_back(u_statistic);
        column_value.getData().push_back(p_value);
    }

};

}
@ -1,8 +1,21 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionMaxIntersections.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>

#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>

#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>

#include <Common/ArenaAllocator.h>
#include <Common/NaNUtils.h>
#include <Common/assert_cast.h>

#include <AggregateFunctions/IAggregateFunction.h>

#define AGGREGATE_FUNCTION_MAX_INTERSECTIONS_MAX_ARRAY_SIZE 0xFFFFFF


namespace DB
{
@ -11,24 +24,186 @@ struct Settings;
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int TOO_LARGE_ARRAY_SIZE;
}

namespace
{

/** maxIntersections: returns the maximum number of intersected intervals defined by start_column and end_column values,
  * maxIntersectionsPosition: returns the leftmost position of the maximum intersection of intervals.
  */

/// Similar to GroupArrayNumericData.
template <typename T>
struct MaxIntersectionsData
{
    /// Left or right end of the interval and a signed weight: positive for the begin of an interval, negative for its end.
    using Value = std::pair<T, Int64>;

    /// Switch to the ordinary Allocator after 4096 bytes to avoid fragmentation and trash in the Arena.
    using Allocator = MixedAlignedArenaAllocator<alignof(Value), 4096>;
    using Array = PODArray<Value, 32, Allocator>;

    Array value;
};

enum class AggregateFunctionIntersectionsKind
{
    Count,
    Position
};

template <typename PointType>
class AggregateFunctionIntersectionsMax final
    : public IAggregateFunctionDataHelper<MaxIntersectionsData<PointType>, AggregateFunctionIntersectionsMax<PointType>>
{
private:
    AggregateFunctionIntersectionsKind kind;

public:
    AggregateFunctionIntersectionsMax(AggregateFunctionIntersectionsKind kind_, const DataTypes & arguments)
        : IAggregateFunctionDataHelper<MaxIntersectionsData<PointType>, AggregateFunctionIntersectionsMax<PointType>>(arguments, {}, createResultType(kind_))
        , kind(kind_)
    {
        if (!isNativeNumber(arguments[0]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{}: first argument must be represented by integer", getName());

        if (!isNativeNumber(arguments[1]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{}: second argument must be represented by integer", getName());

        if (!arguments[0]->equals(*arguments[1]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{}: arguments must have the same type", getName());
    }

    String getName() const override
    {
        return kind == AggregateFunctionIntersectionsKind::Count
            ? "maxIntersections"
            : "maxIntersectionsPosition";
    }

    static DataTypePtr createResultType(AggregateFunctionIntersectionsKind kind_)
    {
        if (kind_ == AggregateFunctionIntersectionsKind::Count)
            return std::make_shared<DataTypeUInt64>();
        else
            return std::make_shared<DataTypeNumber<PointType>>();
    }

    bool allocatesMemoryInArena() const override { return false; }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        PointType left = assert_cast<const ColumnVector<PointType> &>(*columns[0]).getData()[row_num];
        PointType right = assert_cast<const ColumnVector<PointType> &>(*columns[1]).getData()[row_num];

        if (!isNaN(left))
            this->data(place).value.push_back(std::make_pair(left, Int64(1)), arena);

        if (!isNaN(right))
            this->data(place).value.push_back(std::make_pair(right, Int64(-1)), arena);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & cur_elems = this->data(place);
        auto & rhs_elems = this->data(rhs);

        cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        const auto & value = this->data(place).value;
        size_t size = value.size();
        writeVarUInt(size, buf);

        /// In this version, pairs were serialized with padding.
        /// We must ensure that padding bytes are zero-filled.

        static_assert(offsetof(typename MaxIntersectionsData<PointType>::Value, first) == 0);
        static_assert(offsetof(typename MaxIntersectionsData<PointType>::Value, second) > 0);

        char zero_padding[offsetof(typename MaxIntersectionsData<PointType>::Value, second) - sizeof(value[0].first)]{};

        for (size_t i = 0; i < size; ++i)
        {
            writePODBinary(value[i].first, buf);
            writePODBinary(zero_padding, buf);
            if constexpr (std::endian::native == std::endian::little)
                writePODBinary(value[i].second, buf);
            else
                writePODBinary(std::byteswap(value[i].second), buf);
        }
    }
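    /// For illustration (an assumption about a typical 64-bit layout, not something asserted by the code above):
    /// with PointType = UInt32, Value is std::pair<UInt32, Int64>, so offsetof(Value, second) == 8 and
    /// sizeof(value[0].first) == 4. Four zero bytes are then written between the point and the weight,
    /// reproducing the layout of writing the whole 16-byte POD pair at once.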

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        size_t size = 0;
        readVarUInt(size, buf);

        if (unlikely(size > AGGREGATE_FUNCTION_MAX_INTERSECTIONS_MAX_ARRAY_SIZE))
            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
                "Too large array size (maximum: {})", AGGREGATE_FUNCTION_MAX_INTERSECTIONS_MAX_ARRAY_SIZE);

        auto & value = this->data(place).value;

        value.resize(size, arena);
        buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        Int64 current_intersections = 0;
        Int64 max_intersections = 0;
        PointType position_of_max_intersections = 0;

        /// const_cast because we will sort the array
        auto & array = this->data(place).value;

        /// Sort by position; for equal positions, sort by weight to get a deterministic result.
        ::sort(array.begin(), array.end());

        for (const auto & point_weight : array)
        {
            current_intersections += point_weight.second;
            if (current_intersections > max_intersections)
            {
                max_intersections = current_intersections;
                position_of_max_intersections = point_weight.first;
            }
        }

        if (kind == AggregateFunctionIntersectionsKind::Count)
        {
            auto & result_column = assert_cast<ColumnUInt64 &>(to).getData();
            result_column.push_back(max_intersections);
        }
        else
        {
            auto & result_column = assert_cast<ColumnVector<PointType> &>(to).getData();
            result_column.push_back(position_of_max_intersections);
        }
    }
};


AggregateFunctionPtr createAggregateFunctionMaxIntersections(
    AggregateFunctionIntersectionsKind kind,
    const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
    assertBinary(name, argument_types);
    assertNoParameters(name, parameters);

    AggregateFunctionPtr res(createWithNumericType<AggregateFunctionIntersectionsMax>(*argument_types[0], kind, argument_types));
    if (!res)
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal types {} and {} of argument for aggregate function {}",
            argument_types[0]->getName(), argument_types[1]->getName(), name);

    return res;
}

}
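The insertResultInto method above is a standard sweep over sorted interval endpoints: every interval start contributes +1, every end contributes -1, and the running sum is the number of currently open intervals. A minimal self-contained sketch of the same idea (hypothetical names, not part of the ClickHouse sources):

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

/// Returns {max_intersections, leftmost_position_of_max} for a set of [left, right] intervals,
/// mirroring what maxIntersections and maxIntersectionsPosition compute.
std::pair<int64_t, int64_t> maxIntervalOverlap(const std::vector<std::pair<int64_t, int64_t>> & intervals)
{
    /// Each interval contributes a +1 event at its left end and a -1 event at its right end.
    std::vector<std::pair<int64_t, int64_t>> events;
    events.reserve(intervals.size() * 2);
    for (const auto & [left, right] : intervals)
    {
        events.emplace_back(left, 1);
        events.emplace_back(right, -1);
    }

    /// Sort by position; for equal positions the -1 (end) events sort before the +1 (start) events,
    /// so intervals that merely touch are not counted as intersecting. This is the same tie-breaking
    /// that the pair ordering gives in the aggregate function above.
    std::sort(events.begin(), events.end());

    int64_t current = 0;
    int64_t best = 0;
    int64_t best_position = 0;
    for (const auto & [position, weight] : events)
    {
        current += weight;
        if (current > best)
        {
            best = current;
            best_position = position;
        }
    }
    return {best, best_position};
}

For example, the intervals (1, 3), (2, 4) and (5, 6) give a maximum of 2 simultaneously open intervals, first reached at position 2.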
void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory & factory)