mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge remote-tracking branch 'origin/master' into pr-local-plan
This commit is contained in:
commit
9900abade6
168
.github/actions/release/action.yml
vendored
Normal file
168
.github/actions/release/action.yml
vendored
Normal file
@ -0,0 +1,168 @@
|
||||
name: Release
|
||||
|
||||
description: Makes patch releases and creates new release branch
|
||||
|
||||
inputs:
|
||||
ref:
|
||||
description: 'Git reference (branch or commit sha) from which to create the release'
|
||||
required: true
|
||||
type: string
|
||||
type:
|
||||
description: 'The type of release: "new" for a new release or "patch" for a patch release'
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- patch
|
||||
- new
|
||||
dry-run:
|
||||
description: 'Dry run'
|
||||
required: false
|
||||
default: true
|
||||
type: boolean
|
||||
token:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Prepare Release Info
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --prepare-release-info \
|
||||
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
|
||||
${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
echo "::group::Release Info"
|
||||
python3 -m json.tool /tmp/release_info.json
|
||||
echo "::endgroup::"
|
||||
release_tag=$(jq -r '.release_tag' /tmp/release_info.json)
|
||||
commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json)
|
||||
echo "Release Tag: $release_tag"
|
||||
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
|
||||
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
|
||||
- name: Download All Release Artifacts
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Push Git Tag for the Release
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Push New Release Branch
|
||||
if: ${{ inputs.type == 'new' }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Bump CH Version and Update Contributors' List
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Bump Docker versions, Changelog, Security
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
shell: bash
|
||||
run: |
|
||||
git checkout master
|
||||
python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security"
|
||||
echo "List versions"
|
||||
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
|
||||
echo "Update docker version"
|
||||
./utils/list-versions/update-docker-version.sh
|
||||
echo "Generate ChangeLog"
|
||||
export CI=1
|
||||
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
|
||||
--volume=".:/ClickHouse" clickhouse/style-test \
|
||||
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
|
||||
--gh-user-or-token=${{ inputs.token }} --jobs=5 \
|
||||
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
|
||||
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
|
||||
echo "Generate Security"
|
||||
python3 ./utils/security-generator/generate_security.py > SECURITY.md
|
||||
git diff HEAD
|
||||
- name: Create ChangeLog PR
|
||||
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
|
||||
token: ${{ inputs.token }}
|
||||
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
|
||||
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
|
||||
branch: auto/${{ env.RELEASE_TAG }}
|
||||
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
|
||||
delete-branch: true
|
||||
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
|
||||
labels: do not test
|
||||
body: |
|
||||
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
|
||||
### Changelog category (leave one):
|
||||
- Not for changelog (changelog entry is not required)
|
||||
- name: Complete previous steps and Restore git state
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --set-progress-completed
|
||||
git reset --hard HEAD
|
||||
git checkout "$GITHUB_REF_NAME"
|
||||
- name: Create GH Release
|
||||
shell: bash
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Export TGZ Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Test TGZ Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Export RPM Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Test RPM Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Export Debian Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Test Debian Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Docker clickhouse/clickhouse-server building
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
shell: bash
|
||||
run: |
|
||||
cd "./tests/ci"
|
||||
python3 ./create_release.py --set-progress-started --progress "docker server release"
|
||||
export CHECK_NAME="Docker server image"
|
||||
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
|
||||
python3 ./create_release.py --set-progress-completed
|
||||
- name: Docker clickhouse/clickhouse-keeper building
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
shell: bash
|
||||
run: |
|
||||
cd "./tests/ci"
|
||||
python3 ./create_release.py --set-progress-started --progress "docker keeper release"
|
||||
export CHECK_NAME="Docker keeper image"
|
||||
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
|
||||
python3 ./create_release.py --set-progress-completed
|
||||
- name: Set current Release progress to Completed with OK
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --set-progress-started --progress "completed"
|
||||
python3 ./tests/ci/create_release.py --set-progress-completed
|
||||
- name: Post Slack Message
|
||||
if: ${{ !cancelled() }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run && '--dry-run' || '' }}
|
98
.github/workflows/auto_release.yml
vendored
98
.github/workflows/auto_release.yml
vendored
@ -1,44 +1,110 @@
|
||||
name: AutoRelease
|
||||
|
||||
env:
|
||||
# Force the stdout and stderr streams to be unbuffered
|
||||
PYTHONUNBUFFERED: 1
|
||||
DRY_RUN: true
|
||||
|
||||
concurrency:
|
||||
group: auto-release
|
||||
group: release
|
||||
on: # yamllint disable-line rule:truthy
|
||||
# schedule:
|
||||
# - cron: '0 10-16 * * 1-5'
|
||||
# Workflow uses a test bucket for packages and dry run mode (no real releases)
|
||||
schedule:
|
||||
- cron: '0 9 * * *'
|
||||
- cron: '0 15 * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry-run:
|
||||
description: 'Dry run'
|
||||
required: false
|
||||
default: true
|
||||
type: boolean
|
||||
|
||||
jobs:
|
||||
CherryPick:
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
AutoRelease:
|
||||
runs-on: [self-hosted, release-maker]
|
||||
steps:
|
||||
- name: DebugInfo
|
||||
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
|
||||
- name: Set envs
|
||||
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/cherry_pick
|
||||
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
|
||||
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
|
||||
RCSK
|
||||
REPO_OWNER=ClickHouse
|
||||
REPO_NAME=ClickHouse
|
||||
REPO_TEAM=core
|
||||
EOF
|
||||
- name: Set DRY_RUN for schedule
|
||||
if: ${{ github.event_name == 'schedule' }}
|
||||
run: echo "DRY_RUN=true" >> "$GITHUB_ENV"
|
||||
- name: Set DRY_RUN for dispatch
|
||||
if: ${{ github.event_name == 'workflow_dispatch' }}
|
||||
run: echo "DRY_RUN=${{ github.event.inputs.dry-run }}" >> "$GITHUB_ENV"
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||
fetch-depth: 0
|
||||
- name: Auto-release
|
||||
- name: Auto Release Prepare
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 auto_release.py --release-after-days=3
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
python3 auto_release.py --prepare
|
||||
echo "::group::Auto Release Info"
|
||||
python3 -m json.tool /tmp/autorelease_info.json
|
||||
echo "::endgroup::"
|
||||
{
|
||||
echo 'AUTO_RELEASE_PARAMS<<EOF'
|
||||
cat /tmp/autorelease_info.json
|
||||
echo 'EOF'
|
||||
} >> "$GITHUB_ENV"
|
||||
- name: Post Release Branch statuses
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 auto_release.py --post-status
|
||||
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].release_branch }}
|
||||
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[0].ready }}
|
||||
uses: ./.github/actions/release
|
||||
with:
|
||||
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].commit_sha }}
|
||||
type: patch
|
||||
dry-run: ${{ env.DRY_RUN }}
|
||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].release_branch }}
|
||||
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1] && fromJson(env.AUTO_RELEASE_PARAMS).releases[1].ready }}  # guard on the same index this step releases
|
||||
uses: ./.github/actions/release
|
||||
with:
|
||||
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].commit_sha }}
|
||||
type: patch
|
||||
dry-run: ${{ env.DRY_RUN }}
|
||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].release_branch }}
|
||||
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2] && fromJson(env.AUTO_RELEASE_PARAMS).releases[2].ready }}
|
||||
uses: ./.github/actions/release
|
||||
with:
|
||||
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].commit_sha }}
|
||||
type: patch
|
||||
dry-run: ${{ env.DRY_RUN }}
|
||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].release_branch }}
|
||||
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3] && fromJson(env.AUTO_RELEASE_PARAMS).releases[3].ready }}
|
||||
uses: ./.github/actions/release
|
||||
with:
|
||||
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].commit_sha }}
|
||||
type: patch
|
||||
dry-run: ${{ env.DRY_RUN }}
|
||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].release_branch }}
|
||||
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4] && fromJson(env.AUTO_RELEASE_PARAMS).releases[4].ready }}
|
||||
uses: ./.github/actions/release
|
||||
with:
|
||||
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].commit_sha }}
|
||||
type: patch
|
||||
dry-run: ${{ env.DRY_RUN }}
|
||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||
- name: Post Slack Message
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }}
|
||||
- name: Clean up
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
|
134
.github/workflows/create_release.yml
vendored
134
.github/workflows/create_release.yml
vendored
@ -2,7 +2,6 @@ name: CreateRelease
|
||||
|
||||
concurrency:
|
||||
group: release
|
||||
|
||||
'on':
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
@ -31,136 +30,15 @@ jobs:
|
||||
steps:
|
||||
- name: DebugInfo
|
||||
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
|
||||
- name: Set envs
|
||||
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
|
||||
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
|
||||
RCSK
|
||||
RELEASE_INFO_FILE=${{ runner.temp }}/release_info.json
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||
fetch-depth: 0
|
||||
- name: Prepare Release Info
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --prepare-release-info \
|
||||
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
|
||||
--outfile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
echo "::group::Release Info"
|
||||
python3 -m json.tool "$RELEASE_INFO_FILE"
|
||||
echo "::endgroup::"
|
||||
release_tag=$(jq -r '.release_tag' "$RELEASE_INFO_FILE")
|
||||
commit_sha=$(jq -r '.commit_sha' "$RELEASE_INFO_FILE")
|
||||
echo "Release Tag: $release_tag"
|
||||
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
|
||||
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
|
||||
- name: Download All Release Artifacts
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Push Git Tag for the Release
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Push New Release Branch
|
||||
if: ${{ inputs.type == 'new' }}
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --push-new-release-branch --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Bump CH Version and Update Contributors' List
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Checkout master
|
||||
run: |
|
||||
git checkout master
|
||||
- name: Bump Docker versions, Changelog, Security
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
[ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1
|
||||
echo "List versions"
|
||||
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
|
||||
echo "Update docker version"
|
||||
./utils/list-versions/update-docker-version.sh
|
||||
echo "Generate ChangeLog"
|
||||
export CI=1
|
||||
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
|
||||
--volume=".:/ClickHouse" clickhouse/style-test \
|
||||
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
|
||||
--gh-user-or-token="$GH_TOKEN" --jobs=5 \
|
||||
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
|
||||
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
|
||||
echo "Generate Security"
|
||||
python3 ./utils/security-generator/generate_security.py > SECURITY.md
|
||||
git diff HEAD
|
||||
- name: Create ChangeLog PR
|
||||
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
- name: Call Release Action
|
||||
uses: ./.github/actions/release
|
||||
with:
|
||||
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
|
||||
token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
|
||||
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
|
||||
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
|
||||
branch: auto/${{ env.RELEASE_TAG }}
|
||||
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
|
||||
delete-branch: true
|
||||
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
|
||||
labels: do not test
|
||||
body: |
|
||||
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
|
||||
### Changelog category (leave one):
|
||||
- Not for changelog (changelog entry is not required)
|
||||
- name: Reset changes if Dry-run
|
||||
if: ${{ inputs.dry-run }}
|
||||
run: |
|
||||
git reset --hard HEAD
|
||||
- name: Checkout back to GITHUB_REF
|
||||
run: |
|
||||
git checkout "$GITHUB_REF_NAME"
|
||||
- name: Create GH Release
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --create-gh-release \
|
||||
--infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
|
||||
- name: Export TGZ Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Test TGZ Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Export RPM Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Test RPM Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Export Debian Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Test Debian Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Docker clickhouse/clickhouse-server building
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
cd "./tests/ci"
|
||||
export CHECK_NAME="Docker server image"
|
||||
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
|
||||
- name: Docker clickhouse/clickhouse-keeper building
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
cd "./tests/ci"
|
||||
export CHECK_NAME="Docker keeper image"
|
||||
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
|
||||
- name: Post Slack Message
|
||||
if: always()
|
||||
run: |
|
||||
echo Slack Message
|
||||
ref: ${{ inputs.ref }}
|
||||
type: ${{ inputs.type }}
|
||||
dry-run: ${{ inputs.dry-run }}
|
||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||
|
@ -297,7 +297,7 @@ Algorithm requires the special column in tables. This column:
|
||||
|
||||
- Must contain an ordered sequence.
|
||||
- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
- Can’t be the only column in the `JOIN` clause.
|
||||
- For `hash` join algorithm it can’t be the only column in the `JOIN` clause.
|
||||
|
||||
Syntax `ASOF JOIN ... ON`:
|
||||
|
||||
@ -337,7 +337,8 @@ For example, consider the following tables:
|
||||
`ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can’t be joined.
|
||||
|
||||
:::note
|
||||
`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
|
||||
`ASOF JOIN` is supported only by `hash` and `full_sorting_merge` join algorithms.
|
||||
It's **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
|
||||
:::
|
||||
|
||||
## PASTE JOIN Usage
|
||||
|
@ -6,38 +6,38 @@ sidebar_label: Playground
|
||||
|
||||
# ClickHouse Playground {#clickhouse-playground}
|
||||
|
||||
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
|
||||
Several example datasets are available in Playground.
|
||||
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) позволяет пользователям экспериментировать с ClickHouse, выполняя запросы мгновенно, без необходимости настройки сервера или кластера.
|
||||
В Playground доступны несколько примеров наборов данных.
|
||||
|
||||
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
|
||||
Вы можете выполнять запросы к Playground, используя любой HTTP-клиент, например [curl](https://curl.haxx.se) или [wget](https://www.gnu.org/software/wget/), или настроить соединение, используя драйверы [JDBC](../interfaces/jdbc.md) или [ODBC](../interfaces/odbc.md). Дополнительную информацию о программных продуктах, поддерживающих ClickHouse, можно найти [здесь](../interfaces/index.md).
|
||||
|
||||
## Credentials {#credentials}
|
||||
## Учетные данные {#credentials}
|
||||
|
||||
| Parameter | Value |
|
||||
| Параметр | Значение |
|
||||
|:--------------------|:-----------------------------------|
|
||||
| HTTPS endpoint | `https://play.clickhouse.com:443/` |
|
||||
| Native TCP endpoint | `play.clickhouse.com:9440` |
|
||||
| User | `explorer` or `play` |
|
||||
| Password | (empty) |
|
||||
| HTTPS-адрес | `https://play.clickhouse.com:443/` |
|
||||
| TCP-адрес | `play.clickhouse.com:9440` |
|
||||
| Пользователь | `explorer` или `play` |
|
||||
| Пароль | (пусто) |
|
||||
|
||||
## Limitations {#limitations}
|
||||
## Ограничения {#limitations}
|
||||
|
||||
The queries are executed as a read-only user. It implies some limitations:
|
||||
Запросы выполняются от имени пользователя с правами только на чтение. Это предполагает некоторые ограничения:
|
||||
|
||||
- DDL queries are not allowed
|
||||
- INSERT queries are not allowed
|
||||
- DDL-запросы не разрешены
|
||||
- INSERT-запросы не разрешены
|
||||
|
||||
The service also has quotas on its usage.
|
||||
Сервис также имеет квоты на использование.
|
||||
|
||||
## Examples {#examples}
|
||||
## Примеры {#examples}
|
||||
|
||||
HTTPS endpoint example with `curl`:
|
||||
Пример использования HTTPS-адреса с `curl`:
|
||||
|
||||
``` bash
|
||||
```bash
|
||||
curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'"
|
||||
```
|
||||
|
||||
TCP endpoint example with [CLI](../interfaces/cli.md):
|
||||
Пример использования TCP-адреса с [CLI](../interfaces/cli.md):
|
||||
|
||||
``` bash
|
||||
clickhouse client --secure --host play.clickhouse.com --user explorer
|
||||
|
@ -68,6 +68,41 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Checks that every node is either `isNull(x)` or `notEquals(x, y)` and that together they reference exactly two distinct arguments, which are returned via `lhs` and `rhs`
|
||||
bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs)
|
||||
{
|
||||
QueryTreeNodePtrWithHashSet all_arguments;
|
||||
for (const auto & node : nodes)
|
||||
{
|
||||
const auto * func_node = node->as<FunctionNode>();
|
||||
if (!func_node)
|
||||
return false;
|
||||
|
||||
const auto & arguments = func_node->getArguments().getNodes();
|
||||
if (func_node->getFunctionName() == "isNull" && arguments.size() == 1)
|
||||
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
|
||||
else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2)
|
||||
{
|
||||
if (arguments[0]->isEqual(*arguments[1]))
|
||||
return false;
|
||||
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
|
||||
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[1]));
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
if (all_arguments.size() > 2)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (all_arguments.size() != 2)
|
||||
return false;
|
||||
|
||||
lhs = all_arguments.begin()->node;
|
||||
rhs = std::next(all_arguments.begin())->node;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value)
|
||||
{
|
||||
const auto * constant_node = node->as<ConstantNode>();
|
||||
@ -213,11 +248,14 @@ private:
|
||||
else if (func_name == "and")
|
||||
{
|
||||
const auto & and_arguments = argument_function->getArguments().getNodes();
|
||||
bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull");
|
||||
if (all_are_is_null)
|
||||
|
||||
QueryTreeNodePtr is_null_lhs_arg;
|
||||
QueryTreeNodePtr is_null_rhs_arg;
|
||||
if (matchIsNullOfTwoArgs(and_arguments, is_null_lhs_arg, is_null_rhs_arg))
|
||||
{
|
||||
is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1);
|
||||
is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1);
|
||||
is_null_argument_to_indices[is_null_lhs_arg].push_back(or_operands.size() - 1);
|
||||
is_null_argument_to_indices[is_null_rhs_arg].push_back(or_operands.size() - 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
/// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` can be replaced with `a = b`
|
||||
|
@ -4124,7 +4124,9 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo
|
||||
|
||||
auto * column_to_interpolate = interpolate_node_typed.getExpression()->as<IdentifierNode>();
|
||||
if (!column_to_interpolate)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found",
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"INTERPOLATE can work only for identifiers, but {} is found",
|
||||
interpolate_node_typed.getExpression()->formatASTForErrorMessage());
|
||||
auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName();
|
||||
|
||||
|
@ -366,13 +366,10 @@ void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) cons
|
||||
hash.update(wbuf.str().c_str(), wbuf.str().size());
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnAggregateFunction::getWeakHash32() const
|
||||
{
|
||||
auto s = data.size();
|
||||
if (hash.getData().size() != data.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), hash.getData().size());
|
||||
|
||||
WeakHash32 hash(s);
|
||||
auto & hash_data = hash.getData();
|
||||
|
||||
std::vector<UInt8> v;
|
||||
@ -383,6 +380,8 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
|
||||
wbuf.finalize();
|
||||
hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]);
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const
|
||||
|
@ -177,7 +177,7 @@ public:
|
||||
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
|
||||
|
@ -271,15 +271,12 @@ void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
getData().updateHashWithValue(offset + i, hash);
|
||||
}
|
||||
|
||||
void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnArray::getWeakHash32() const
|
||||
{
|
||||
auto s = offsets->size();
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", s, hash.getData().size());
|
||||
WeakHash32 hash(s);
|
||||
|
||||
WeakHash32 internal_hash(data->size());
|
||||
data->updateWeakHash32(internal_hash);
|
||||
WeakHash32 internal_hash = data->getWeakHash32();
|
||||
|
||||
Offset prev_offset = 0;
|
||||
const auto & offsets_data = getOffsets();
|
||||
@ -300,6 +297,8 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
|
||||
|
||||
prev_offset = offsets_data[i];
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnArray::updateHashFast(SipHash & hash) const
|
||||
|
@ -82,7 +82,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||
const char * skipSerializedInArena(const char * pos) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
#if !defined(ABORT_ON_LOGICAL_ERROR)
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <optional>
|
||||
#include <Core/Field.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
|
||||
|
||||
@ -98,7 +99,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
|
||||
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
|
||||
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
|
||||
void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); }
|
||||
WeakHash32 getWeakHash32() const override { throwMustBeDecompressed(); }
|
||||
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
|
||||
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
|
||||
void expand(const Filter &, bool) override { throwMustBeDecompressed(); }
|
||||
|
@ -137,18 +137,10 @@ void ColumnConst::updatePermutation(PermutationSortDirection /*direction*/, Perm
|
||||
{
|
||||
}
|
||||
|
||||
void ColumnConst::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnConst::getWeakHash32() const
|
||||
{
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
|
||||
WeakHash32 element_hash(1);
|
||||
data->updateWeakHash32(element_hash);
|
||||
size_t data_hash = element_hash.getData()[0];
|
||||
|
||||
for (auto & value : hash.getData())
|
||||
value = static_cast<UInt32>(intHashCRC32(data_hash, value));
|
||||
WeakHash32 element_hash = data->getWeakHash32();
|
||||
return WeakHash32(s, element_hash.getData()[0]);
|
||||
}
|
||||
|
||||
void ColumnConst::compareColumn(
|
||||
|
@ -204,7 +204,7 @@ public:
|
||||
data->updateHashWithValue(0, hash);
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash & hash) const override
|
||||
{
|
||||
|
@ -28,7 +28,6 @@ namespace ErrorCodes
|
||||
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
template <is_decimal T>
|
||||
@ -76,13 +75,10 @@ void ColumnDecimal<T>::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
}
|
||||
|
||||
template <is_decimal T>
|
||||
void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnDecimal<T>::getWeakHash32() const
|
||||
{
|
||||
auto s = data.size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
WeakHash32 hash(s);
|
||||
|
||||
const T * begin = data.data();
|
||||
const T * end = begin + s;
|
||||
@ -94,6 +90,8 @@ void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
|
||||
++begin;
|
||||
++hash_data;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
template <is_decimal T>
|
||||
|
@ -102,7 +102,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||
const char * skipSerializedInArena(const char * pos) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
#if !defined(ABORT_ON_LOGICAL_ERROR)
|
||||
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnVariant.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -174,9 +175,9 @@ public:
|
||||
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override
|
||||
WeakHash32 getWeakHash32() const override
|
||||
{
|
||||
variant_column->updateWeakHash32(hash);
|
||||
return variant_column->getWeakHash32();
|
||||
}
|
||||
|
||||
void updateHashFast(SipHash & hash) const override
|
||||
|
@ -137,14 +137,10 @@ void ColumnFixedString::updateHashWithValue(size_t index, SipHash & hash) const
|
||||
hash.update(reinterpret_cast<const char *>(&chars[n * index]), n);
|
||||
}
|
||||
|
||||
void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnFixedString::getWeakHash32() const
|
||||
{
|
||||
auto s = size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, "
|
||||
"hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
WeakHash32 hash(s);
|
||||
|
||||
const UInt8 * pos = chars.data();
|
||||
UInt32 * hash_data = hash.getData().data();
|
||||
@ -156,6 +152,8 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
|
||||
pos += n;
|
||||
++hash_data;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnFixedString::updateHashFast(SipHash & hash) const
|
||||
|
@ -133,7 +133,7 @@ public:
|
||||
|
||||
void updateHashWithValue(size_t index, SipHash & hash) const override;
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -130,9 +131,9 @@ public:
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName());
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 &) const override
|
||||
WeakHash32 getWeakHash32() const override
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateWeakHash32 is not implemented for {}", getName());
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getWeakHash32 is not implemented for {}", getName());
|
||||
}
|
||||
|
||||
void updateHashFast(SipHash &) const override
|
||||
|
@ -7,8 +7,7 @@
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include "Storages/IndicesDescription.h"
|
||||
#include "base/types.h"
|
||||
#include <base/types.h>
|
||||
#include <base/sort.h>
|
||||
#include <base/scope_guard.h>
|
||||
|
||||
@ -320,19 +319,10 @@ const char * ColumnLowCardinality::skipSerializedInArena(const char * pos) const
|
||||
return getDictionary().skipSerializedInArena(pos);
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnLowCardinality::getWeakHash32() const
|
||||
{
|
||||
auto s = size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
|
||||
const auto & dict = getDictionary().getNestedColumn();
|
||||
WeakHash32 dict_hash(dict->size());
|
||||
dict->updateWeakHash32(dict_hash);
|
||||
|
||||
idx.updateWeakHash(hash, dict_hash);
|
||||
WeakHash32 dict_hash = getDictionary().getNestedColumn()->getWeakHash32();
|
||||
return idx.getWeakHash(dict_hash);
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::updateHashFast(SipHash & hash) const
|
||||
@ -832,10 +822,11 @@ bool ColumnLowCardinality::Index::containsDefault() const
|
||||
return contains;
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const
|
||||
WeakHash32 ColumnLowCardinality::Index::getWeakHash(const WeakHash32 & dict_hash) const
|
||||
{
|
||||
WeakHash32 hash(positions->size());
|
||||
auto & hash_data = hash.getData();
|
||||
auto & dict_hash_data = dict_hash.getData();
|
||||
const auto & dict_hash_data = dict_hash.getData();
|
||||
|
||||
auto update_weak_hash = [&](auto x)
|
||||
{
|
||||
@ -844,10 +835,11 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 &
|
||||
auto size = data.size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
hash_data[i] = static_cast<UInt32>(intHashCRC32(dict_hash_data[data[i]], hash_data[i]));
|
||||
hash_data[i] = dict_hash_data[data[i]];
|
||||
};
|
||||
|
||||
callForType(std::move(update_weak_hash), size_of_type);
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::Index::collectSerializedValueSizes(
|
||||
|
@ -111,7 +111,7 @@ public:
|
||||
getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash);
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash &) const override;
|
||||
|
||||
@ -325,7 +325,7 @@ public:
|
||||
|
||||
bool containsDefault() const;
|
||||
|
||||
void updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const;
|
||||
WeakHash32 getWeakHash(const WeakHash32 & dict_hash) const;
|
||||
|
||||
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const;
|
||||
|
||||
|
@ -143,9 +143,9 @@ void ColumnMap::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
nested->updateHashWithValue(n, hash);
|
||||
}
|
||||
|
||||
void ColumnMap::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnMap::getWeakHash32() const
|
||||
{
|
||||
nested->updateWeakHash32(hash);
|
||||
return nested->getWeakHash32();
|
||||
}
|
||||
|
||||
void ColumnMap::updateHashFast(SipHash & hash) const
|
||||
|
@ -64,7 +64,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||
const char * skipSerializedInArena(const char * pos) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
|
||||
#if !defined(ABORT_ON_LOGICAL_ERROR)
|
||||
|
@ -56,25 +56,21 @@ void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
getNestedColumn().updateHashWithValue(n, hash);
|
||||
}
|
||||
|
||||
void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnNullable::getWeakHash32() const
|
||||
{
|
||||
auto s = size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
|
||||
WeakHash32 old_hash = hash;
|
||||
nested_column->updateWeakHash32(hash);
|
||||
WeakHash32 hash = nested_column->getWeakHash32();
|
||||
|
||||
const auto & null_map_data = getNullMapData();
|
||||
auto & hash_data = hash.getData();
|
||||
auto & old_hash_data = old_hash.getData();
|
||||
|
||||
/// Use old data for nulls.
|
||||
/// Use default for nulls.
|
||||
for (size_t row = 0; row < s; ++row)
|
||||
if (null_map_data[row])
|
||||
hash_data[row] = old_hash_data[row];
|
||||
hash_data[row] = WeakHash32::kDefaultInitialValue;
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnNullable::updateHashFast(SipHash & hash) const
|
||||
|
@ -133,7 +133,7 @@ public:
|
||||
void protect() override;
|
||||
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
void getExtremes(Field & min, Field & max) const override;
|
||||
// Special function for nullable minmax index
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Core/Names.h>
|
||||
#include <DataTypes/Serializations/SubcolumnsTree.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
#include <DataTypes/IDataType.h>
|
||||
|
||||
@ -252,7 +253,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
|
||||
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
|
||||
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
|
||||
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
|
||||
WeakHash32 getWeakHash32() const override { throwMustBeConcrete(); }
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
|
||||
bool hasEqualValues() const override { throwMustBeConcrete(); }
|
||||
|
@ -678,20 +678,22 @@ void ColumnSparse::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
values->updateHashWithValue(getValueIndex(n), hash);
|
||||
}
|
||||
|
||||
void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnSparse::getWeakHash32() const
|
||||
{
|
||||
if (hash.getData().size() != _size)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", _size, hash.getData().size());
|
||||
WeakHash32 values_hash = values->getWeakHash32();
|
||||
WeakHash32 hash(size());
|
||||
|
||||
auto & hash_data = hash.getData();
|
||||
auto & values_hash_data = values_hash.getData();
|
||||
|
||||
auto offset_it = begin();
|
||||
auto & hash_data = hash.getData();
|
||||
for (size_t i = 0; i < _size; ++i, ++offset_it)
|
||||
{
|
||||
size_t value_index = offset_it.getValueIndex();
|
||||
auto data_ref = values->getDataAt(value_index);
|
||||
hash_data[i] = ::updateWeakHash32(reinterpret_cast<const UInt8 *>(data_ref.data), data_ref.size, hash_data[i]);
|
||||
hash_data[i] = values_hash_data[value_index];
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnSparse::updateHashFast(SipHash & hash) const
|
||||
|
@ -139,7 +139,7 @@ public:
|
||||
void protect() override;
|
||||
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
void getExtremes(Field & min, Field & max) const override;
|
||||
|
||||
|
@ -108,13 +108,10 @@ MutableColumnPtr ColumnString::cloneResized(size_t to_size) const
|
||||
return res;
|
||||
}
|
||||
|
||||
void ColumnString::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnString::getWeakHash32() const
|
||||
{
|
||||
auto s = offsets.size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
WeakHash32 hash(s);
|
||||
|
||||
const UInt8 * pos = chars.data();
|
||||
UInt32 * hash_data = hash.getData().data();
|
||||
@ -130,6 +127,8 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const
|
||||
prev_offset = offset;
|
||||
++hash_data;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
|
||||
|
@ -212,7 +212,7 @@ public:
|
||||
hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size);
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash & hash) const override
|
||||
{
|
||||
|
@ -310,16 +310,15 @@ void ColumnTuple::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
column->updateHashWithValue(n, hash);
|
||||
}
|
||||
|
||||
void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnTuple::getWeakHash32() const
|
||||
{
|
||||
auto s = size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
WeakHash32 hash(s);
|
||||
|
||||
for (const auto & column : columns)
|
||||
column->updateWeakHash32(hash);
|
||||
hash.update(column->getWeakHash32());
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnTuple::updateHashFast(SipHash & hash) const
|
||||
|
@ -81,7 +81,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||
const char * skipSerializedInArena(const char * pos) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
#if !defined(ABORT_ON_LOGICAL_ERROR)
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
|
@ -789,36 +789,26 @@ void ColumnVariant::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash);
|
||||
}
|
||||
|
||||
void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnVariant::getWeakHash32() const
|
||||
{
|
||||
auto s = size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
|
||||
/// If we have only NULLs, keep hash unchanged.
|
||||
if (hasOnlyNulls())
|
||||
return;
|
||||
return WeakHash32(s);
|
||||
|
||||
/// Optimization for case when there is only 1 non-empty variant and no NULLs.
|
||||
/// In this case we can just calculate weak hash for this variant.
|
||||
if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
||||
{
|
||||
variants[*non_empty_local_discr]->updateWeakHash32(hash);
|
||||
return;
|
||||
}
|
||||
return variants[*non_empty_local_discr]->getWeakHash32();
|
||||
|
||||
/// Calculate weak hash for all variants.
|
||||
std::vector<WeakHash32> nested_hashes;
|
||||
for (const auto & variant : variants)
|
||||
{
|
||||
WeakHash32 nested_hash(variant->size());
|
||||
variant->updateWeakHash32(nested_hash);
|
||||
nested_hashes.emplace_back(std::move(nested_hash));
|
||||
}
|
||||
nested_hashes.emplace_back(variant->getWeakHash32());
|
||||
|
||||
/// For each row hash is a hash of corresponding row from corresponding variant.
|
||||
WeakHash32 hash(s);
|
||||
auto & hash_data = hash.getData();
|
||||
const auto & local_discriminators_data = getLocalDiscriminators();
|
||||
const auto & offsets_data = getOffsets();
|
||||
@ -827,11 +817,10 @@ void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
|
||||
Discriminator discr = local_discriminators_data[i];
|
||||
/// Update hash only for non-NULL values
|
||||
if (discr != NULL_DISCRIMINATOR)
|
||||
{
|
||||
auto nested_hash = nested_hashes[local_discriminators_data[i]].getData()[offsets_data[i]];
|
||||
hash_data[i] = static_cast<UInt32>(hashCRC32(nested_hash, hash_data[i]));
|
||||
}
|
||||
hash_data[i] = nested_hashes[discr].getData()[offsets_data[i]];
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnVariant::updateHashFast(SipHash & hash) const
|
||||
|
@ -213,7 +213,7 @@ public:
|
||||
const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos);
|
||||
const char * skipSerializedInArena(const char * pos) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||
void expand(const Filter & mask, bool inverted) override;
|
||||
|
@ -73,13 +73,10 @@ void ColumnVector<T>::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnVector<T>::getWeakHash32() const
|
||||
{
|
||||
auto s = data.size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
WeakHash32 hash(s);
|
||||
|
||||
const T * begin = data.data();
|
||||
const T * end = begin + s;
|
||||
@ -91,6 +88,8 @@ void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
|
||||
++begin;
|
||||
++hash_data;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -114,7 +114,7 @@ public:
|
||||
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
|
||||
|
@ -300,10 +300,10 @@ public:
|
||||
/// passed bytes to hash must identify sequence of values unambiguously.
|
||||
virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0;
|
||||
|
||||
/// Update hash function value. Hash is calculated for each element.
|
||||
/// Get hash function value. Hash is calculated for each element.
|
||||
/// It's a fast weak hash function. Mainly need to scatter data between threads.
|
||||
/// WeakHash32 must have the same size as column.
|
||||
virtual void updateWeakHash32(WeakHash32 & hash) const = 0;
|
||||
virtual WeakHash32 getWeakHash32() const = 0;
|
||||
|
||||
/// Update state of hash with all column.
|
||||
virtual void updateHashFast(SipHash & hash) const = 0;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -63,8 +64,9 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 & /*hash*/) const override
|
||||
WeakHash32 getWeakHash32() const override
|
||||
{
|
||||
return WeakHash32(s);
|
||||
}
|
||||
|
||||
void updateHashFast(SipHash & /*hash*/) const override
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
#include <optional>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -166,9 +167,9 @@ public:
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique.");
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 &) const override
|
||||
WeakHash32 getWeakHash32() const override
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method updateWeakHash32 is not supported for ColumnUnique.");
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getWeakHash32 is not supported for ColumnUnique.");
|
||||
}
|
||||
|
||||
void updateHashFast(SipHash &) const override
|
||||
|
@ -60,8 +60,7 @@ TEST(WeakHash32, ColumnVectorU8)
|
||||
data.push_back(i);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -77,8 +76,7 @@ TEST(WeakHash32, ColumnVectorI8)
|
||||
data.push_back(i);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -94,8 +92,7 @@ TEST(WeakHash32, ColumnVectorU16)
|
||||
data.push_back(i);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -111,8 +108,7 @@ TEST(WeakHash32, ColumnVectorI16)
|
||||
data.push_back(i);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -128,8 +124,7 @@ TEST(WeakHash32, ColumnVectorU32)
|
||||
data.push_back(i << 16u);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -145,8 +140,7 @@ TEST(WeakHash32, ColumnVectorI32)
|
||||
data.push_back(i << 16);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -162,8 +156,7 @@ TEST(WeakHash32, ColumnVectorU64)
|
||||
data.push_back(i << 32u);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -179,8 +172,7 @@ TEST(WeakHash32, ColumnVectorI64)
|
||||
data.push_back(i << 32);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -204,8 +196,7 @@ TEST(WeakHash32, ColumnVectorU128)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq_data);
|
||||
}
|
||||
@ -221,8 +212,7 @@ TEST(WeakHash32, ColumnVectorI128)
|
||||
data.push_back(i << 32);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -238,8 +228,7 @@ TEST(WeakHash32, ColumnDecimal32)
|
||||
data.push_back(i << 16);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -255,8 +244,7 @@ TEST(WeakHash32, ColumnDecimal64)
|
||||
data.push_back(i << 32);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -272,8 +260,7 @@ TEST(WeakHash32, ColumnDecimal128)
|
||||
data.push_back(i << 32);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -294,8 +281,7 @@ TEST(WeakHash32, ColumnString1)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -331,8 +317,7 @@ TEST(WeakHash32, ColumnString2)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -369,8 +354,7 @@ TEST(WeakHash32, ColumnString3)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -397,8 +381,7 @@ TEST(WeakHash32, ColumnFixedString)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -444,8 +427,7 @@ TEST(WeakHash32, ColumnArray)
|
||||
|
||||
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
||||
|
||||
WeakHash32 hash(col_arr->size());
|
||||
col_arr->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_arr->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq_data);
|
||||
}
|
||||
@ -479,8 +461,7 @@ TEST(WeakHash32, ColumnArray2)
|
||||
|
||||
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
||||
|
||||
WeakHash32 hash(col_arr->size());
|
||||
col_arr->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_arr->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq_data);
|
||||
}
|
||||
@ -536,8 +517,7 @@ TEST(WeakHash32, ColumnArrayArray)
|
||||
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
||||
auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2));
|
||||
|
||||
WeakHash32 hash(col_arr_arr->size());
|
||||
col_arr_arr->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_arr_arr->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq_data);
|
||||
}
|
||||
@ -555,8 +535,7 @@ TEST(WeakHash32, ColumnConst)
|
||||
|
||||
auto col_const = ColumnConst::create(std::move(inner_col), 256);
|
||||
|
||||
WeakHash32 hash(col_const->size());
|
||||
col_const->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_const->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -576,8 +555,7 @@ TEST(WeakHash32, ColumnLowcardinality)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -602,8 +580,7 @@ TEST(WeakHash32, ColumnNullable)
|
||||
|
||||
auto col_null = ColumnNullable::create(std::move(col), std::move(mask));
|
||||
|
||||
WeakHash32 hash(col_null->size());
|
||||
col_null->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_null->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq);
|
||||
}
|
||||
@ -633,8 +610,7 @@ TEST(WeakHash32, ColumnTupleUInt64UInt64)
|
||||
columns.emplace_back(std::move(col2));
|
||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||
|
||||
WeakHash32 hash(col_tuple->size());
|
||||
col_tuple->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq);
|
||||
}
|
||||
@ -671,8 +647,7 @@ TEST(WeakHash32, ColumnTupleUInt64String)
|
||||
columns.emplace_back(std::move(col2));
|
||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||
|
||||
WeakHash32 hash(col_tuple->size());
|
||||
col_tuple->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq);
|
||||
}
|
||||
@ -709,8 +684,7 @@ TEST(WeakHash32, ColumnTupleUInt64FixedString)
|
||||
columns.emplace_back(std::move(col2));
|
||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||
|
||||
WeakHash32 hash(col_tuple->size());
|
||||
col_tuple->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq);
|
||||
}
|
||||
@ -756,8 +730,7 @@ TEST(WeakHash32, ColumnTupleUInt64Array)
|
||||
columns.emplace_back(ColumnArray::create(std::move(val), std::move(off)));
|
||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||
|
||||
WeakHash32 hash(col_tuple->size());
|
||||
col_tuple->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq_data);
|
||||
}
|
||||
|
@ -206,7 +206,7 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
|
||||
}
|
||||
else
|
||||
{
|
||||
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(old_size);
|
||||
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size);
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
|
||||
}
|
||||
}
|
||||
@ -239,7 +239,7 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
|
||||
void * new_buf = ::realloc(buf, new_size);
|
||||
if (nullptr == new_buf)
|
||||
{
|
||||
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(old_size);
|
||||
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size);
|
||||
throw DB::ErrnoException(
|
||||
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY,
|
||||
"Allocator: Cannot realloc from {} to {}",
|
||||
|
@ -244,6 +244,15 @@ private:
|
||||
const char * className() const noexcept override { return "DB::ErrnoException"; }
|
||||
};
|
||||
|
||||
/// An exception to use in unit tests to test interfaces.
|
||||
/// It is distinguished from others, so it does not have to be logged.
|
||||
class TestException : public Exception
|
||||
{
|
||||
public:
|
||||
using Exception::Exception;
|
||||
};
|
||||
|
||||
|
||||
using Exceptions = std::vector<std::exception_ptr>;
|
||||
|
||||
/** Try to write an exception to the log (and forget about it).
|
||||
|
@ -23,8 +23,20 @@ namespace DB
|
||||
|
||||
LazyPipeFDs TraceSender::pipe;
|
||||
|
||||
static thread_local bool inside_send = false;
|
||||
void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Extras extras)
|
||||
{
|
||||
/** The method shouldn't be called recursively or throw exceptions.
|
||||
* There are several reasons:
|
||||
* - avoid infinite recursion when some of subsequent functions invoke tracing;
|
||||
* - avoid inconsistent writes if the method was interrupted by a signal handler in the middle of writing,
|
||||
* and then another tracing is invoked (e.g., from query profiler).
|
||||
*/
|
||||
if (unlikely(inside_send))
|
||||
return;
|
||||
inside_send = true;
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
|
||||
constexpr size_t buf_size = sizeof(char) /// TraceCollector stop flag
|
||||
+ sizeof(UInt8) /// String size
|
||||
+ QUERY_ID_MAX_LEN /// Maximum query_id length
|
||||
@ -80,6 +92,8 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext
|
||||
writePODBinary(extras.increment, out);
|
||||
|
||||
out.next();
|
||||
|
||||
inside_send = false;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,2 +1,24 @@
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
void WeakHash32::update(const WeakHash32 & other)
|
||||
{
|
||||
size_t size = data.size();
|
||||
if (size != other.data.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match:"
|
||||
"left size is {}, right size is {}", size, other.data.size());
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
data[i] = static_cast<UInt32>(intHashCRC32(other.data[i], data[i]));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -11,9 +11,8 @@ namespace DB
|
||||
/// The main purpose why this class needed is to support data initialization. Initially, every bit is 1.
|
||||
class WeakHash32
|
||||
{
|
||||
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
|
||||
|
||||
public:
|
||||
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
|
||||
|
||||
using Container = PaddedPODArray<UInt32>;
|
||||
|
||||
@ -22,6 +21,8 @@ public:
|
||||
|
||||
void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); }
|
||||
|
||||
void update(const WeakHash32 & other);
|
||||
|
||||
const Container & getData() const { return data; }
|
||||
Container & getData() { return data; }
|
||||
|
||||
|
@ -6,12 +6,17 @@ namespace DB
|
||||
{
|
||||
|
||||
String getRandomASCIIString(size_t length)
|
||||
{
|
||||
return getRandomASCIIString(length, thread_local_rng);
|
||||
}
|
||||
|
||||
/// Builds a string of `length` characters, each drawn uniformly from 'a'..'z'
/// using the generator supplied by the caller.
String getRandomASCIIString(size_t length, pcg64 & rng)
{
    std::uniform_int_distribution<int> distribution('a', 'z');
    String res;
    res.resize(length);
    /// Draw from `rng` (not thread_local_rng) so the caller controls the random sequence.
    for (auto & c : res)
        c = distribution(rng);
    return res;
}
|
||||
|
||||
|
@ -2,11 +2,14 @@
|
||||
|
||||
#include <Core/Types.h>
|
||||
|
||||
#include <pcg_random.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Slow random string. Useful for random names and things like this. Not for generating data.
|
||||
String getRandomASCIIString(size_t length);
|
||||
String getRandomASCIIString(size_t length, pcg64 & rng);
|
||||
|
||||
}
|
||||
|
@ -54,16 +54,3 @@ TEST(ShellCommand, ExecuteWithInput)
|
||||
|
||||
EXPECT_EQ(res, "Hello, world!\n");
|
||||
}
|
||||
|
||||
TEST(ShellCommand, AutoWait)
|
||||
{
|
||||
// <defunct> hunting:
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
{
|
||||
auto command = ShellCommand::execute("echo " + std::to_string(i));
|
||||
//command->wait(); // now automatic
|
||||
}
|
||||
|
||||
// std::cerr << "inspect me: ps auxwwf\n";
|
||||
// std::this_thread::sleep_for(std::chrono::seconds(100));
|
||||
}
|
||||
|
@ -47,54 +47,85 @@ bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args)
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Replaces single low cardinality column in a function call by its dictionary
|
||||
/// This can only happen after the arguments have been adapted in IFunctionOverloadResolver::getReturnType
|
||||
/// as it's only possible if there is one low cardinality column and, optionally, const columns
|
||||
ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
|
||||
ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count)
|
||||
{
|
||||
size_t num_rows = input_rows_count;
|
||||
/// We return the LC indexes so the LC can be reconstructed with the function result
|
||||
ColumnPtr indexes;
|
||||
|
||||
/// Find first LowCardinality column and replace it to nested dictionary.
|
||||
for (auto & column : args)
|
||||
size_t number_low_cardinality_columns = 0;
|
||||
size_t last_low_cardinality = 0;
|
||||
size_t number_const_columns = 0;
|
||||
size_t number_full_columns = 0;
|
||||
|
||||
for (size_t i = 0; i < args.size(); i++)
|
||||
{
|
||||
if (const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
|
||||
auto const & arg = args[i];
|
||||
if (checkAndGetColumn<ColumnLowCardinality>(arg.column.get()))
|
||||
{
|
||||
/// Single LowCardinality column is supported now.
|
||||
if (indexes)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single dictionary argument for function.");
|
||||
|
||||
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
|
||||
|
||||
if (!low_cardinality_type)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Incompatible type for LowCardinality column: {}",
|
||||
column.type->getName());
|
||||
|
||||
if (can_be_executed_on_default_arguments)
|
||||
{
|
||||
/// Normal case, when function can be executed on values' default.
|
||||
column.column = low_cardinality_column->getDictionary().getNestedColumn();
|
||||
indexes = low_cardinality_column->getIndexesPtr();
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
|
||||
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
|
||||
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
|
||||
column.column = dict_encoded.dictionary;
|
||||
indexes = dict_encoded.indexes;
|
||||
}
|
||||
|
||||
num_rows = column.column->size();
|
||||
column.type = low_cardinality_type->getDictionaryType();
|
||||
number_low_cardinality_columns++;
|
||||
last_low_cardinality = i;
|
||||
}
|
||||
else if (checkAndGetColumn<ColumnConst>(arg.column.get()))
|
||||
number_const_columns++;
|
||||
else
|
||||
number_full_columns++;
|
||||
}
|
||||
|
||||
/// Change size of constants.
|
||||
if (!number_low_cardinality_columns && !number_const_columns)
|
||||
return nullptr;
|
||||
|
||||
if (number_full_columns > 0 || number_low_cardinality_columns > 1)
|
||||
{
|
||||
/// This should not be possible but currently there are multiple tests in CI failing because of it
|
||||
/// TODO: Fix those cases, then enable this exception
|
||||
#if 0
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected low cardinality types found. Low cardinality: {}. Full {}. Const {}",
|
||||
number_low_cardinality_columns, number_full_columns, number_const_columns);
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
}
|
||||
else if (number_low_cardinality_columns == 1)
|
||||
{
|
||||
auto & lc_arg = args[last_low_cardinality];
|
||||
|
||||
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(lc_arg.type.get());
|
||||
if (!low_cardinality_type)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", lc_arg.type->getName());
|
||||
|
||||
const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(lc_arg.column.get());
|
||||
chassert(low_cardinality_column);
|
||||
|
||||
if (can_be_executed_on_default_arguments)
|
||||
{
|
||||
/// Normal case, when function can be executed on values' default.
|
||||
lc_arg.column = low_cardinality_column->getDictionary().getNestedColumn();
|
||||
indexes = low_cardinality_column->getIndexesPtr();
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
|
||||
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
|
||||
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
|
||||
lc_arg.column = dict_encoded.dictionary;
|
||||
indexes = dict_encoded.indexes;
|
||||
}
|
||||
|
||||
/// The new column will have a different number of rows, normally less but occasionally it might be more (NULL)
|
||||
input_rows_count = lc_arg.column->size();
|
||||
lc_arg.type = low_cardinality_type->getDictionaryType();
|
||||
}
|
||||
|
||||
/// Change size of constants
|
||||
for (auto & column : args)
|
||||
{
|
||||
if (const auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get()))
|
||||
{
|
||||
column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), num_rows);
|
||||
column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), input_rows_count);
|
||||
column.type = recursiveRemoveLowCardinality(column.type);
|
||||
}
|
||||
}
|
||||
@ -270,6 +301,8 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
|
||||
bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments();
|
||||
|
||||
const auto & dictionary_type = res_low_cardinality_type->getDictionaryType();
|
||||
/// The arguments should have been adapted in IFunctionOverloadResolver::getReturnType
|
||||
/// So there is only one low cardinality column (and optionally some const columns) and no full column
|
||||
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
|
||||
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);
|
||||
|
||||
|
@ -310,7 +310,7 @@ IColumn::Selector ConcurrentHashJoin::selectDispatchBlock(const Strings & key_co
|
||||
{
|
||||
const auto & key_col = from_block.getByName(key_name).column->convertToFullColumnIfConst();
|
||||
const auto & key_col_no_lc = recursiveRemoveLowCardinality(recursiveRemoveSparse(key_col));
|
||||
key_col_no_lc->updateWeakHash32(hash);
|
||||
hash.update(key_col_no_lc->getWeakHash32());
|
||||
}
|
||||
return hashToSelector(hash, num_shards);
|
||||
}
|
||||
|
@ -1726,7 +1726,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
const auto & join_clause = table_join.getOnlyClause();
|
||||
|
||||
auto join_kind = table_join.kind();
|
||||
bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind);
|
||||
auto join_strictness = table_join.strictness();
|
||||
|
||||
bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
|
||||
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));
|
||||
|
||||
auto has_non_const = [](const Block & block, const auto & keys)
|
||||
{
|
||||
@ -1745,7 +1748,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
bool has_non_const_keys = has_non_const(query_plan.getCurrentDataStream().header, join_clause.key_names_left)
|
||||
&& has_non_const(joined_plan->getCurrentDataStream().header, join_clause.key_names_right);
|
||||
|
||||
if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys)
|
||||
if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
|
||||
{
|
||||
auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left);
|
||||
auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right);
|
||||
|
@ -554,7 +554,7 @@ static Blocks scatterBlockByHashImpl(const Strings & key_columns_names, const Bl
|
||||
for (const auto & key_name : key_columns_names)
|
||||
{
|
||||
ColumnPtr key_col = materializeColumn(block, key_name);
|
||||
key_col->updateWeakHash32(hash);
|
||||
hash.update(key_col->getWeakHash32());
|
||||
}
|
||||
auto selector = hashToSelector(hash, sharder);
|
||||
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <Common/FieldVisitorToString.h>
|
||||
#include <Common/KnownObjectNames.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -19,9 +18,6 @@
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/FunctionSecretArgumentsFinderAST.h>
|
||||
#include <Core/QualifiedTableName.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
|
||||
using namespace std::literals;
|
||||
@ -632,6 +628,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
|
||||
settings.ostr << ", ";
|
||||
if (arguments->children[i]->as<ASTSetQuery>())
|
||||
settings.ostr << "SETTINGS ";
|
||||
nested_dont_need_parens.list_element_index = i;
|
||||
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
|
||||
}
|
||||
settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : "");
|
||||
@ -642,12 +639,14 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_operator : "") << ((frame.need_parens && !alias.empty()) ? "tuple" : "") << '('
|
||||
<< (settings.hilite ? hilite_none : "");
|
||||
|
||||
for (size_t i = 0; i < arguments->children.size(); ++i)
|
||||
{
|
||||
if (i != 0)
|
||||
settings.ostr << ", ";
|
||||
if (arguments->children[i]->as<ASTSetQuery>())
|
||||
settings.ostr << "SETTINGS ";
|
||||
nested_dont_need_parens.list_element_index = i;
|
||||
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
|
||||
}
|
||||
settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : "");
|
||||
@ -663,6 +662,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
|
||||
settings.ostr << ", ";
|
||||
if (arguments->children[i]->as<ASTSetQuery>())
|
||||
settings.ostr << "SETTINGS ";
|
||||
nested_dont_need_parens.list_element_index = i;
|
||||
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
|
||||
}
|
||||
settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : "");
|
||||
|
@ -745,7 +745,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan,
|
||||
{
|
||||
auto & interpolate_node_typed = interpolate_node->as<InterpolateNode &>();
|
||||
|
||||
PlannerActionsVisitor planner_actions_visitor(planner_context);
|
||||
PlannerActionsVisitor planner_actions_visitor(
|
||||
planner_context,
|
||||
/* use_column_identifier_as_action_node_name_, (default value)*/ true,
|
||||
/// Prefer the INPUT to CONSTANT nodes (actions must be non constant)
|
||||
/* always_use_const_column_for_constant_nodes */ false);
|
||||
|
||||
auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag,
|
||||
interpolate_node_typed.getExpression());
|
||||
if (expression_to_interpolate_expression_nodes.size() != 1)
|
||||
|
@ -487,16 +487,33 @@ public:
|
||||
return node;
|
||||
}
|
||||
|
||||
const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column)
|
||||
[[nodiscard]] String addConstantIfNecessary(
|
||||
const std::string & node_name, const ColumnWithTypeAndName & column, bool always_use_const_column_for_constant_nodes)
|
||||
{
|
||||
chassert(column.column != nullptr);
|
||||
auto it = node_name_to_node.find(node_name);
|
||||
if (it != node_name_to_node.end() && (!always_use_const_column_for_constant_nodes || it->second->column))
|
||||
return {node_name};
|
||||
|
||||
if (it != node_name_to_node.end())
|
||||
return it->second;
|
||||
{
|
||||
/// There is a node with this name, but it doesn't have a column
|
||||
/// This likely happens because we executed the query until WithMergeableState with a const node in the
|
||||
/// WHERE clause and, as the results of headers are materialized, the column was removed
|
||||
/// Let's add a new column and keep this
|
||||
String dupped_name{node_name + "_dupped"};
|
||||
if (node_name_to_node.find(dupped_name) != node_name_to_node.end())
|
||||
return dupped_name;
|
||||
|
||||
const auto * node = &actions_dag.addColumn(column);
|
||||
node_name_to_node[dupped_name] = node;
|
||||
return dupped_name;
|
||||
}
|
||||
|
||||
const auto * node = &actions_dag.addColumn(column);
|
||||
node_name_to_node[node->result_name] = node;
|
||||
|
||||
return node;
|
||||
return {node_name};
|
||||
}
|
||||
|
||||
template <typename FunctionOrOverloadResolver>
|
||||
@ -525,7 +542,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string_view, const ActionsDAG::Node *> node_name_to_node;
|
||||
std::unordered_map<String, const ActionsDAG::Node *> node_name_to_node;
|
||||
ActionsDAG & actions_dag;
|
||||
QueryTreeNodePtr scope_node;
|
||||
};
|
||||
@ -533,9 +550,11 @@ private:
|
||||
class PlannerActionsVisitorImpl
|
||||
{
|
||||
public:
|
||||
PlannerActionsVisitorImpl(ActionsDAG & actions_dag,
|
||||
PlannerActionsVisitorImpl(
|
||||
ActionsDAG & actions_dag,
|
||||
const PlannerContextPtr & planner_context_,
|
||||
bool use_column_identifier_as_action_node_name_);
|
||||
bool use_column_identifier_as_action_node_name_,
|
||||
bool always_use_const_column_for_constant_nodes_);
|
||||
|
||||
ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node);
|
||||
|
||||
@ -595,14 +614,18 @@ private:
|
||||
const PlannerContextPtr planner_context;
|
||||
ActionNodeNameHelper action_node_name_helper;
|
||||
bool use_column_identifier_as_action_node_name;
|
||||
bool always_use_const_column_for_constant_nodes;
|
||||
};
|
||||
|
||||
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAG & actions_dag,
|
||||
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(
|
||||
ActionsDAG & actions_dag,
|
||||
const PlannerContextPtr & planner_context_,
|
||||
bool use_column_identifier_as_action_node_name_)
|
||||
bool use_column_identifier_as_action_node_name_,
|
||||
bool always_use_const_column_for_constant_nodes_)
|
||||
: planner_context(planner_context_)
|
||||
, action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_)
|
||||
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
|
||||
, always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_)
|
||||
{
|
||||
actions_stack.emplace_back(actions_dag, nullptr);
|
||||
}
|
||||
@ -725,17 +748,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
|
||||
column.type = constant_type;
|
||||
column.column = column.type->createColumnConst(1, constant_literal);
|
||||
|
||||
actions_stack[0].addConstantIfNecessary(constant_node_name, column);
|
||||
String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column, always_use_const_column_for_constant_nodes);
|
||||
|
||||
size_t actions_stack_size = actions_stack.size();
|
||||
for (size_t i = 1; i < actions_stack_size; ++i)
|
||||
{
|
||||
auto & actions_stack_node = actions_stack[i];
|
||||
actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column);
|
||||
actions_stack_node.addInputConstantColumnIfNecessary(final_name, column);
|
||||
}
|
||||
|
||||
return {constant_node_name, Levels(0)};
|
||||
|
||||
return {final_name, Levels(0)};
|
||||
}
|
||||
|
||||
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node)
|
||||
@ -864,16 +886,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma
|
||||
else
|
||||
column.column = std::move(column_set);
|
||||
|
||||
actions_stack[0].addConstantIfNecessary(column.name, column);
|
||||
String final_name = actions_stack[0].addConstantIfNecessary(column.name, column, always_use_const_column_for_constant_nodes);
|
||||
|
||||
size_t actions_stack_size = actions_stack.size();
|
||||
for (size_t i = 1; i < actions_stack_size; ++i)
|
||||
{
|
||||
auto & actions_stack_node = actions_stack[i];
|
||||
actions_stack_node.addInputConstantColumnIfNecessary(column.name, column);
|
||||
actions_stack_node.addInputConstantColumnIfNecessary(final_name, column);
|
||||
}
|
||||
|
||||
return {column.name, Levels(0)};
|
||||
return {final_name, Levels(0)};
|
||||
}
|
||||
|
||||
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node)
|
||||
@ -1010,14 +1032,19 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
|
||||
|
||||
}
|
||||
|
||||
PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_)
|
||||
PlannerActionsVisitor::PlannerActionsVisitor(
|
||||
const PlannerContextPtr & planner_context_,
|
||||
bool use_column_identifier_as_action_node_name_,
|
||||
bool always_use_const_column_for_constant_nodes_)
|
||||
: planner_context(planner_context_)
|
||||
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
|
||||
, always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_)
|
||||
{}
|
||||
|
||||
ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node)
|
||||
{
|
||||
PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name);
|
||||
PlannerActionsVisitorImpl actions_visitor_impl(
|
||||
actions_dag, planner_context, use_column_identifier_as_action_node_name, always_use_const_column_for_constant_nodes);
|
||||
return actions_visitor_impl.visit(expression_node);
|
||||
}
|
||||
|
||||
|
@ -27,11 +27,17 @@ using PlannerContextPtr = std::shared_ptr<PlannerContext>;
|
||||
* During actions build, there is special handling for following functions:
|
||||
* 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added.
|
||||
* 2. For function `in` and its variants, already collected sets from planner context are used.
|
||||
* 3. When building actions that use CONSTANT nodes, by default we ignore pre-existing INPUTs if those don't have
|
||||
* a column (a const column always has a column). This is for compatibility with previous headers. We disable this
|
||||
* behaviour when we explicitly want to override CONSTANT nodes with the input (resolving InterpolateNode for example)
|
||||
*/
|
||||
class PlannerActionsVisitor
|
||||
{
|
||||
public:
|
||||
explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_ = true);
|
||||
explicit PlannerActionsVisitor(
|
||||
const PlannerContextPtr & planner_context_,
|
||||
bool use_column_identifier_as_action_node_name_ = true,
|
||||
bool always_use_const_column_for_constant_nodes_ = true);
|
||||
|
||||
/** Add actions necessary to calculate expression node into expression dag.
|
||||
* Necessary actions are not added in actions dag output.
|
||||
@ -42,6 +48,7 @@ public:
|
||||
private:
|
||||
const PlannerContextPtr planner_context;
|
||||
bool use_column_identifier_as_action_node_name = true;
|
||||
bool always_use_const_column_for_constant_nodes = true;
|
||||
};
|
||||
|
||||
/** Calculate query tree expression node action dag name and add them into node to name map.
|
||||
|
@ -77,7 +77,6 @@ namespace ErrorCodes
|
||||
extern const int INVALID_JOIN_ON_EXPRESSION;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int SYNTAX_ERROR;
|
||||
extern const int ACCESS_DENIED;
|
||||
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
extern const int TOO_MANY_COLUMNS;
|
||||
@ -1417,12 +1416,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
{
|
||||
if (!join_clause.hasASOF())
|
||||
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
|
||||
"JOIN {} no inequality in ASOF JOIN ON section.",
|
||||
join_node.formatASTForErrorMessage());
|
||||
|
||||
if (table_join_clause.key_names_left.size() <= 1)
|
||||
throw Exception(ErrorCodes::SYNTAX_ERROR,
|
||||
"JOIN {} ASOF join needs at least one equi-join column",
|
||||
"JOIN {} no inequality in ASOF JOIN ON section",
|
||||
join_node.formatASTForErrorMessage());
|
||||
}
|
||||
|
||||
@ -1544,7 +1538,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
{
|
||||
const auto & join_clause = table_join->getOnlyClause();
|
||||
|
||||
bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind);
|
||||
bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
|
||||
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));
|
||||
|
||||
|
||||
auto has_non_const = [](const Block & block, const auto & keys)
|
||||
{
|
||||
@ -1564,7 +1560,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left)
|
||||
&& has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right);
|
||||
|
||||
if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys)
|
||||
if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
|
||||
{
|
||||
auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left);
|
||||
auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right);
|
||||
|
@ -34,13 +34,20 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Creates a merge-join cursor over `block`, sorted by `columns` in the given order.
/// The cursor is told whether the join is ASOF (strictness == JoinStrictness::Asof).
FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, JoinStrictness strictness)
{
    SortDescription desc;
    desc.reserve(columns.size());
    for (const auto & name : columns)
        desc.emplace_back(name);
    return std::make_unique<FullMergeJoinCursor>(block, desc, strictness == JoinStrictness::Asof);
}
|
||||
|
||||
/// Returns true only when `column` is a ColumnNullable whose value at `row` is NULL;
/// every other column type reports false.
bool ALWAYS_INLINE isNullAt(const IColumn & column, size_t row)
{
    const auto * as_nullable = checkAndGetColumn<ColumnNullable>(&column);
    return as_nullable != nullptr && as_nullable->isNullAt(row);
}
|
||||
|
||||
template <bool has_left_nulls, bool has_right_nulls>
|
||||
@ -54,7 +61,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
|
||||
if (left_nullable && right_nullable)
|
||||
{
|
||||
int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint);
|
||||
if (res)
|
||||
if (res != 0)
|
||||
return res;
|
||||
|
||||
/// NULL != NULL case
|
||||
@ -90,9 +97,10 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
|
||||
|
||||
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
|
||||
const SortCursorImpl & rhs, size_t rpos,
|
||||
size_t key_length,
|
||||
int null_direction_hint)
|
||||
{
|
||||
for (size_t i = 0; i < lhs.sort_columns_size; ++i)
|
||||
for (size_t i = 0; i < key_length; ++i)
|
||||
{
|
||||
/// TODO(@vdimir): use nullableCompareAt only if there's nullable columns
|
||||
int cmp = nullableCompareAt<true, true>(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint);
|
||||
@ -104,13 +112,18 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
|
||||
|
||||
/// Compares the current rows of two cursors over all sort columns of `lhs`.
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint)
{
    /// The positional overload takes an explicit key length; use every sort column here.
    return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
}
|
||||
|
||||
int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs, int null_direction_hint)
|
||||
{
|
||||
return nullableCompareAt<true, true>(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow(), null_direction_hint);
|
||||
}
|
||||
|
||||
/// Returns true when the last row of the left cursor compares less than the current row
/// of the right cursor, over all sort columns of `lhs`.
bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint)
{
    /// The last row of left cursor is less than the current row of the right cursor.
    int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
    return cmp < 0;
}
|
||||
|
||||
@ -222,25 +235,136 @@ Chunk getRowFromChunk(const Chunk & chunk, size_t pos)
|
||||
return result;
|
||||
}
|
||||
|
||||
void inline addRange(PaddedPODArray<UInt64> & left_map, size_t start, size_t end)
|
||||
void inline addRange(PaddedPODArray<UInt64> & values, UInt64 start, UInt64 end)
|
||||
{
|
||||
assert(end > start);
|
||||
for (size_t i = start; i < end; ++i)
|
||||
left_map.push_back(i);
|
||||
for (UInt64 i = start; i < end; ++i)
|
||||
values.push_back(i);
|
||||
}
|
||||
|
||||
void inline addMany(PaddedPODArray<UInt64> & left_or_right_map, size_t idx, size_t num)
|
||||
void inline addMany(PaddedPODArray<UInt64> & values, UInt64 value, size_t num)
|
||||
{
|
||||
for (size_t i = 0; i < num; ++i)
|
||||
left_or_right_map.push_back(idx);
|
||||
values.resize_fill(values.size() + num, value);
|
||||
}
|
||||
}
|
||||
|
||||
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_)
|
||||
: sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_)
|
||||
/// Captures the key of the row at `pos`: a one-row copy of each sort column of the cursor,
/// followed by a one-row copy of the ASOF column when the cursor has one.
JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos)
{
    /// Copies the single value at `pos` out of `source` into a fresh column and stores it in `row`.
    auto save_value = [this, pos](const IColumn & source)
    {
        auto single = source.cloneEmpty();
        single->insertFrom(source, pos);
        row.push_back(std::move(single));
    };

    row.reserve(cursor->sort_columns.size());
    for (const auto & key_column : cursor->sort_columns)
        save_value(*key_column);

    const IColumn * asof_column = cursor.getAsofColumn();
    if (!asof_column)
        return;

    if (const auto * nullable_asof_column = checkAndGetColumn<ColumnNullable>(asof_column))
    {
        /// Only a matched row is saved and NULL never matches anything, so NULL cannot be a key here;
        /// keep the nested column rather than the nullable wrapper.
        chassert(!nullable_asof_column->isNullAt(pos));
        asof_column = nullable_asof_column->getNestedColumnPtr().get();
    }

    save_value(*asof_column);
}
|
||||
|
||||
void JoinKeyRow::reset()
|
||||
{
|
||||
row.clear();
|
||||
}
|
||||
|
||||
bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const
|
||||
{
|
||||
if (row.empty())
|
||||
return false;
|
||||
|
||||
for (size_t i = 0; i < cursor->sort_columns_size; ++i)
|
||||
{
|
||||
// int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction);
|
||||
int cmp = nullableCompareAt<true, true>(*this->row[i], *cursor->sort_columns[i], 0, cursor->getRow(), cursor->desc[i].nulls_direction);
|
||||
if (cmp != 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
|
||||
{
|
||||
chassert(this->row.size() == cursor->sort_columns_size + 1);
|
||||
if (!equals(cursor))
|
||||
return false;
|
||||
|
||||
const auto & asof_row = row.back();
|
||||
if (isNullAt(*asof_row, 0) || isNullAt(*cursor.getAsofColumn(), cursor->getRow()))
|
||||
return false;
|
||||
|
||||
int cmp = 0;
|
||||
if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(cursor.getAsofColumn()))
|
||||
cmp = nullable_column->getNestedColumn().compareAt(cursor->getRow(), 0, *asof_row, 1);
|
||||
else
|
||||
cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1);
|
||||
|
||||
return (asof_inequality == ASOFJoinInequality::Less && cmp < 0)
|
||||
|| (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0)
|
||||
|| (asof_inequality == ASOFJoinInequality::Greater && cmp > 0)
|
||||
|| (asof_inequality == ASOFJoinInequality::GreaterOrEquals && cmp >= 0);
|
||||
}
|
||||
|
||||
void AnyJoinState::set(size_t source_num, const FullMergeJoinCursor & cursor)
|
||||
{
|
||||
assert(cursor->rows);
|
||||
keys[source_num] = JoinKeyRow(cursor, cursor->rows - 1);
|
||||
}
|
||||
|
||||
void AnyJoinState::reset(size_t source_num)
|
||||
{
|
||||
keys[source_num].reset();
|
||||
value.clear();
|
||||
}
|
||||
|
||||
/// Takes ownership of `value_` and stores it as the saved value.
void AnyJoinState::setValue(Chunk value_)
{
    value = std::move(value_);
}
|
||||
|
||||
bool AnyJoinState::empty() const { return keys[0].row.empty() && keys[1].row.empty(); }
|
||||
|
||||
|
||||
void AsofJoinState::set(const FullMergeJoinCursor & rcursor, size_t rpos)
|
||||
{
|
||||
key = JoinKeyRow(rcursor, rpos);
|
||||
value = rcursor.getCurrent().clone();
|
||||
value_row = rpos;
|
||||
}
|
||||
|
||||
void AsofJoinState::reset()
|
||||
{
|
||||
key.reset();
|
||||
value.clear();
|
||||
}
|
||||
|
||||
/// Builds a cursor over blocks shaped like `sample_block_`, ordered by `description_`.
/// Throws LOGICAL_ERROR if the sort description is empty.
/// With `is_asof` set, the last element of the description is removed from `desc` and only
/// the position of its column in the sample block is remembered.
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof)
    : sample_block(materializeBlock(sample_block_).cloneEmpty())
    , desc(description_)
{
    if (desc.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor");

    if (!is_asof)
        return;

    /// For ASOF join the prefix of the sort description is used for equality comparison,
    /// while the last column is used for inequality comparison and is handled separately.
    /// Take a copy before pop_back() destroys the element.
    const auto asof_description = desc.back();
    desc.pop_back();

    chassert(asof_description.direction == 1 && asof_description.nulls_direction == 1);
    asof_column_position = sample_block.getPositionByName(asof_description.column_name);
}
|
||||
|
||||
const Chunk & FullMergeJoinCursor::getCurrent() const
|
||||
{
|
||||
@ -278,48 +402,103 @@ bool FullMergeJoinCursor::fullyCompleted() const
|
||||
return !cursor.isValid() && recieved_all_blocks;
|
||||
}
|
||||
|
||||
/// Produces a string "<row/rows(finished)>[v1, v2, ...]" describing the cursor position.
/// The bracketed list holds the dumped sort-column values of the current row, followed by the
/// ASOF column value when present; it is empty when the cursor is not valid.
String FullMergeJoinCursor::dump() const
{
    Strings values;

    if (cursor.isValid())
    {
        const size_t current_row = cursor.getRow();
        Field field;

        /// Reads the value at the current row of `column` and appends its dump.
        auto append_value = [&](const IColumn & column)
        {
            column.get(current_row, field);
            values.push_back(field.dump());
        };

        for (size_t i = 0; i < cursor.sort_columns_size; ++i)
            append_value(*cursor.sort_columns[i]);

        if (const auto * asof_column = getAsofColumn())
            append_value(*asof_column);
    }

    const char * finished_mark = recieved_all_blocks ? "(finished)" : "";
    return fmt::format("<{}/{}{}>[{}]", cursor.getRow(), cursor.rows, finished_mark, fmt::join(values, ", "));
}
|
||||
|
||||
MergeJoinAlgorithm::MergeJoinAlgorithm(
|
||||
JoinPtr table_join_,
|
||||
JoinKind kind_,
|
||||
JoinStrictness strictness_,
|
||||
const TableJoin::JoinOnClause & on_clause_,
|
||||
const Blocks & input_headers,
|
||||
size_t max_block_size_)
|
||||
: table_join(table_join_)
|
||||
: kind(kind_)
|
||||
, strictness(strictness_)
|
||||
, max_block_size(max_block_size_)
|
||||
, log(getLogger("MergeJoinAlgorithm"))
|
||||
{
|
||||
if (input_headers.size() != 2)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs");
|
||||
|
||||
auto strictness = table_join->getTableJoin().strictness();
|
||||
if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All)
|
||||
if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All && strictness != JoinStrictness::Asof)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness);
|
||||
|
||||
auto kind = table_join->getTableJoin().kind();
|
||||
if (strictness == JoinStrictness::Asof)
|
||||
{
|
||||
if (kind != JoinKind::Left && kind != JoinKind::Inner)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not implement ASOF {} join", kind);
|
||||
}
|
||||
|
||||
if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind))
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind);
|
||||
|
||||
const auto & join_on = table_join->getTableJoin().getOnlyClause();
|
||||
|
||||
if (join_on.on_filter_condition_left || join_on.on_filter_condition_right)
|
||||
if (on_clause_.on_filter_condition_left || on_clause_.on_filter_condition_right)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions");
|
||||
|
||||
cursors = {
|
||||
createCursor(input_headers[0], join_on.key_names_left),
|
||||
createCursor(input_headers[1], join_on.key_names_right)
|
||||
createCursor(input_headers[0], on_clause_.key_names_left, strictness),
|
||||
createCursor(input_headers[1], on_clause_.key_names_right, strictness),
|
||||
};
|
||||
}
|
||||
|
||||
for (const auto & [left_key, right_key] : table_join->getTableJoin().leftToRightKeyRemap())
|
||||
MergeJoinAlgorithm::MergeJoinAlgorithm(
|
||||
JoinPtr join_ptr,
|
||||
const Blocks & input_headers,
|
||||
size_t max_block_size_)
|
||||
: MergeJoinAlgorithm(
|
||||
join_ptr->getTableJoin().kind(),
|
||||
join_ptr->getTableJoin().strictness(),
|
||||
join_ptr->getTableJoin().getOnlyClause(),
|
||||
input_headers,
|
||||
max_block_size_)
|
||||
{
|
||||
for (const auto & [left_key, right_key] : join_ptr->getTableJoin().leftToRightKeyRemap())
|
||||
{
|
||||
size_t left_idx = input_headers[0].getPositionByName(left_key);
|
||||
size_t right_idx = input_headers[1].getPositionByName(right_key);
|
||||
left_to_right_key_remap[left_idx] = right_idx;
|
||||
}
|
||||
|
||||
const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(table_join.get());
|
||||
const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(join_ptr.get());
|
||||
if (smjPtr)
|
||||
{
|
||||
null_direction_hint = smjPtr->getNullDirection();
|
||||
}
|
||||
|
||||
if (strictness == JoinStrictness::Asof)
|
||||
setAsofInequality(join_ptr->getTableJoin().getAsofInequality());
|
||||
}
|
||||
|
||||
/// Sets the inequality used by ASOF join.
/// Throws LOGICAL_ERROR if the algorithm's strictness is not Asof, or if the inequality is None.
void MergeJoinAlgorithm::setAsofInequality(ASOFJoinInequality asof_inequality_)
{
    const bool is_asof_join = (strictness == JoinStrictness::Asof);
    if (!is_asof_join)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "setAsofInequality is only supported for ASOF joins");

    const bool is_unset = (asof_inequality_ == ASOFJoinInequality::None);
    if (is_unset)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "ASOF inequality cannot be None");

    asof_inequality = asof_inequality_;
}
|
||||
|
||||
void MergeJoinAlgorithm::logElapsed(double seconds)
|
||||
@ -407,7 +586,7 @@ struct AllJoinImpl
|
||||
size_t lnum = nextDistinct(left_cursor.cursor);
|
||||
size_t rnum = nextDistinct(right_cursor.cursor);
|
||||
|
||||
bool all_fit_in_block = std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size;
|
||||
bool all_fit_in_block = !max_block_size || std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size;
|
||||
bool have_all_ranges = left_cursor.cursor.isValid() && right_cursor.cursor.isValid();
|
||||
if (all_fit_in_block && have_all_ranges)
|
||||
{
|
||||
@ -421,7 +600,7 @@ struct AllJoinImpl
|
||||
else
|
||||
{
|
||||
assert(state == nullptr);
|
||||
state = std::make_unique<AllJoinState>(left_cursor.cursor, lpos, right_cursor.cursor, rpos);
|
||||
state = std::make_unique<AllJoinState>(left_cursor, lpos, right_cursor, rpos);
|
||||
state->addRange(0, left_cursor.getCurrent().clone(), lpos, lnum);
|
||||
state->addRange(1, right_cursor.getCurrent().clone(), rpos, rnum);
|
||||
return;
|
||||
@ -466,6 +645,17 @@ void dispatchKind(JoinKind kind, Args && ... args)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind);
|
||||
}
|
||||
|
||||
/// Creates empty result columns: clones of the sample columns of the first cursor
/// followed by clones of the sample columns of the second cursor.
MutableColumns MergeJoinAlgorithm::getEmptyResultColumns() const
{
    MutableColumns result_cols;
    for (const auto & source_cursor : cursors)
    {
        for (const auto & sample_column : source_cursor->sampleColumns())
            result_cols.push_back(sample_column->cloneEmpty());
    }
    return result_cols;
}
|
||||
|
||||
std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState()
|
||||
{
|
||||
if (all_join_state && all_join_state->finished())
|
||||
@ -479,7 +669,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
|
||||
/// Accumulate blocks with same key in all_join_state
|
||||
for (size_t i = 0; i < 2; ++i)
|
||||
{
|
||||
if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(cursors[i]->cursor))
|
||||
if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(*cursors[i]))
|
||||
{
|
||||
size_t pos = cursors[i]->cursor.getRow();
|
||||
size_t num = nextDistinct(cursors[i]->cursor);
|
||||
@ -499,15 +689,10 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
|
||||
stat.max_blocks_loaded = std::max(stat.max_blocks_loaded, all_join_state->blocksStored());
|
||||
|
||||
/// join all rows with current key
|
||||
MutableColumns result_cols;
|
||||
for (size_t i = 0; i < 2; ++i)
|
||||
{
|
||||
for (const auto & col : cursors[i]->sampleColumns())
|
||||
result_cols.push_back(col->cloneEmpty());
|
||||
}
|
||||
MutableColumns result_cols = getEmptyResultColumns();
|
||||
|
||||
size_t total_rows = 0;
|
||||
while (total_rows < max_block_size)
|
||||
while (!max_block_size || total_rows < max_block_size)
|
||||
{
|
||||
const auto & left_range = all_join_state->getLeft();
|
||||
const auto & right_range = all_join_state->getRight();
|
||||
@ -532,7 +717,52 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
|
||||
return {};
|
||||
}
|
||||
|
||||
MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin(JoinKind kind)
|
||||
std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAsofJoinState()
|
||||
{
|
||||
if (strictness != JoinStrictness::Asof)
|
||||
return {};
|
||||
|
||||
if (!cursors[1]->fullyCompleted())
|
||||
return {};
|
||||
|
||||
auto & left_cursor = *cursors[0];
|
||||
const auto & left_columns = left_cursor.getCurrent().getColumns();
|
||||
|
||||
MutableColumns result_cols = getEmptyResultColumns();
|
||||
|
||||
while (left_cursor->isValid() && asof_join_state.hasMatch(left_cursor, asof_inequality))
|
||||
{
|
||||
size_t i = 0;
|
||||
for (const auto & col : left_columns)
|
||||
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
|
||||
for (const auto & col : asof_join_state.value.getColumns())
|
||||
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
|
||||
chassert(i == result_cols.size());
|
||||
left_cursor->next();
|
||||
}
|
||||
|
||||
while (isLeft(kind) && left_cursor->isValid())
|
||||
{
|
||||
/// return row with default values at right side
|
||||
size_t i = 0;
|
||||
for (const auto & col : left_columns)
|
||||
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
|
||||
for (; i < result_cols.size(); ++i)
|
||||
result_cols[i]->insertDefault();
|
||||
chassert(i == result_cols.size());
|
||||
|
||||
left_cursor->next();
|
||||
}
|
||||
|
||||
size_t result_rows = result_cols.empty() ? 0 : result_cols.front()->size();
|
||||
if (result_rows)
|
||||
return Status(Chunk(std::move(result_cols), result_rows));
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin()
|
||||
{
|
||||
PaddedPODArray<UInt64> idx_map[2];
|
||||
|
||||
@ -595,7 +825,7 @@ struct AnyJoinImpl
|
||||
FullMergeJoinCursor & right_cursor,
|
||||
PaddedPODArray<UInt64> & left_map,
|
||||
PaddedPODArray<UInt64> & right_map,
|
||||
AnyJoinState & state,
|
||||
AnyJoinState & any_join_state,
|
||||
int null_direction_hint)
|
||||
{
|
||||
assert(enabled);
|
||||
@ -656,21 +886,21 @@ struct AnyJoinImpl
|
||||
}
|
||||
}
|
||||
|
||||
/// Remember index of last joined row to propagate it to next block
|
||||
/// Remember last joined row to propagate it to next block
|
||||
|
||||
state.setValue({});
|
||||
any_join_state.setValue({});
|
||||
if (!left_cursor->isValid())
|
||||
{
|
||||
state.set(0, left_cursor.cursor);
|
||||
any_join_state.set(0, left_cursor);
|
||||
if (cmp == 0 && isLeft(kind))
|
||||
state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos));
|
||||
any_join_state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos));
|
||||
}
|
||||
|
||||
if (!right_cursor->isValid())
|
||||
{
|
||||
state.set(1, right_cursor.cursor);
|
||||
any_join_state.set(1, right_cursor);
|
||||
if (cmp == 0 && isRight(kind))
|
||||
state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos));
|
||||
any_join_state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos));
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -680,40 +910,34 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
|
||||
if (any_join_state.empty())
|
||||
return {};
|
||||
|
||||
auto kind = table_join->getTableJoin().kind();
|
||||
|
||||
Chunk result;
|
||||
|
||||
for (size_t source_num = 0; source_num < 2; ++source_num)
|
||||
{
|
||||
auto & current = *cursors[source_num];
|
||||
auto & state = any_join_state;
|
||||
if (any_join_state.keys[source_num].equals(current.cursor))
|
||||
if (any_join_state.keys[source_num].equals(current))
|
||||
{
|
||||
size_t start_pos = current->getRow();
|
||||
size_t length = nextDistinct(current.cursor);
|
||||
|
||||
if (length && isLeft(kind) && source_num == 0)
|
||||
{
|
||||
if (state.value)
|
||||
result = copyChunkResized(current.getCurrent(), state.value, start_pos, length);
|
||||
if (any_join_state.value)
|
||||
result = copyChunkResized(current.getCurrent(), any_join_state.value, start_pos, length);
|
||||
else
|
||||
result = createBlockWithDefaults(source_num, start_pos, length);
|
||||
}
|
||||
|
||||
if (length && isRight(kind) && source_num == 1)
|
||||
{
|
||||
if (state.value)
|
||||
result = copyChunkResized(state.value, current.getCurrent(), start_pos, length);
|
||||
if (any_join_state.value)
|
||||
result = copyChunkResized(any_join_state.value, current.getCurrent(), start_pos, length);
|
||||
else
|
||||
result = createBlockWithDefaults(source_num, start_pos, length);
|
||||
}
|
||||
|
||||
/// We've found row with other key, no need to skip more rows with current key
|
||||
if (current->isValid())
|
||||
{
|
||||
state.keys[source_num].reset();
|
||||
}
|
||||
any_join_state.keys[source_num].reset();
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -726,7 +950,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
|
||||
return {};
|
||||
}
|
||||
|
||||
MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind)
|
||||
MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin()
|
||||
{
|
||||
if (auto result = handleAnyJoinState())
|
||||
return std::move(*result);
|
||||
@ -771,10 +995,151 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind)
|
||||
return Status(std::move(result));
|
||||
}
|
||||
|
||||
|
||||
/// Performs one step of ASOF join over the current left and right chunks.
/// Requests more data (Status(0) / Status(1)) when the corresponding cursor has no valid row;
/// otherwise advances both cursors until one is exhausted and returns the rows produced so far.
MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin()
{
    auto & left_cursor = *cursors[0];
    if (!left_cursor->isValid())
        return Status(0);

    auto & right_cursor = *cursors[1];
    if (!right_cursor->isValid())
        return Status(1);

    const auto & left_columns = left_cursor.getCurrent().getColumns();
    const auto & right_columns = right_cursor.getCurrent().getColumns();

    MutableColumns result_cols = getEmptyResultColumns();

    /// Appends one joined row: left row `lrow` followed by row `rrow` of `matched_columns`.
    auto emit_match = [&](size_t lrow, const Columns & matched_columns, size_t rrow)
    {
        size_t i = 0;
        for (const auto & col : left_columns)
            result_cols[i++]->insertFrom(*col, lrow);
        for (const auto & col : matched_columns)
            result_cols[i++]->insertFrom(*col, rrow);
        chassert(i == result_cols.size());
    };

    const bool is_less_kind
        = asof_inequality == ASOFJoinInequality::Less || asof_inequality == ASOFJoinInequality::LessOrEquals;
    const bool is_greater_kind
        = asof_inequality == ASOFJoinInequality::Greater || asof_inequality == ASOFJoinInequality::GreaterOrEquals;

    while (left_cursor->isValid() && right_cursor->isValid())
    {
        const auto lpos = left_cursor->getRow();
        const auto rpos = right_cursor->getRow();

        auto cmp = compareCursors(*left_cursor, *right_cursor, null_direction_hint);
        if (cmp == 0)
        {
            /// Equal keys with a NULL ASOF value are handled like differing keys:
            /// NULL at the left gives -1, NULL at the right gives 1 (the right check wins if both are NULL).
            if (isNullAt(*left_cursor.getAsofColumn(), lpos))
                cmp = -1;
            if (isNullAt(*right_cursor.getAsofColumn(), rpos))
                cmp = 1;
        }

        if (cmp > 0)
        {
            /// skip rows in right table until we find match for current row in left table
            nextDistinct(*right_cursor);
            continue;
        }

        if (cmp < 0)
        {
            if (asof_join_state.hasMatch(left_cursor, asof_inequality))
            {
                emit_match(lpos, asof_join_state.value.getColumns(), asof_join_state.value_row);
                left_cursor->next();
                continue;
            }

            asof_join_state.reset();

            /// no matches for rows in left table, just pass them through
            const size_t num = nextDistinct(*left_cursor);

            if (isLeft(kind) && num)
            {
                /// return them with default values at right side
                size_t i = 0;
                for (const auto & col : left_columns)
                    result_cols[i++]->insertRangeFrom(*col, lpos, num);
                for (; i < result_cols.size(); ++i)
                    result_cols[i]->insertManyDefaults(num);
                chassert(i == result_cols.size());
            }
            continue;
        }

        /// Keys are equal and both ASOF values are not NULL.
        const auto asof_cmp = compareAsofCursors(left_cursor, right_cursor, null_direction_hint);

        if (is_less_kind)
        {
            const bool satisfied = (asof_inequality == ASOFJoinInequality::Less) ? (asof_cmp <= -1) : (asof_cmp <= 0);
            if (satisfied)
            {
                /// First row in right table that is greater (or equal) than current row in left table
                /// matches asof join condition the best
                emit_match(lpos, right_columns, rpos);
                left_cursor->next();
            }
            else
            {
                /// Asof condition is not (yet) satisfied, skip row in right table
                right_cursor->next();
            }
            continue;
        }

        if (is_greater_kind)
        {
            const bool satisfied = (asof_inequality == ASOFJoinInequality::Greater) ? (asof_cmp >= 1) : (asof_cmp >= 0);
            if (satisfied)
            {
                /// condition is satisfied, remember this row and move next to try to find better match
                asof_join_state.set(right_cursor, rpos);
                right_cursor->next();
                continue;
            }

            /// Asof condition is not satisfied anymore, use last matched row from right table
            if (asof_join_state.hasMatch(left_cursor, asof_inequality))
            {
                emit_match(lpos, asof_join_state.value.getColumns(), asof_join_state.value_row);
            }
            else
            {
                asof_join_state.reset();
                if (isLeft(kind))
                {
                    /// return row with default values at right side
                    size_t i = 0;
                    for (const auto & col : left_columns)
                        result_cols[i++]->insertFrom(*col, lpos);
                    for (; i < result_cols.size(); ++i)
                        result_cols[i]->insertDefault();
                    chassert(i == result_cols.size());
                }
            }
            left_cursor->next();
            continue;
        }

        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO: implement ASOF equality join");
    }

    const size_t num_rows = result_cols.empty() ? 0 : result_cols.front()->size();
    return Status(Chunk(std::move(result_cols), num_rows));
}
|
||||
|
||||
|
||||
/// if `source_num == 0` get data from left cursor and fill defaults at right
|
||||
/// otherwise - vice versa
|
||||
Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const
|
||||
{
|
||||
|
||||
ColumnRawPtrs cols;
|
||||
{
|
||||
const auto & columns_left = source_num == 0 ? cursors[0]->getCurrent().getColumns() : cursors[0]->sampleColumns();
|
||||
@ -797,7 +1162,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t star
|
||||
cols.push_back(col.get());
|
||||
}
|
||||
}
|
||||
|
||||
Chunk result_chunk;
|
||||
copyColumnsResized(cols, start, num_rows, result_chunk);
|
||||
return result_chunk;
|
||||
@ -813,7 +1177,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num)
|
||||
|
||||
IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
|
||||
{
|
||||
auto kind = table_join->getTableJoin().kind();
|
||||
|
||||
if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted())
|
||||
return Status(0);
|
||||
@ -821,11 +1184,11 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
|
||||
if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted())
|
||||
return Status(1);
|
||||
|
||||
|
||||
if (auto result = handleAllJoinState())
|
||||
{
|
||||
return std::move(*result);
|
||||
}
|
||||
|
||||
if (auto result = handleAsofJoinState())
|
||||
return std::move(*result);
|
||||
|
||||
if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted())
|
||||
{
|
||||
@ -839,7 +1202,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
|
||||
}
|
||||
|
||||
/// check if blocks are not intersecting at all
|
||||
if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0)
|
||||
if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0 && strictness != JoinStrictness::Asof)
|
||||
{
|
||||
if (cmp < 0)
|
||||
{
|
||||
@ -858,13 +1221,14 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
|
||||
}
|
||||
}
|
||||
|
||||
auto strictness = table_join->getTableJoin().strictness();
|
||||
|
||||
if (strictness == JoinStrictness::Any)
|
||||
return anyJoin(kind);
|
||||
return anyJoin();
|
||||
|
||||
if (strictness == JoinStrictness::All)
|
||||
return allJoin(kind);
|
||||
return allJoin();
|
||||
|
||||
if (strictness == JoinStrictness::Asof)
|
||||
return asofJoin();
|
||||
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness);
|
||||
}
|
||||
@ -883,9 +1247,26 @@ MergeJoinTransform::MergeJoinTransform(
|
||||
/* always_read_till_end_= */ false,
|
||||
/* empty_chunk_on_finish_= */ true,
|
||||
table_join, input_headers, max_block_size)
|
||||
, log(getLogger("MergeJoinTransform"))
|
||||
{
|
||||
LOG_TRACE(log, "Use MergeJoinTransform");
|
||||
}
|
||||
|
||||
/// Constructs the transform directly from join kind, strictness and ON clause (without a JoinPtr).
/// The trailing arguments (kind_, strictness_, on_clause_, input_headers, max_block_size) are forwarded
/// by IMergingTransform to the MergeJoinAlgorithm constructor.
/// The logger is initialized here as well, so that `log` is never left null regardless of which
/// constructor was used.
MergeJoinTransform::MergeJoinTransform(
        JoinKind kind_,
        JoinStrictness strictness_,
        const TableJoin::JoinOnClause & on_clause_,
        const Blocks & input_headers,
        const Block & output_header,
        size_t max_block_size,
        UInt64 limit_hint_)
    : IMergingTransform<MergeJoinAlgorithm>(
        input_headers,
        output_header,
        /* have_all_inputs_= */ true,
        limit_hint_,
        /* always_read_till_end_= */ false,
        /* empty_chunk_on_finish_= */ true,
        kind_, strictness_, on_clause_, input_headers, max_block_size)
    , log(getLogger("MergeJoinTransform"))
{
}
|
||||
|
||||
void MergeJoinTransform::onFinish()
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
@ -19,6 +20,7 @@
|
||||
#include <Processors/Chunk.h>
|
||||
#include <Processors/Merges/Algorithms/IMergingAlgorithm.h>
|
||||
#include <Processors/Merges/IMergingTransform.h>
|
||||
#include <Interpreters/TableJoin.h>
|
||||
|
||||
namespace Poco { class Logger; }
|
||||
|
||||
@ -35,57 +37,28 @@ using FullMergeJoinCursorPtr = std::unique_ptr<FullMergeJoinCursor>;
|
||||
/// Used instead of storing previous block
|
||||
struct JoinKeyRow
|
||||
{
|
||||
std::vector<ColumnPtr> row;
|
||||
|
||||
JoinKeyRow() = default;
|
||||
|
||||
explicit JoinKeyRow(const SortCursorImpl & impl_, size_t pos)
|
||||
{
|
||||
row.reserve(impl_.sort_columns.size());
|
||||
for (const auto & col : impl_.sort_columns)
|
||||
{
|
||||
auto new_col = col->cloneEmpty();
|
||||
new_col->insertFrom(*col, pos);
|
||||
row.push_back(std::move(new_col));
|
||||
}
|
||||
}
|
||||
JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos);
|
||||
|
||||
void reset()
|
||||
{
|
||||
row.clear();
|
||||
}
|
||||
bool equals(const FullMergeJoinCursor & cursor) const;
|
||||
bool asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const;
|
||||
|
||||
bool equals(const SortCursorImpl & impl) const
|
||||
{
|
||||
if (row.empty())
|
||||
return false;
|
||||
void reset();
|
||||
|
||||
assert(this->row.size() == impl.sort_columns_size);
|
||||
for (size_t i = 0; i < impl.sort_columns_size; ++i)
|
||||
{
|
||||
int cmp = this->row[i]->compareAt(0, impl.getRow(), *impl.sort_columns[i], impl.desc[i].nulls_direction);
|
||||
if (cmp != 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
std::vector<ColumnPtr> row;
|
||||
};
|
||||
|
||||
/// Remembers previous key if it was joined in previous block
|
||||
class AnyJoinState : boost::noncopyable
|
||||
{
|
||||
public:
|
||||
AnyJoinState() = default;
|
||||
void set(size_t source_num, const FullMergeJoinCursor & cursor);
|
||||
void setValue(Chunk value_);
|
||||
|
||||
void set(size_t source_num, const SortCursorImpl & cursor)
|
||||
{
|
||||
assert(cursor.rows);
|
||||
keys[source_num] = JoinKeyRow(cursor, cursor.rows - 1);
|
||||
}
|
||||
void reset(size_t source_num);
|
||||
|
||||
void setValue(Chunk value_) { value = std::move(value_); }
|
||||
|
||||
bool empty() const { return keys[0].row.empty() && keys[1].row.empty(); }
|
||||
bool empty() const;
|
||||
|
||||
/// current keys
|
||||
JoinKeyRow keys[2];
|
||||
@ -118,8 +91,8 @@ public:
|
||||
Chunk chunk;
|
||||
};
|
||||
|
||||
AllJoinState(const SortCursorImpl & lcursor, size_t lpos,
|
||||
const SortCursorImpl & rcursor, size_t rpos)
|
||||
AllJoinState(const FullMergeJoinCursor & lcursor, size_t lpos,
|
||||
const FullMergeJoinCursor & rcursor, size_t rpos)
|
||||
: keys{JoinKeyRow(lcursor, lpos), JoinKeyRow(rcursor, rpos)}
|
||||
{
|
||||
}
|
||||
@ -187,13 +160,32 @@ private:
|
||||
size_t ridx = 0;
|
||||
};
|
||||
|
||||
|
||||
class AsofJoinState : boost::noncopyable
|
||||
{
|
||||
public:
|
||||
void set(const FullMergeJoinCursor & rcursor, size_t rpos);
|
||||
void reset();
|
||||
|
||||
bool hasMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
|
||||
{
|
||||
if (value.empty())
|
||||
return false;
|
||||
return key.asofMatch(cursor, asof_inequality);
|
||||
}
|
||||
|
||||
JoinKeyRow key;
|
||||
Chunk value;
|
||||
size_t value_row = 0;
|
||||
};
|
||||
|
||||
/*
|
||||
* Wrapper for SortCursorImpl
|
||||
*/
|
||||
class FullMergeJoinCursor : boost::noncopyable
|
||||
{
|
||||
public:
|
||||
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_);
|
||||
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof = false);
|
||||
|
||||
bool fullyCompleted() const;
|
||||
void setChunk(Chunk && chunk);
|
||||
@ -203,17 +195,31 @@ public:
|
||||
SortCursorImpl * operator-> () { return &cursor; }
|
||||
const SortCursorImpl * operator-> () const { return &cursor; }
|
||||
|
||||
SortCursorImpl & operator* () { return cursor; }
|
||||
const SortCursorImpl & operator* () const { return cursor; }
|
||||
|
||||
SortCursorImpl cursor;
|
||||
|
||||
const Block & sampleBlock() const { return sample_block; }
|
||||
Columns sampleColumns() const { return sample_block.getColumns(); }
|
||||
|
||||
/// Returns the column at the remembered ASOF position among the cursor's columns,
/// or nullptr when no ASOF column position is set.
const IColumn * getAsofColumn() const
{
    return asof_column_position ? cursor.all_columns[*asof_column_position] : nullptr;
}
|
||||
|
||||
String dump() const;
|
||||
|
||||
private:
|
||||
Block sample_block;
|
||||
SortDescription desc;
|
||||
|
||||
Chunk current_chunk;
|
||||
bool recieved_all_blocks = false;
|
||||
|
||||
std::optional<size_t> asof_column_position;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -223,22 +229,33 @@ private:
|
||||
class MergeJoinAlgorithm final : public IMergingAlgorithm
|
||||
{
|
||||
public:
|
||||
explicit MergeJoinAlgorithm(JoinPtr table_join, const Blocks & input_headers, size_t max_block_size_);
|
||||
MergeJoinAlgorithm(JoinKind kind_,
|
||||
JoinStrictness strictness_,
|
||||
const TableJoin::JoinOnClause & on_clause_,
|
||||
const Blocks & input_headers,
|
||||
size_t max_block_size_);
|
||||
|
||||
MergeJoinAlgorithm(JoinPtr join_ptr, const Blocks & input_headers, size_t max_block_size_);
|
||||
|
||||
const char * getName() const override { return "MergeJoinAlgorithm"; }
|
||||
void initialize(Inputs inputs) override;
|
||||
void consume(Input & input, size_t source_num) override;
|
||||
Status merge() override;
|
||||
|
||||
void logElapsed(double seconds);
|
||||
void setAsofInequality(ASOFJoinInequality asof_inequality_);
|
||||
|
||||
void logElapsed(double seconds);
|
||||
private:
|
||||
std::optional<Status> handleAnyJoinState();
|
||||
Status anyJoin(JoinKind kind);
|
||||
Status anyJoin();
|
||||
|
||||
std::optional<Status> handleAllJoinState();
|
||||
Status allJoin(JoinKind kind);
|
||||
Status allJoin();
|
||||
|
||||
std::optional<Status> handleAsofJoinState();
|
||||
Status asofJoin();
|
||||
|
||||
MutableColumns getEmptyResultColumns() const;
|
||||
Chunk createBlockWithDefaults(size_t source_num);
|
||||
Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const;
|
||||
|
||||
@ -246,12 +263,15 @@ private:
|
||||
std::unordered_map<size_t, size_t> left_to_right_key_remap;
|
||||
|
||||
std::array<FullMergeJoinCursorPtr, 2> cursors;
|
||||
ASOFJoinInequality asof_inequality = ASOFJoinInequality::None;
|
||||
|
||||
/// Keep some state to make connection between data in different blocks
|
||||
/// Keep some state to handle data from different blocks
|
||||
AnyJoinState any_join_state;
|
||||
std::unique_ptr<AllJoinState> all_join_state;
|
||||
AsofJoinState asof_join_state;
|
||||
|
||||
JoinPtr table_join;
|
||||
JoinKind kind;
|
||||
JoinStrictness strictness;
|
||||
|
||||
size_t max_block_size;
|
||||
int null_direction_hint = 1;
|
||||
@ -281,12 +301,21 @@ public:
|
||||
size_t max_block_size,
|
||||
UInt64 limit_hint = 0);
|
||||
|
||||
MergeJoinTransform(
|
||||
JoinKind kind_,
|
||||
JoinStrictness strictness_,
|
||||
const TableJoin::JoinOnClause & on_clause_,
|
||||
const Blocks & input_headers,
|
||||
const Block & output_header,
|
||||
size_t max_block_size,
|
||||
UInt64 limit_hint_ = 0);
|
||||
|
||||
String getName() const override { return "MergeJoinTransform"; }
|
||||
|
||||
void setAsofInequality(ASOFJoinInequality asof_inequality_) { algorithm.setAsofInequality(asof_inequality_); }
|
||||
|
||||
protected:
|
||||
void onFinish() override;
|
||||
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -109,7 +109,7 @@ void ScatterByPartitionTransform::generateOutputChunks()
|
||||
hash.reset(num_rows);
|
||||
|
||||
for (const auto & column_number : key_columns)
|
||||
columns[column_number]->updateWeakHash32(hash);
|
||||
hash.update(columns[column_number]->getWeakHash32());
|
||||
|
||||
const auto & hash_data = hash.getData();
|
||||
IColumn::Selector selector(num_rows);
|
||||
|
768
src/Processors/tests/gtest_full_sorting_join.cpp
Normal file
768
src/Processors/tests/gtest_full_sorting_join.cpp
Normal file
@ -0,0 +1,768 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <pcg_random.hpp>
|
||||
#include <random>
|
||||
#include <Poco/ConsoleChannel.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/AutoPtr.h>
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/getRandomASCIIString.h>
|
||||
#include <Common/randomSeed.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <Interpreters/TableJoin.h>
|
||||
|
||||
#include <Processors/Executors/PipelineExecutor.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
#include <Processors/Sinks/NullSink.h>
|
||||
#include <Processors/Sources/SourceFromChunks.h>
|
||||
#include <Processors/Sources/SourceFromSingleChunk.h>
|
||||
#include <Processors/Transforms/MergeJoinTransform.h>
|
||||
|
||||
#include <Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h>
|
||||
#include <Processors/Executors/CompletedPipelineExecutor.h>
|
||||
|
||||
|
||||
#include <QueryPipeline/QueryPipeline.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Builds a pipeline that joins two sources with a MergeJoinTransform.
///
/// The first `key_length` columns (by position) of each source header are used as join keys.
/// The output header contains all left columns prefixed with "t1." followed by all right
/// columns prefixed with "t2.". If `asof_inequality` is not None, it is passed to the transform.
QueryPipeline buildJoinPipeline(
    std::shared_ptr<ISource> left_source,
    std::shared_ptr<ISource> right_source,
    size_t key_length = 1,
    JoinKind kind = JoinKind::Inner,
    JoinStrictness strictness = JoinStrictness::All,
    ASOFJoinInequality asof_inequality = ASOFJoinInequality::None)
{
    Blocks inputs;
    inputs.push_back(left_source->getPort().getHeader());
    inputs.push_back(right_source->getPort().getHeader());

    /// No more elements are added to `inputs`, so these references stay valid.
    const Block & left_header = inputs[0];
    const Block & right_header = inputs[1];

    Block out_header;
    auto append_prefixed = [&out_header](const Block & header, const char * prefix)
    {
        for (const auto & source_column : header)
        {
            ColumnWithTypeAndName renamed = source_column;
            renamed.name = prefix + renamed.name;
            out_header.insert(renamed);
        }
    };
    append_prefixed(left_header, "t1.");
    append_prefixed(right_header, "t2.");

    TableJoin::JoinOnClause on_clause;
    for (size_t pos = 0; pos != key_length; ++pos)
    {
        on_clause.key_names_left.emplace_back(left_header.getByPosition(pos).name);
        on_clause.key_names_right.emplace_back(right_header.getByPosition(pos).name);
    }

    auto joining = std::make_shared<MergeJoinTransform>(
        kind,
        strictness,
        on_clause,
        inputs, out_header, /* max_block_size = */ 0);

    if (asof_inequality != ASOFJoinInequality::None)
        joining->setAsofInequality(asof_inequality);

    chassert(joining->getInputs().size() == 2);

    /// Left source feeds the first input port, right source feeds the second one.
    connect(left_source->getPort(), joining->getInputs().front());
    connect(right_source->getPort(), joining->getInputs().back());

    /// Take the output port before `joining` is moved into the processor list.
    auto * output_port = &joining->getOutputPort();

    auto processors = std::make_shared<Processors>();
    processors->emplace_back(std::move(left_source));
    processors->emplace_back(std::move(right_source));
    processors->emplace_back(std::move(joining));

    return QueryPipeline(QueryPlanResourceHolder{}, processors, output_port);
}
|
||||
|
||||
|
||||
std::shared_ptr<ISource> oneColumnSource(const std::vector<std::vector<UInt64>> & values)
|
||||
{
|
||||
Block header = {
|
||||
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "key"),
|
||||
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "idx"),
|
||||
};
|
||||
|
||||
UInt64 idx = 0;
|
||||
Chunks chunks;
|
||||
for (const auto & chunk_values : values)
|
||||
{
|
||||
auto key_column = ColumnUInt64::create();
|
||||
auto idx_column = ColumnUInt64::create();
|
||||
|
||||
for (auto n : chunk_values)
|
||||
{
|
||||
key_column->insertValue(n);
|
||||
idx_column->insertValue(idx);
|
||||
++idx;
|
||||
}
|
||||
chunks.emplace_back(Chunk(Columns{std::move(key_column), std::move(idx_column)}, chunk_values.size()));
|
||||
}
|
||||
return std::make_shared<SourceFromChunks>(header, std::move(chunks));
|
||||
}
|
||||
|
||||
class SourceChunksBuilder
|
||||
{
|
||||
public:
|
||||
|
||||
explicit SourceChunksBuilder(const Block & header_)
|
||||
: header(header_)
|
||||
{
|
||||
current_chunk = header.cloneEmptyColumns();
|
||||
chassert(!current_chunk.empty());
|
||||
}
|
||||
|
||||
void setBreakProbability(pcg64 & rng_)
|
||||
{
|
||||
/// random probability with possibility to have exact 0.0 and 1.0 values
|
||||
break_prob = std::uniform_int_distribution<size_t>(0, 5)(rng_) / static_cast<double>(5);
|
||||
rng = &rng_;
|
||||
}
|
||||
|
||||
void addRow(const std::vector<Field> & row)
|
||||
{
|
||||
chassert(row.size() == current_chunk.size());
|
||||
for (size_t i = 0; i < current_chunk.size(); ++i)
|
||||
current_chunk[i]->insert(row[i]);
|
||||
|
||||
if (rng && std::uniform_real_distribution<>(0.0, 1.0)(*rng) < break_prob)
|
||||
addChunk();
|
||||
}
|
||||
|
||||
void addChunk()
|
||||
{
|
||||
if (current_chunk.front()->empty())
|
||||
return;
|
||||
|
||||
size_t rows = current_chunk.front()->size();
|
||||
chunks.emplace_back(std::move(current_chunk), rows);
|
||||
current_chunk = header.cloneEmptyColumns();
|
||||
}
|
||||
|
||||
std::shared_ptr<ISource> getSource()
|
||||
{
|
||||
addChunk();
|
||||
|
||||
/// copy chunk to allow reusing same builder
|
||||
Chunks chunks_copy;
|
||||
chunks_copy.reserve(chunks.size());
|
||||
for (const auto & chunk : chunks)
|
||||
chunks_copy.emplace_back(chunk.clone());
|
||||
return std::make_shared<SourceFromChunks>(header, std::move(chunks_copy));
|
||||
}
|
||||
|
||||
private:
|
||||
Block header;
|
||||
Chunks chunks;
|
||||
MutableColumns current_chunk;
|
||||
|
||||
pcg64 * rng = nullptr;
|
||||
double break_prob = 0.0;
|
||||
};
|
||||
|
||||
|
||||
std::vector<std::vector<Field>> getValuesFromBlock(const Block & block, const Names & names)
|
||||
{
|
||||
std::vector<std::vector<Field>> result;
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
auto & row = result.emplace_back();
|
||||
for (const auto & name : names)
|
||||
block.getByName(name).column->get(i, row.emplace_back());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/// Runs the pipeline to completion with a pulling executor and returns
/// all pulled blocks concatenated into a single block.
Block executePipeline(QueryPipeline && pipeline)
{
    PullingPipelineExecutor executor(pipeline);

    Blocks pulled_blocks;
    for (;;)
    {
        Block next_block;
        if (!executor.pull(next_block))
            break;
        pulled_blocks.emplace_back(std::move(next_block));
    }

    return concatenateBlocks(pulled_blocks);
}
|
||||
|
||||
template <typename T>
|
||||
void assertColumnVectorEq(const typename ColumnVector<T>::Container & expected, const Block & block, const std::string & name)
|
||||
{
|
||||
const auto * actual = typeid_cast<const ColumnVector<T> *>(block.getByName(name).column.get());
|
||||
ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector<T>).name();
|
||||
|
||||
auto get_first_diff = [&]() -> String
|
||||
{
|
||||
const auto & actual_data = actual->getData();
|
||||
size_t num_rows = std::min(expected.size(), actual_data.size());
|
||||
for (size_t i = 0; i < num_rows; ++i)
|
||||
{
|
||||
if (expected[i] != actual_data[i])
|
||||
return fmt::format(", expected: {}, actual: {} at row {}", expected[i], actual_data[i], i);
|
||||
}
|
||||
return "";
|
||||
};
|
||||
|
||||
EXPECT_EQ(actual->getData().size(), expected.size());
|
||||
ASSERT_EQ(actual->getData(), expected) << "column name: " << name << get_first_diff();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name)
|
||||
{
|
||||
const ColumnPtr & actual = block.getByName(name).column;
|
||||
ASSERT_TRUE(checkColumn<T>(*actual));
|
||||
ASSERT_TRUE(checkColumn<T>(expected));
|
||||
EXPECT_EQ(actual->size(), expected.size());
|
||||
|
||||
auto dump_val = [](const IColumn & col, size_t i) -> String
|
||||
{
|
||||
Field value;
|
||||
col.get(i, value);
|
||||
return value.dump();
|
||||
};
|
||||
|
||||
size_t num_rows = std::min(actual->size(), expected.size());
|
||||
for (size_t i = 0; i < num_rows; ++i)
|
||||
ASSERT_EQ(actual->compareAt(i, i, expected, 1), 0) << dump_val(*actual, i) << " != " << dump_val(expected, i) << " at row " << i;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T getRandomFrom(pcg64 & rng, const std::initializer_list<T> & opts)
|
||||
{
|
||||
std::vector<T> options(opts.begin(), opts.end());
|
||||
size_t idx = std::uniform_int_distribution<size_t>(0, options.size() - 1)(rng);
|
||||
return options[idx];
|
||||
}
|
||||
|
||||
/// Advances the composite key (k1, k2) to a greater one.
///
/// k2 is replaced with a random ASCII string of 1..10 characters; if the new string does not
/// compare greater than the previous k2, k1 is incremented so the pair still grows.
void generateNextKey(pcg64 & rng, UInt64 & k1, String & k2)
{
    std::uniform_int_distribution<> length_dist(1, 10);
    const size_t length = length_dist(rng);

    String candidate = getRandomASCIIString(length, rng);
    if (candidate <= k2)
        ++k1;
    k2 = std::move(candidate);
}
|
||||
|
||||
/// Returns true for the strict ASOF inequalities (Less and Greater), false otherwise.
bool isStrict(ASOFJoinInequality inequality)
{
    const bool strictly_less = inequality == ASOFJoinInequality::Less;
    const bool strictly_greater = inequality == ASOFJoinInequality::Greater;
    return strictly_less || strictly_greater;
}
|
||||
|
||||
}
|
||||
|
||||
class FullSortingJoinTest : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
FullSortingJoinTest() = default;
|
||||
|
||||
void SetUp() override
|
||||
{
|
||||
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
|
||||
Poco::Logger::root().setChannel(channel);
|
||||
if (const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe)
|
||||
Poco::Logger::root().setLevel(test_log_level);
|
||||
else
|
||||
Poco::Logger::root().setLevel("none");
|
||||
|
||||
|
||||
UInt64 seed = randomSeed();
|
||||
if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe)
|
||||
seed = std::stoull(random_seed);
|
||||
std::cout << "TEST_RANDOM_SEED=" << seed << std::endl;
|
||||
rng = pcg64(seed);
|
||||
}
|
||||
|
||||
void TearDown() override
|
||||
{
|
||||
}
|
||||
|
||||
pcg64 rng;
|
||||
};
|
||||
|
||||
TEST_F(FullSortingJoinTest, AllAnyOneKey)
|
||||
try
|
||||
{
|
||||
{
|
||||
SCOPED_TRACE("Inner All");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {1, 2, 3, 4, 5} }),
|
||||
oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
|
||||
1, JoinKind::Inner, JoinStrictness::All));
|
||||
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Inner Any");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {1, 2, 3, 4, 5} }),
|
||||
oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
|
||||
1, JoinKind::Inner, JoinStrictness::Any));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Inner All");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
|
||||
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
|
||||
1, JoinKind::Inner, JoinStrictness::All));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 0, 1, 2, 3, 3, 4, 5}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 4, 4, 4, 3, 4, 5, 5}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Inner Any");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
|
||||
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
|
||||
1, JoinKind::Inner, JoinStrictness::Any));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Inner Any");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
|
||||
oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
|
||||
1, JoinKind::Inner, JoinStrictness::Any));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
|
||||
SCOPED_TRACE("Left Any");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
|
||||
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
|
||||
1, JoinKind::Left, JoinStrictness::Any));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Left Any");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
|
||||
oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
|
||||
1, JoinKind::Left, JoinStrictness::Any));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
|
||||
}
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
|
||||
TEST_F(FullSortingJoinTest, AnySimple)
|
||||
try
|
||||
{
|
||||
JoinKind kind = getRandomFrom(rng, {JoinKind::Inner, JoinKind::Left, JoinKind::Right});
|
||||
|
||||
SourceChunksBuilder left_source({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeString>(), "attr"},
|
||||
});
|
||||
|
||||
SourceChunksBuilder right_source({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeString>(), "attr"},
|
||||
});
|
||||
|
||||
left_source.setBreakProbability(rng);
|
||||
right_source.setBreakProbability(rng);
|
||||
|
||||
size_t num_keys = std::uniform_int_distribution<>(100, 1000)(rng);
|
||||
|
||||
auto expected_left = ColumnString::create();
|
||||
auto expected_right = ColumnString::create();
|
||||
|
||||
UInt64 k1 = 1;
|
||||
String k2;
|
||||
|
||||
auto get_attr = [&](const String & side, size_t idx) -> String
|
||||
{
|
||||
return toString(k1) + "_" + k2 + "_" + side + "_" + toString(idx);
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < num_keys; ++i)
|
||||
{
|
||||
generateNextKey(rng, k1, k2);
|
||||
|
||||
/// Key is present in the left table, the right table, or both. Presence in both tables is the most probable case.
|
||||
size_t key_presence = std::uniform_int_distribution<>(0, 10)(rng);
|
||||
|
||||
size_t num_rows_left = key_presence == 0 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
|
||||
for (size_t j = 0; j < num_rows_left; ++j)
|
||||
left_source.addRow({k1, k2, get_attr("left", j)});
|
||||
|
||||
size_t num_rows_right = key_presence == 1 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
|
||||
for (size_t j = 0; j < num_rows_right; ++j)
|
||||
right_source.addRow({k1, k2, get_attr("right", j)});
|
||||
|
||||
String left_attr = num_rows_left ? get_attr("left", 0) : "";
|
||||
String right_attr = num_rows_right ? get_attr("right", 0) : "";
|
||||
|
||||
if (kind == JoinKind::Inner && num_rows_left && num_rows_right)
|
||||
{
|
||||
expected_left->insert(left_attr);
|
||||
expected_right->insert(right_attr);
|
||||
}
|
||||
else if (kind == JoinKind::Left)
|
||||
{
|
||||
for (size_t j = 0; j < num_rows_left; ++j)
|
||||
{
|
||||
expected_left->insert(get_attr("left", j));
|
||||
expected_right->insert(right_attr);
|
||||
}
|
||||
}
|
||||
else if (kind == JoinKind::Right)
|
||||
{
|
||||
for (size_t j = 0; j < num_rows_right; ++j)
|
||||
{
|
||||
expected_left->insert(left_attr);
|
||||
expected_right->insert(get_attr("right", j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Block result_block = executePipeline(buildJoinPipeline(
|
||||
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
|
||||
kind, JoinStrictness::Any));
|
||||
assertColumnEq<ColumnString>(*expected_left, result_block, "t1.attr");
|
||||
assertColumnEq<ColumnString>(*expected_right, result_block, "t2.attr");
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
TEST_F(FullSortingJoinTest, AsofSimple)
|
||||
try
|
||||
{
|
||||
SourceChunksBuilder left_source({
|
||||
{std::make_shared<DataTypeString>(), "key"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
});
|
||||
left_source.addRow({"AMZN", 3});
|
||||
left_source.addRow({"AMZN", 4});
|
||||
left_source.addRow({"AMZN", 6});
|
||||
left_source.addRow({"SBUX", 10});
|
||||
|
||||
SourceChunksBuilder right_source({
|
||||
{std::make_shared<DataTypeString>(), "key"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeUInt64>(), "value"},
|
||||
});
|
||||
right_source.addRow({"AAPL", 1, 97});
|
||||
right_source.addChunk();
|
||||
right_source.addRow({"AAPL", 2, 98});
|
||||
right_source.addRow({"AAPL", 3, 99});
|
||||
right_source.addRow({"AMZN", 1, 100});
|
||||
right_source.addRow({"AMZN", 2, 110});
|
||||
right_source.addChunk();
|
||||
right_source.addRow({"AMZN", 2, 110});
|
||||
right_source.addChunk();
|
||||
right_source.addRow({"AMZN", 4, 130});
|
||||
right_source.addRow({"AMZN", 5, 140});
|
||||
right_source.addRow({"SBUX", 8, 180});
|
||||
right_source.addChunk();
|
||||
right_source.addRow({"SBUX", 9, 190});
|
||||
|
||||
{
|
||||
Block result_block = executePipeline(buildJoinPipeline(
|
||||
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
|
||||
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals));
|
||||
auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});
|
||||
|
||||
ASSERT_EQ(values, (std::vector<std::vector<Field>>{
|
||||
{"AMZN", 3u, 4u, 130u},
|
||||
{"AMZN", 4u, 4u, 130u},
|
||||
}));
|
||||
}
|
||||
|
||||
{
|
||||
Block result_block = executePipeline(buildJoinPipeline(
|
||||
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
|
||||
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals));
|
||||
auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});
|
||||
|
||||
ASSERT_EQ(values, (std::vector<std::vector<Field>>{
|
||||
{"AMZN", 3u, 2u, 110u},
|
||||
{"AMZN", 4u, 4u, 130u},
|
||||
{"AMZN", 6u, 5u, 140u},
|
||||
{"SBUX", 10u, 9u, 190u},
|
||||
}));
|
||||
}
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
|
||||
TEST_F(FullSortingJoinTest, AsofOnlyColumn)
|
||||
try
|
||||
{
|
||||
auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} });
|
||||
|
||||
SourceChunksBuilder right_source_builder({
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeUInt64>(), "value"},
|
||||
});
|
||||
|
||||
right_source_builder.setBreakProbability(rng);
|
||||
|
||||
for (const auto & row : std::vector<std::vector<Field>>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} })
|
||||
right_source_builder.addRow(row);
|
||||
|
||||
auto right_source = right_source_builder.getSource();
|
||||
|
||||
auto pipeline = buildJoinPipeline(
|
||||
left_source, right_source, /* key_length = */ 1,
|
||||
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals);
|
||||
|
||||
Block result_block = executePipeline(std::move(pipeline));
|
||||
|
||||
ASSERT_EQ(
|
||||
assert_cast<const ColumnUInt64 *>(result_block.getByName("t1.key").column.get())->getData(),
|
||||
(ColumnUInt64::Container{3, 3, 3, 3, 3, 5, 5, 6, 9, 9, 10})
|
||||
);
|
||||
|
||||
ASSERT_EQ(
|
||||
assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.t").column.get())->getData(),
|
||||
(ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 11})
|
||||
);
|
||||
|
||||
ASSERT_EQ(
|
||||
assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.value").column.get())->getData(),
|
||||
(ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 111})
|
||||
);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
TEST_F(FullSortingJoinTest, AsofLessGeneratedTestData)
|
||||
try
|
||||
{
|
||||
/// Generate random data and build the expected result at the same time.
|
||||
|
||||
/// Test specific combinations of join kind and inequality per each run
|
||||
auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
|
||||
auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals });
|
||||
|
||||
SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));
|
||||
|
||||
/// Key is complex, `k1, k2` for equality and `t` for asof
|
||||
SourceChunksBuilder left_source_builder({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeInt64>(), "attr"},
|
||||
});
|
||||
|
||||
SourceChunksBuilder right_source_builder({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeInt64>(), "attr"},
|
||||
});
|
||||
|
||||
/// How small generated block should be
|
||||
left_source_builder.setBreakProbability(rng);
|
||||
right_source_builder.setBreakProbability(rng);
|
||||
|
||||
/// We are going to generate sorted data and remember expected result
|
||||
ColumnInt64::Container expected;
|
||||
|
||||
UInt64 k1 = 1;
|
||||
String k2;
|
||||
auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
|
||||
for (size_t key_num = 0; key_num < key_num_total; ++key_num)
|
||||
{
|
||||
/// Generate new key greater than previous
|
||||
generateNextKey(rng, k1, k2);
|
||||
|
||||
Int64 left_t = 0;
|
||||
/// Generate several rows for the key
|
||||
size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng);
|
||||
for (size_t i = 0; i < num_left_rows; ++i)
|
||||
{
|
||||
/// t is strictly greater than previous
|
||||
left_t += std::uniform_int_distribution<>(1, 10)(rng);
|
||||
|
||||
auto left_arrtibute_value = 10 * left_t;
|
||||
left_source_builder.addRow({k1, k2, left_t, left_arrtibute_value});
|
||||
expected.push_back(left_arrtibute_value);
|
||||
|
||||
auto num_matches = 1 + std::poisson_distribution<>(4)(rng);
|
||||
/// Generate several matches in the right table
|
||||
auto right_t = left_t;
|
||||
for (size_t j = 0; j < num_matches; ++j)
|
||||
{
|
||||
int min_step = isStrict(asof_inequality) ? 1 : 0;
|
||||
right_t += std::uniform_int_distribution<>(min_step, 3)(rng);
|
||||
|
||||
/// First row should match
|
||||
bool is_match = j == 0;
|
||||
right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * left_arrtibute_value : -1});
|
||||
}
|
||||
/// Next left_t should be greater than right_t not to match with previous rows
|
||||
left_t = right_t;
|
||||
}
|
||||
|
||||
/// generate some rows with greater left_t to check that they are not matched
|
||||
num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
|
||||
for (size_t i = 0; i < num_left_rows; ++i)
|
||||
{
|
||||
left_t += std::uniform_int_distribution<>(1, 10)(rng);
|
||||
left_source_builder.addRow({k1, k2, left_t, -10 * left_t});
|
||||
|
||||
if (join_kind == JoinKind::Left)
|
||||
expected.push_back(-10 * left_t);
|
||||
}
|
||||
}
|
||||
|
||||
Block result_block = executePipeline(buildJoinPipeline(
|
||||
left_source_builder.getSource(), right_source_builder.getSource(),
|
||||
/* key_length = */ 3,
|
||||
join_kind, JoinStrictness::Asof, asof_inequality));
|
||||
|
||||
assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");
|
||||
|
||||
for (auto & e : expected)
|
||||
/// Non matched rows from left table have negative attr
|
||||
/// Value of the attribute in the right table is 10 times greater than in the left table
|
||||
e = e < 0 ? 0 : 10 * e;
|
||||
|
||||
assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
TEST_F(FullSortingJoinTest, AsofGreaterGeneratedTestData)
|
||||
try
|
||||
{
|
||||
/// Generate random data and build the expected result at the same time.
|
||||
|
||||
/// Test specific combinations of join kind and inequality per each run
|
||||
auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
|
||||
auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals });
|
||||
|
||||
SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));
|
||||
|
||||
SourceChunksBuilder left_source_builder({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeInt64>(), "attr"},
|
||||
});
|
||||
|
||||
SourceChunksBuilder right_source_builder({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeInt64>(), "attr"},
|
||||
});
|
||||
|
||||
left_source_builder.setBreakProbability(rng);
|
||||
right_source_builder.setBreakProbability(rng);
|
||||
|
||||
ColumnInt64::Container expected;
|
||||
|
||||
UInt64 k1 = 1;
|
||||
String k2;
|
||||
UInt64 left_t = 0;
|
||||
|
||||
auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
|
||||
for (size_t key_num = 0; key_num < key_num_total; ++key_num)
|
||||
{
|
||||
/// Generate new key greater than previous
|
||||
generateNextKey(rng, k1, k2);
|
||||
|
||||
/// Generate some rows with smaller left_t to check that they are not matched
|
||||
size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
|
||||
for (size_t i = 0; i < num_left_rows; ++i)
|
||||
{
|
||||
left_t += std::uniform_int_distribution<>(1, 10)(rng);
|
||||
left_source_builder.addRow({k1, k2, left_t, -10 * left_t});
|
||||
|
||||
if (join_kind == JoinKind::Left)
|
||||
expected.push_back(-10 * left_t);
|
||||
}
|
||||
|
||||
if (std::bernoulli_distribution(0.1)(rng))
|
||||
continue;
|
||||
|
||||
size_t num_right_matches = std::uniform_int_distribution<>(1, 10)(rng);
|
||||
auto right_t = left_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 0 : 1, 10)(rng);
|
||||
auto attribute_value = 10 * right_t;
|
||||
for (size_t j = 0; j < num_right_matches; ++j)
|
||||
{
|
||||
right_t += std::uniform_int_distribution<>(0, 3)(rng);
|
||||
bool is_match = j == num_right_matches - 1;
|
||||
right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * attribute_value : -1});
|
||||
}
|
||||
|
||||
/// Next left_t should be greater than (or equals) right_t to match with previous rows
|
||||
left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 100)(rng);
|
||||
size_t num_left_matches = std::uniform_int_distribution<>(1, 100)(rng);
|
||||
for (size_t j = 0; j < num_left_matches; ++j)
|
||||
{
|
||||
left_t += std::uniform_int_distribution<>(0, 3)(rng);
|
||||
left_source_builder.addRow({k1, k2, left_t, attribute_value});
|
||||
expected.push_back(attribute_value);
|
||||
}
|
||||
}
|
||||
|
||||
Block result_block = executePipeline(buildJoinPipeline(
|
||||
left_source_builder.getSource(), right_source_builder.getSource(),
|
||||
/* key_length = */ 3,
|
||||
join_kind, JoinStrictness::Asof, asof_inequality));
|
||||
|
||||
assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");
|
||||
|
||||
for (auto & e : expected)
|
||||
/// Non matched rows from left table have negative attr
|
||||
/// Value of the attribute in the right table is 10 times greater than in the left table
|
||||
e = e < 0 ? 0 : 10 * e;
|
||||
|
||||
assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
@ -155,6 +155,10 @@ void printExceptionWithRespectToAbort(LoggerPtr log, const String & query_id)
|
||||
{
|
||||
std::rethrow_exception(ex);
|
||||
}
|
||||
catch (const TestException &) // NOLINT
|
||||
{
|
||||
/// Exception from a unit test, ignore it.
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
NOEXCEPT_SCOPE({
|
||||
|
@ -34,7 +34,7 @@ public:
|
||||
|
||||
auto choice = distribution(generator);
|
||||
if (choice == 0)
|
||||
throw std::runtime_error("Unlucky...");
|
||||
throw TestException();
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -48,7 +48,7 @@ public:
|
||||
{
|
||||
auto choice = distribution(generator);
|
||||
if (choice == 0)
|
||||
throw std::runtime_error("Unlucky...");
|
||||
throw TestException();
|
||||
}
|
||||
|
||||
Priority getPriority() const override { return {}; }
|
||||
|
@ -3,8 +3,13 @@ import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from shutil import copy2
|
||||
from create_release import PackageDownloader, ReleaseInfo, ShellRunner
|
||||
from ci_utils import WithIter
|
||||
from create_release import (
|
||||
PackageDownloader,
|
||||
ReleaseInfo,
|
||||
ReleaseContextManager,
|
||||
ReleaseProgress,
|
||||
)
|
||||
from ci_utils import WithIter, Shell
|
||||
|
||||
|
||||
class MountPointApp(metaclass=WithIter):
|
||||
@ -38,7 +43,6 @@ class R2MountPoint:
|
||||
self.bucket_name = self._PROD_BUCKET_NAME
|
||||
|
||||
self.aux_mount_options = ""
|
||||
self.async_mount = False
|
||||
if self.app == MountPointApp.S3FS:
|
||||
self.cache_dir = "/home/ubuntu/s3fs_cache"
|
||||
# self.aux_mount_options += "-o nomodtime " if self.NOMODTIME else "" not for s3fs
|
||||
@ -52,7 +56,6 @@ class R2MountPoint:
|
||||
self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}"
|
||||
elif self.app == MountPointApp.RCLONE:
|
||||
# run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return
|
||||
self.async_mount = True
|
||||
self.cache_dir = "/home/ubuntu/rclone_cache"
|
||||
self.aux_mount_options += "--no-modtime " if self.NOMODTIME else ""
|
||||
self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose
|
||||
@ -76,19 +79,22 @@ class R2MountPoint:
|
||||
)
|
||||
|
||||
_TEST_MOUNT_CMD = f"mount | grep -q {self.MOUNT_POINT}"
|
||||
ShellRunner.run(_CLEAN_LOG_FILE_CMD)
|
||||
ShellRunner.run(_UNMOUNT_CMD)
|
||||
ShellRunner.run(_MKDIR_CMD)
|
||||
ShellRunner.run(_MKDIR_FOR_CACHE)
|
||||
ShellRunner.run(self.mount_cmd, async_=self.async_mount)
|
||||
if self.async_mount:
|
||||
time.sleep(3)
|
||||
ShellRunner.run(_TEST_MOUNT_CMD)
|
||||
Shell.run(_CLEAN_LOG_FILE_CMD)
|
||||
Shell.run(_UNMOUNT_CMD)
|
||||
Shell.run(_MKDIR_CMD)
|
||||
Shell.run(_MKDIR_FOR_CACHE)
|
||||
if self.app == MountPointApp.S3FS:
|
||||
Shell.run(self.mount_cmd, check=True)
|
||||
else:
|
||||
# didn't manage to use simple run() and without blocking or failure
|
||||
Shell.run_as_daemon(self.mount_cmd)
|
||||
time.sleep(3)
|
||||
Shell.run(_TEST_MOUNT_CMD, check=True)
|
||||
|
||||
@classmethod
|
||||
def teardown(cls):
|
||||
print(f"Unmount [{cls.MOUNT_POINT}]")
|
||||
ShellRunner.run(f"umount {cls.MOUNT_POINT}")
|
||||
Shell.run(f"umount {cls.MOUNT_POINT}")
|
||||
|
||||
|
||||
class RepoCodenames(metaclass=WithIter):
|
||||
@ -101,6 +107,7 @@ class DebianArtifactory:
|
||||
_PROD_REPO_URL = "https://packages.clickhouse.com/deb"
|
||||
|
||||
def __init__(self, release_info: ReleaseInfo, dry_run: bool):
|
||||
self.release_info = release_info
|
||||
self.codename = release_info.codename
|
||||
self.version = release_info.version
|
||||
if dry_run:
|
||||
@ -124,8 +131,8 @@ class DebianArtifactory:
|
||||
cmd = f"{REPREPRO_CMD_PREFIX} includedeb {self.codename} {' '.join(paths)}"
|
||||
print("Running export command:")
|
||||
print(f" {cmd}")
|
||||
ShellRunner.run(cmd)
|
||||
ShellRunner.run("sync")
|
||||
Shell.run(cmd, check=True)
|
||||
Shell.run("sync")
|
||||
|
||||
if self.codename == RepoCodenames.LTS:
|
||||
packages_with_version = [
|
||||
@ -137,16 +144,19 @@ class DebianArtifactory:
|
||||
cmd = f"{REPREPRO_CMD_PREFIX} copy {RepoCodenames.STABLE} {RepoCodenames.LTS} {' '.join(packages_with_version)}"
|
||||
print("Running copy command:")
|
||||
print(f" {cmd}")
|
||||
ShellRunner.run(cmd)
|
||||
ShellRunner.run("sync")
|
||||
Shell.run(cmd, check=True)
|
||||
Shell.run("sync")
|
||||
|
||||
def test_packages(self):
|
||||
ShellRunner.run("docker pull ubuntu:latest")
|
||||
Shell.run("docker pull ubuntu:latest")
|
||||
print(f"Test packages installation, version [{self.version}]")
|
||||
cmd = f"docker run --rm ubuntu:latest bash -c \"apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-client={self.version}\""
|
||||
debian_command = f"echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-common-static={self.version} clickhouse-client={self.version}"
|
||||
cmd = f'docker run --rm ubuntu:latest bash -c "apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; {debian_command}"'
|
||||
print("Running test command:")
|
||||
print(f" {cmd}")
|
||||
ShellRunner.run(cmd)
|
||||
Shell.run(cmd, check=True)
|
||||
self.release_info.debian_command = debian_command
|
||||
self.release_info.dump()
|
||||
|
||||
|
||||
def _copy_if_not_exists(src: Path, dst: Path) -> Path:
|
||||
@ -167,6 +177,7 @@ class RpmArtifactory:
|
||||
_SIGN_KEY = "885E2BDCF96B0B45ABF058453E4AD4719DDE9A38"
|
||||
|
||||
def __init__(self, release_info: ReleaseInfo, dry_run: bool):
|
||||
self.release_info = release_info
|
||||
self.codename = release_info.codename
|
||||
self.version = release_info.version
|
||||
if dry_run:
|
||||
@ -202,23 +213,26 @@ class RpmArtifactory:
|
||||
for command in commands:
|
||||
print("Running command:")
|
||||
print(f" {command}")
|
||||
ShellRunner.run(command)
|
||||
Shell.run(command, check=True)
|
||||
|
||||
update_public_key = f"gpg --armor --export {self._SIGN_KEY}"
|
||||
pub_key_path = dest_dir / "repodata" / "repomd.xml.key"
|
||||
print("Updating repomd.xml.key")
|
||||
pub_key_path.write_text(ShellRunner.run(update_public_key)[1])
|
||||
pub_key_path.write_text(Shell.run(update_public_key, check=True))
|
||||
if codename == RepoCodenames.LTS:
|
||||
self.export_packages(RepoCodenames.STABLE)
|
||||
ShellRunner.run("sync")
|
||||
Shell.run("sync")
|
||||
|
||||
def test_packages(self):
|
||||
ShellRunner.run("docker pull fedora:latest")
|
||||
Shell.run("docker pull fedora:latest")
|
||||
print(f"Test package installation, version [{self.version}]")
|
||||
cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1"'
|
||||
rpm_command = f"dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1"
|
||||
cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && {rpm_command}"'
|
||||
print("Running test command:")
|
||||
print(f" {cmd}")
|
||||
ShellRunner.run(cmd)
|
||||
Shell.run(cmd, check=True)
|
||||
self.release_info.rpm_command = rpm_command
|
||||
self.release_info.dump()
|
||||
|
||||
|
||||
class TgzArtifactory:
|
||||
@ -226,6 +240,7 @@ class TgzArtifactory:
|
||||
_PROD_REPO_URL = "https://packages.clickhouse.com/tgz"
|
||||
|
||||
def __init__(self, release_info: ReleaseInfo, dry_run: bool):
|
||||
self.release_info = release_info
|
||||
self.codename = release_info.codename
|
||||
self.version = release_info.version
|
||||
if dry_run:
|
||||
@ -256,23 +271,28 @@ class TgzArtifactory:
|
||||
|
||||
if codename == RepoCodenames.LTS:
|
||||
self.export_packages(RepoCodenames.STABLE)
|
||||
ShellRunner.run("sync")
|
||||
Shell.run("sync")
|
||||
|
||||
def test_packages(self):
|
||||
tgz_file = "/tmp/tmp.tgz"
|
||||
tgz_sha_file = "/tmp/tmp.tgz.sha512"
|
||||
ShellRunner.run(
|
||||
f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz"
|
||||
cmd = f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz"
|
||||
Shell.run(
|
||||
cmd,
|
||||
check=True,
|
||||
)
|
||||
ShellRunner.run(
|
||||
f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512"
|
||||
Shell.run(
|
||||
f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512",
|
||||
check=True,
|
||||
)
|
||||
expected_checksum = ShellRunner.run(f"cut -d ' ' -f 1 {tgz_sha_file}")
|
||||
actual_checksum = ShellRunner.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1")
|
||||
expected_checksum = Shell.run(f"cut -d ' ' -f 1 {tgz_sha_file}", check=True)
|
||||
actual_checksum = Shell.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1")
|
||||
assert (
|
||||
expected_checksum == actual_checksum
|
||||
), f"[{actual_checksum} != {expected_checksum}]"
|
||||
ShellRunner.run("rm /tmp/tmp.tgz*")
|
||||
Shell.run("rm /tmp/tmp.tgz*")
|
||||
self.release_info.tgz_command = cmd
|
||||
self.release_info.dump()
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
@ -280,12 +300,6 @@ def parse_args() -> argparse.Namespace:
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
description="Adds release packages to the repository",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--infile",
|
||||
type=str,
|
||||
required=True,
|
||||
help="input file with release info",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--export-debian",
|
||||
action="store_true",
|
||||
@ -326,9 +340,7 @@ def parse_args() -> argparse.Namespace:
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
assert args.dry_run
|
||||
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
"""
|
||||
Use S3FS. RCLONE has some errors with r2 remote which I didn't figure out how to resolve:
|
||||
ERROR : IO error: NotImplemented: versionId not implemented
|
||||
@ -336,20 +348,38 @@ if __name__ == "__main__":
|
||||
"""
|
||||
mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run)
|
||||
if args.export_debian:
|
||||
mp.init()
|
||||
DebianArtifactory(release_info, dry_run=args.dry_run).export_packages()
|
||||
mp.teardown()
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.EXPORT_DEB
|
||||
) as release_info:
|
||||
mp.init()
|
||||
DebianArtifactory(release_info, dry_run=args.dry_run).export_packages()
|
||||
mp.teardown()
|
||||
if args.export_rpm:
|
||||
mp.init()
|
||||
RpmArtifactory(release_info, dry_run=args.dry_run).export_packages()
|
||||
mp.teardown()
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.EXPORT_RPM
|
||||
) as release_info:
|
||||
mp.init()
|
||||
RpmArtifactory(release_info, dry_run=args.dry_run).export_packages()
|
||||
mp.teardown()
|
||||
if args.export_tgz:
|
||||
mp.init()
|
||||
TgzArtifactory(release_info, dry_run=args.dry_run).export_packages()
|
||||
mp.teardown()
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.EXPORT_TGZ
|
||||
) as release_info:
|
||||
mp.init()
|
||||
TgzArtifactory(release_info, dry_run=args.dry_run).export_packages()
|
||||
mp.teardown()
|
||||
if args.test_debian:
|
||||
DebianArtifactory(release_info, dry_run=args.dry_run).test_packages()
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.TEST_DEB
|
||||
) as release_info:
|
||||
DebianArtifactory(release_info, dry_run=args.dry_run).test_packages()
|
||||
if args.test_tgz:
|
||||
TgzArtifactory(release_info, dry_run=args.dry_run).test_packages()
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.TEST_TGZ
|
||||
) as release_info:
|
||||
TgzArtifactory(release_info, dry_run=args.dry_run).test_packages()
|
||||
if args.test_rpm:
|
||||
RpmArtifactory(release_info, dry_run=args.dry_run).test_packages()
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.TEST_RPM
|
||||
) as release_info:
|
||||
RpmArtifactory(release_info, dry_run=args.dry_run).test_packages()
|
||||
|
@ -1,17 +1,17 @@
|
||||
import argparse
|
||||
from datetime import timedelta, datetime
|
||||
import logging
|
||||
import dataclasses
|
||||
import json
|
||||
import os
|
||||
from commit_status_helper import get_commit_filtered_statuses
|
||||
import sys
|
||||
from typing import List
|
||||
|
||||
from get_robot_token import get_best_robot_token
|
||||
from github_helper import GitHub
|
||||
from release import Release, Repo as ReleaseRepo, RELEASE_READY_STATUS
|
||||
from ci_utils import Shell
|
||||
from env_helper import GITHUB_REPOSITORY
|
||||
from report import SUCCESS
|
||||
from ssh import SSHKey
|
||||
|
||||
LOGGER_NAME = __name__
|
||||
HELPER_LOGGERS = ["github_helper", LOGGER_NAME]
|
||||
logger = logging.getLogger(LOGGER_NAME)
|
||||
from ci_buddy import CIBuddy
|
||||
from ci_config import CI
|
||||
|
||||
|
||||
def parse_args():
|
||||
@ -21,120 +21,198 @@ def parse_args():
|
||||
)
|
||||
parser.add_argument("--token", help="GitHub token, if not set, used from smm")
|
||||
parser.add_argument(
|
||||
"--repo", default="ClickHouse/ClickHouse", help="Repo owner/name"
|
||||
)
|
||||
parser.add_argument("--dry-run", action="store_true", help="Do not create anything")
|
||||
parser.add_argument(
|
||||
"--release-after-days",
|
||||
type=int,
|
||||
default=3,
|
||||
help="Do automatic release on the latest green commit after the latest "
|
||||
"release if the newest release is older than the specified days",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--debug-helpers",
|
||||
"--post-status",
|
||||
action="store_true",
|
||||
help="Add debug logging for this script and github_helper",
|
||||
help="Post release branch statuses",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--remote-protocol",
|
||||
"-p",
|
||||
default="ssh",
|
||||
choices=ReleaseRepo.VALID,
|
||||
help="repo protocol for git commands remote, 'origin' is a special case and "
|
||||
"uses 'origin' as a remote",
|
||||
"--post-auto-release-complete",
|
||||
action="store_true",
|
||||
help="Post autorelease completion status",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--prepare",
|
||||
action="store_true",
|
||||
help="Prepare autorelease info",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--wf-status",
|
||||
type=str,
|
||||
default="",
|
||||
help="overall workflow status [success|failure]",
|
||||
)
|
||||
return parser.parse_args(), parser
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE = 5
|
||||
AUTORELEASE_INFO_FILE = "/tmp/autorelease_info.json"
|
||||
|
||||
|
||||
@dataclasses.dataclass
class ReleaseParams:
    """Auto-release verdict for one release branch.

    Entries are created by ``_prepare`` (one per release pull request) and are
    stored to / restored from JSON through ``AutoReleaseInfo``.
    """

    # True when the inspected candidate commit has a successful CI status
    ready: bool
    # CI status of the last candidate whose workflow had completed ("" if none)
    ci_status: str
    # commits since the previous release tag minus the candidates examined
    num_patches: int
    # release branch name (head ref of the release pull request)
    release_branch: str
    # SHA of the last candidate whose workflow had completed ("" if none)
    commit_sha: str
    # how many candidates were passed over, counting from the branch head
    commits_to_branch_head: int
    # set to True by _prepare on the last entry of its list
    latest: bool

    def to_dict(self):
        """Return all fields as a plain ``dict`` keyed by field name."""
        as_mapping = dataclasses.asdict(self)
        return as_mapping
|
||||
|
||||
|
||||
@dataclasses.dataclass
class AutoReleaseInfo:
    """Collection of per-branch ``ReleaseParams`` persisted as JSON.

    The data lives in the file named by ``AUTORELEASE_INFO_FILE``.
    """

    releases: List[ReleaseParams]

    def add_release(self, release_params: ReleaseParams) -> None:
        """Append one branch entry to ``releases``."""
        self.releases += [release_params]

    def dump(self):
        """Write the whole collection to ``AUTORELEASE_INFO_FILE`` as indented JSON."""
        print(f"Dump release info into [{AUTORELEASE_INFO_FILE}]")
        with open(AUTORELEASE_INFO_FILE, "w", encoding="utf-8") as out:
            serialized = json.dumps(dataclasses.asdict(self), indent=2)
            out.write(serialized + "\n")

    @staticmethod
    def from_file() -> "AutoReleaseInfo":
        """Load the collection previously written by ``dump``."""
        with open(AUTORELEASE_INFO_FILE, "r", encoding="utf-8") as src:
            raw = json.load(src)
            restored = []
            for entry in raw["releases"]:
                restored.append(ReleaseParams(**entry))
        return AutoReleaseInfo(releases=restored)
|
||||
|
||||
|
||||
def _prepare(token):
    """Evaluate every release branch and dump the auto-release candidates.

    For each release pull request the latest release tag of its branch is
    located, the first-parent commits between that tag and the branch head are
    listed, and up to MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE of them
    (starting from the branch head) are inspected until one with a completed
    workflow and a successful build-check status is found. One ReleaseParams
    entry per branch is collected, the last one is flagged as ``latest`` and
    the result is written with ``AutoReleaseInfo.dump()``.

    Args:
        token: GitHub token; exported as GH_TOKEN for ``gh`` and used for the
            GitHub API calls. Must be longer than 10 characters.
    """
    import re  # local import: only the version-aware sort key below needs it

    def _version_key(name):
        # Order names by their numeric components, e.g.
        # "refs/tags/v24.3.10.33-lts" -> [24, 3, 10, 33]. A plain string sort
        # would put "v24.3.9..." after "v24.3.10..." and "24.10" before "24.3".
        return [int(part) for part in re.findall(r"\d+", name)]

    assert len(token) > 10
    os.environ["GH_TOKEN"] = token
    Shell.run("gh auth status", check=True)

    gh = GitHub(token)
    prs = gh.get_release_pulls(GITHUB_REPOSITORY)
    # Version-aware order, so that the last entry really is the newest branch
    prs.sort(key=lambda x: _version_key(x.head.ref))
    branch_names = [pr.head.ref for pr in prs]
    print(f"Found release branches [{branch_names}]")

    repo = gh.get_repo(GITHUB_REPOSITORY)
    autoRelease_info = AutoReleaseInfo(releases=[])

    for pr in prs:
        print(f"\nChecking PR [{pr.head.ref}]")

        # The trailing dot keeps the prefix match inside this branch:
        # without it "tags/v24.1" would also match the "v24.10.*" tags.
        refs = list(repo.get_git_matching_refs(f"tags/v{pr.head.ref}."))
        assert refs

        refs.sort(key=lambda ref: _version_key(ref.ref))
        latest_release_tag_ref = refs[-1]
        latest_release_tag = repo.get_git_tag(latest_release_tag_ref.object.sha)

        commits = Shell.run(
            f"git rev-list --first-parent {latest_release_tag.tag}..origin/{pr.head.ref}",
            check=True,
        ).split("\n")
        commit_num = len(commits)
        print(
            f"Previous release [{latest_release_tag.tag}] was [{commit_num}] commits ago, date [{latest_release_tag.tagger.date}]"
        )

        commits_to_check = commits[:-1]  # Exclude the version bump commit
        commit_sha = ""
        commit_ci_status = ""
        commits_to_branch_head = 0

        for idx, commit in enumerate(
            commits_to_check[:MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE]
        ):
            print(
                f"Check commit [{commit}] [{pr.head.ref}~{idx+1}] as release candidate"
            )
            commit_num -= 1

            is_completed = CI.GHActions.check_wf_completed(
                token=token, commit_sha=commit
            )
            if not is_completed:
                print(f"CI is in progress for [{commit}] - check previous commit")
                commits_to_branch_head += 1
                continue

            commit_ci_status = CI.GHActions.get_commit_status_by_name(
                token=token,
                commit_sha=commit,
                status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"),
            )
            commit_sha = commit
            if commit_ci_status == SUCCESS:
                break

            print(f"CI status [{commit_ci_status}] - skip")
            commits_to_branch_head += 1

        ready = False
        if commit_ci_status == SUCCESS and commit_sha:
            print(
                f"Add release ready info for commit [{commit_sha}] and release branch [{pr.head.ref}]"
            )
            ready = True
        else:
            print(f"WARNING: No ready commits found for release branch [{pr.head.ref}]")

        autoRelease_info.add_release(
            ReleaseParams(
                release_branch=pr.head.ref,
                commit_sha=commit_sha,
                ready=ready,
                ci_status=commit_ci_status,
                num_patches=commit_num,
                commits_to_branch_head=commits_to_branch_head,
                latest=False,
            )
        )

    # prs are in ascending version order, so the last entry is the newest branch
    if autoRelease_info.releases:
        autoRelease_info.releases[-1].latest = True

    autoRelease_info.dump()
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
if args.debug_helpers:
|
||||
for logger_name in HELPER_LOGGERS:
|
||||
logging.getLogger(logger_name).setLevel(logging.DEBUG)
|
||||
args, parser = parse_args()
|
||||
|
||||
token = args.token or get_best_robot_token()
|
||||
days_as_timedelta = timedelta(days=args.release_after_days)
|
||||
now = datetime.now()
|
||||
|
||||
gh = GitHub(token)
|
||||
prs = gh.get_release_pulls(args.repo)
|
||||
branch_names = [pr.head.ref for pr in prs]
|
||||
|
||||
logger.info("Found release branches: %s\n ", " \n".join(branch_names))
|
||||
repo = gh.get_repo(args.repo)
|
||||
|
||||
# In general there is no guarantee on which order the refs/commits are
|
||||
# returned from the API, so we have to order them.
|
||||
for pr in prs:
|
||||
logger.info("Checking PR %s", pr.head.ref)
|
||||
|
||||
refs = list(repo.get_git_matching_refs(f"tags/v{pr.head.ref}"))
|
||||
refs.sort(key=lambda ref: ref.ref)
|
||||
|
||||
latest_release_tag_ref = refs[-1]
|
||||
latest_release_tag = repo.get_git_tag(latest_release_tag_ref.object.sha)
|
||||
logger.info("That last release was done at %s", latest_release_tag.tagger.date)
|
||||
|
||||
if latest_release_tag.tagger.date + days_as_timedelta > now:
|
||||
logger.info(
|
||||
"Not enough days since the last release %s,"
|
||||
" no automatic release can be done",
|
||||
latest_release_tag.tag,
|
||||
if args.post_status:
|
||||
info = AutoReleaseInfo.from_file()
|
||||
for release_info in info.releases:
|
||||
if release_info.ready:
|
||||
CIBuddy(dry_run=False).post_info(
|
||||
title=f"Auto Release Status for {release_info.release_branch}",
|
||||
body=release_info.to_dict(),
|
||||
)
|
||||
else:
|
||||
CIBuddy(dry_run=False).post_warning(
|
||||
title=f"Auto Release Status for {release_info.release_branch}",
|
||||
body=release_info.to_dict(),
|
||||
)
|
||||
elif args.post_auto_release_complete:
|
||||
assert args.wf_status, "--wf-status Required with --post-auto-release-complete"
|
||||
if args.wf_status != SUCCESS:
|
||||
CIBuddy(dry_run=False).post_job_error(
|
||||
error_description="Autorelease workflow failed",
|
||||
job_name="Autorelease",
|
||||
with_instance_info=False,
|
||||
with_wf_link=True,
|
||||
critical=True,
|
||||
)
|
||||
continue
|
||||
|
||||
unreleased_commits = list(
|
||||
repo.get_commits(sha=pr.head.ref, since=latest_release_tag.tagger.date)
|
||||
)
|
||||
unreleased_commits.sort(
|
||||
key=lambda commit: commit.commit.committer.date, reverse=True
|
||||
)
|
||||
|
||||
for commit in unreleased_commits:
|
||||
logger.info("Checking statuses of commit %s", commit.sha)
|
||||
statuses = get_commit_filtered_statuses(commit)
|
||||
all_success = all(st.state == SUCCESS for st in statuses)
|
||||
passed_ready_for_release_check = any(
|
||||
st.context == RELEASE_READY_STATUS and st.state == SUCCESS
|
||||
for st in statuses
|
||||
else:
|
||||
CIBuddy(dry_run=False).post_info(
|
||||
title=f"Autorelease completed",
|
||||
body="",
|
||||
with_wf_link=True,
|
||||
)
|
||||
if not (all_success and passed_ready_for_release_check):
|
||||
logger.info("Commit is not green, thus not suitable for release")
|
||||
continue
|
||||
|
||||
logger.info("Commit is ready for release, let's release!")
|
||||
|
||||
release = Release(
|
||||
ReleaseRepo(args.repo, args.remote_protocol),
|
||||
commit.sha,
|
||||
"patch",
|
||||
args.dry_run,
|
||||
True,
|
||||
)
|
||||
try:
|
||||
release.do(True, True, True)
|
||||
except:
|
||||
if release.has_rollback:
|
||||
logging.error(
|
||||
"!!The release process finished with error, read the output carefully!!"
|
||||
)
|
||||
logging.error(
|
||||
"Probably, rollback finished with error. "
|
||||
"If you don't see any of the following commands in the output, "
|
||||
"execute them manually:"
|
||||
)
|
||||
release.log_rollback()
|
||||
raise
|
||||
logging.info("New release is done!")
|
||||
break
|
||||
elif args.prepare:
|
||||
_prepare(token=args.token or get_best_robot_token())
|
||||
else:
|
||||
parser.print_help()
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""):
|
||||
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
|
||||
main()
|
||||
else:
|
||||
main()
|
||||
main()
|
||||
|
@ -1110,13 +1110,14 @@ def main() -> int:
|
||||
ci_cache.print_status()
|
||||
|
||||
if IS_CI and not pr_info.is_merge_queue:
|
||||
# wait for pending jobs to be finished, await_jobs is a long blocking call
|
||||
ci_cache.await_pending_jobs(pr_info.is_release)
|
||||
|
||||
if pr_info.is_release:
|
||||
print("Release/master: CI Cache add pending records for all todo jobs")
|
||||
ci_cache.push_pending_all(pr_info.is_release)
|
||||
|
||||
# wait for pending jobs to be finished, await_jobs is a long blocking call
|
||||
ci_cache.await_pending_jobs(pr_info.is_release)
|
||||
|
||||
# conclude results
|
||||
result["git_ref"] = git_ref
|
||||
result["version"] = version
|
||||
@ -1292,10 +1293,11 @@ def main() -> int:
|
||||
pass
|
||||
if Utils.is_killed_with_oom():
|
||||
print("WARNING: OOM while job execution")
|
||||
print(subprocess.run("sudo dmesg -T", check=False))
|
||||
error_description = f"Out Of Memory, exit_code {job_report.exit_code}"
|
||||
else:
|
||||
error_description = f"Unknown, exit_code {job_report.exit_code}"
|
||||
CIBuddy().post_error(
|
||||
CIBuddy().post_job_error(
|
||||
error_description + f" after {int(job_report.duration)}s",
|
||||
job_name=_get_ext_check_name(args.job_name),
|
||||
)
|
||||
|
@ -1,5 +1,6 @@
|
||||
import json
|
||||
import os
|
||||
from typing import Union, Dict
|
||||
|
||||
import boto3
|
||||
import requests
|
||||
@ -60,14 +61,64 @@ class CIBuddy:
|
||||
except Exception as e:
|
||||
print(f"ERROR: Failed to post message, ex {e}")
|
||||
|
||||
def post_error(self, error_description, job_name="", with_instance_info=True):
|
||||
def _post_formatted(
|
||||
self, title: str, body: Union[Dict, str], with_wf_link: bool
|
||||
) -> None:
|
||||
message = title
|
||||
if isinstance(body, dict):
|
||||
for name, value in body.items():
|
||||
if "commit_sha" in name:
|
||||
value = (
|
||||
f"<https://github.com/{self.repo}/commit/{value}|{value[:8]}>"
|
||||
)
|
||||
message += f" *{name}*: {value}\n"
|
||||
else:
|
||||
message += body + "\n"
|
||||
run_id = os.getenv("GITHUB_RUN_ID", "")
|
||||
if with_wf_link and run_id:
|
||||
message += f" *workflow*: <https://github.com/{self.repo}/actions/runs/{run_id}|{run_id}>\n"
|
||||
self.post(message)
|
||||
|
||||
def post_info(
    self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
    """Post an informational message: bold title prefixed with ``:white_circle:``."""
    self._post_formatted(f":white_circle: *{title}*\n\n", body, with_wf_link)
|
||||
|
||||
def post_done(
    self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
    """Post a completion message: bold title prefixed with ``:white_check_mark:``."""
    self._post_formatted(f":white_check_mark: *{title}*\n\n", body, with_wf_link)
|
||||
|
||||
def post_warning(
    self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
    """Post a warning message: bold title prefixed with ``:warning:``."""
    self._post_formatted(f":warning: *{title}*\n\n", body, with_wf_link)
|
||||
|
||||
def post_critical(
    self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
    """Post a critical message: bold title prefixed with ``:black_circle:``."""
    self._post_formatted(f":black_circle: *{title}*\n\n", body, with_wf_link)
|
||||
|
||||
def post_job_error(
|
||||
self,
|
||||
error_description: str,
|
||||
job_name: str = "",
|
||||
with_instance_info: bool = True,
|
||||
with_wf_link: bool = True,
|
||||
critical: bool = False,
|
||||
) -> None:
|
||||
instance_id, instance_type = "unknown", "unknown"
|
||||
if with_instance_info:
|
||||
instance_id = Shell.run("ec2metadata --instance-id") or instance_id
|
||||
instance_type = Shell.run("ec2metadata --instance-type") or instance_type
|
||||
if not job_name:
|
||||
job_name = os.getenv("CHECK_NAME", "unknown")
|
||||
line_err = f":red_circle: *Error: {error_description}*\n\n"
|
||||
sign = ":red_circle:" if not critical else ":black_circle:"
|
||||
line_err = f"{sign} *Error: {error_description}*\n\n"
|
||||
line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n"
|
||||
line_job = f" *Job:* `{job_name}`\n"
|
||||
line_pr_ = f" *PR:* <https://github.com/{self.repo}/pull/{self.pr_number}|#{self.pr_number}>, <{self.commit_url}|{self.sha}>\n"
|
||||
@ -82,10 +133,13 @@ class CIBuddy:
|
||||
message += line_pr_
|
||||
else:
|
||||
message += line_br_
|
||||
run_id = os.getenv("GITHUB_RUN_ID", "")
|
||||
if with_wf_link and run_id:
|
||||
message += f" *workflow*: <https://github.com/{self.repo}/actions/runs/{run_id}|{run_id}>\n"
|
||||
self.post(message)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test
|
||||
buddy = CIBuddy(dry_run=True)
|
||||
buddy.post_error("TEst")
|
||||
buddy.post_job_error("TEst")
|
||||
|
@ -638,7 +638,14 @@ class CiCache:
|
||||
pushes pending records for all jobs that supposed to be run
|
||||
"""
|
||||
for job, job_config in self.jobs_to_do.items():
|
||||
if not job_config.has_digest():
|
||||
if (
|
||||
job in self.jobs_to_wait
|
||||
or not job_config.has_digest()
|
||||
or job_config.disable_await
|
||||
):
|
||||
# 1. "job in self.jobs_to_wait" - this job already has a pending record in cache
|
||||
# 2. "not job_config.has_digest()" - cache is not used for these jobs
|
||||
# 3. "job_config.disable_await" - await is explicitly disabled
|
||||
continue
|
||||
pending_state = PendingState(time.time(), run_url=GITHUB_RUN_URL)
|
||||
assert job_config.batches
|
||||
@ -708,7 +715,7 @@ class CiCache:
|
||||
Filter is to be applied in PRs to remove jobs that are not affected by the change
|
||||
:return:
|
||||
"""
|
||||
remove_from_to_do = []
|
||||
remove_from_workflow = []
|
||||
required_builds = []
|
||||
has_test_jobs_to_skip = False
|
||||
for job_name, job_config in self.jobs_to_do.items():
|
||||
@ -723,26 +730,41 @@ class CiCache:
|
||||
job=reference_name,
|
||||
job_config=reference_config,
|
||||
):
|
||||
remove_from_to_do.append(job_name)
|
||||
remove_from_workflow.append(job_name)
|
||||
has_test_jobs_to_skip = True
|
||||
else:
|
||||
required_builds += (
|
||||
job_config.required_builds if job_config.required_builds else []
|
||||
)
|
||||
if has_test_jobs_to_skip:
|
||||
# If there are tests to skip, it means build digest has not been changed.
|
||||
# If there are tests to skip, it means builds are not affected as well.
|
||||
# No need to test builds. Let's keep all builds required for test jobs and skip the others
|
||||
for job_name, job_config in self.jobs_to_do.items():
|
||||
if CI.is_build_job(job_name):
|
||||
if job_name not in required_builds:
|
||||
remove_from_to_do.append(job_name)
|
||||
remove_from_workflow.append(job_name)
|
||||
|
||||
for job in remove_from_to_do:
|
||||
for job in remove_from_workflow:
|
||||
print(f"Filter job [{job}] - not affected by the change")
|
||||
if job in self.jobs_to_do:
|
||||
del self.jobs_to_do[job]
|
||||
if job in self.jobs_to_wait:
|
||||
del self.jobs_to_wait[job]
|
||||
if job in self.jobs_to_skip:
|
||||
self.jobs_to_skip.remove(job)
|
||||
|
||||
# special handling for the special job: BUILD_CHECK
|
||||
has_builds = False
|
||||
for job in list(self.jobs_to_do) + self.jobs_to_skip:
|
||||
if CI.is_build_job(job):
|
||||
has_builds = True
|
||||
break
|
||||
if not has_builds:
|
||||
if CI.JobNames.BUILD_CHECK in self.jobs_to_do:
|
||||
print(
|
||||
f"Filter job [{CI.JobNames.BUILD_CHECK}] - no builds are required in the workflow"
|
||||
)
|
||||
del self.jobs_to_do[CI.JobNames.BUILD_CHECK]
|
||||
|
||||
def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None:
|
||||
"""
|
||||
@ -884,3 +906,87 @@ class CiCache:
|
||||
self.jobs_to_wait[job] = job_config
|
||||
|
||||
return self
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# for testing
|
||||
job_digest = {
|
||||
"package_release": "bbbd3519d1",
|
||||
"package_aarch64": "bbbd3519d1",
|
||||
"package_asan": "bbbd3519d1",
|
||||
"package_ubsan": "bbbd3519d1",
|
||||
"package_tsan": "bbbd3519d1",
|
||||
"package_msan": "bbbd3519d1",
|
||||
"package_debug": "bbbd3519d1",
|
||||
"package_release_coverage": "bbbd3519d1",
|
||||
"binary_release": "bbbd3519d1",
|
||||
"binary_tidy": "bbbd3519d1",
|
||||
"binary_darwin": "bbbd3519d1",
|
||||
"binary_aarch64": "bbbd3519d1",
|
||||
"binary_aarch64_v80compat": "bbbd3519d1",
|
||||
"binary_freebsd": "bbbd3519d1",
|
||||
"binary_darwin_aarch64": "bbbd3519d1",
|
||||
"binary_ppc64le": "bbbd3519d1",
|
||||
"binary_amd64_compat": "bbbd3519d1",
|
||||
"binary_amd64_musl": "bbbd3519d1",
|
||||
"binary_riscv64": "bbbd3519d1",
|
||||
"binary_s390x": "bbbd3519d1",
|
||||
"binary_loongarch64": "bbbd3519d1",
|
||||
"Builds": "f5dffeecb8",
|
||||
"Install packages (release)": "ba0c89660e",
|
||||
"Install packages (aarch64)": "ba0c89660e",
|
||||
"Stateful tests (asan)": "32a9a1aba9",
|
||||
"Stateful tests (tsan)": "32a9a1aba9",
|
||||
"Stateful tests (msan)": "32a9a1aba9",
|
||||
"Stateful tests (ubsan)": "32a9a1aba9",
|
||||
"Stateful tests (debug)": "32a9a1aba9",
|
||||
"Stateful tests (release)": "32a9a1aba9",
|
||||
"Stateful tests (coverage)": "32a9a1aba9",
|
||||
"Stateful tests (aarch64)": "32a9a1aba9",
|
||||
"Stateful tests (release, ParallelReplicas)": "32a9a1aba9",
|
||||
"Stateful tests (debug, ParallelReplicas)": "32a9a1aba9",
|
||||
"Stateless tests (asan)": "deb6778b88",
|
||||
"Stateless tests (tsan)": "deb6778b88",
|
||||
"Stateless tests (msan)": "deb6778b88",
|
||||
"Stateless tests (ubsan)": "deb6778b88",
|
||||
"Stateless tests (debug)": "deb6778b88",
|
||||
"Stateless tests (release)": "deb6778b88",
|
||||
"Stateless tests (coverage)": "deb6778b88",
|
||||
"Stateless tests (aarch64)": "deb6778b88",
|
||||
"Stateless tests (release, old analyzer, s3, DatabaseReplicated)": "deb6778b88",
|
||||
"Stateless tests (debug, s3 storage)": "deb6778b88",
|
||||
"Stateless tests (tsan, s3 storage)": "deb6778b88",
|
||||
"Stress test (debug)": "aa298abf10",
|
||||
"Stress test (tsan)": "aa298abf10",
|
||||
"Upgrade check (debug)": "5ce4d3ee02",
|
||||
"Integration tests (asan, old analyzer)": "42e58be3aa",
|
||||
"Integration tests (tsan)": "42e58be3aa",
|
||||
"Integration tests (aarch64)": "42e58be3aa",
|
||||
"Integration tests flaky check (asan)": "42e58be3aa",
|
||||
"Compatibility check (release)": "ecb69d8c4b",
|
||||
"Compatibility check (aarch64)": "ecb69d8c4b",
|
||||
"Unit tests (release)": "09d00b702e",
|
||||
"Unit tests (asan)": "09d00b702e",
|
||||
"Unit tests (msan)": "09d00b702e",
|
||||
"Unit tests (tsan)": "09d00b702e",
|
||||
"Unit tests (ubsan)": "09d00b702e",
|
||||
"AST fuzzer (debug)": "c38ebf947f",
|
||||
"AST fuzzer (asan)": "c38ebf947f",
|
||||
"AST fuzzer (msan)": "c38ebf947f",
|
||||
"AST fuzzer (tsan)": "c38ebf947f",
|
||||
"AST fuzzer (ubsan)": "c38ebf947f",
|
||||
"Stateless tests flaky check (asan)": "deb6778b88",
|
||||
"Performance Comparison (release)": "a8a7179258",
|
||||
"ClickBench (release)": "45c07c4aa6",
|
||||
"ClickBench (aarch64)": "45c07c4aa6",
|
||||
"Docker server image": "6a24d5b187",
|
||||
"Docker keeper image": "6a24d5b187",
|
||||
"Docs check": "4764154c62",
|
||||
"Fast test": "cb269133f2",
|
||||
"Style check": "ffffffffff",
|
||||
"Stateful tests (ubsan, ParallelReplicas)": "32a9a1aba9",
|
||||
"Stress test (msan)": "aa298abf10",
|
||||
"Upgrade check (asan)": "5ce4d3ee02",
|
||||
}
|
||||
ci_cache = CiCache(job_digests=job_digest, cache_enabled=True, s3=S3Helper())
|
||||
ci_cache.update()
|
||||
|
@ -32,6 +32,9 @@ class CI:
|
||||
from ci_definitions import MQ_JOBS as MQ_JOBS
|
||||
from ci_definitions import WorkflowStages as WorkflowStages
|
||||
from ci_definitions import Runners as Runners
|
||||
from ci_utils import Envs as Envs
|
||||
from ci_utils import Utils as Utils
|
||||
from ci_utils import GHActions as GHActions
|
||||
from ci_definitions import Labels as Labels
|
||||
from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS
|
||||
from ci_utils import CATEGORY_TO_LABEL as CATEGORY_TO_LABEL
|
||||
|
@ -351,6 +351,8 @@ class JobConfig:
|
||||
run_by_label: str = ""
|
||||
# to run always regardless of the job digest or/and label
|
||||
run_always: bool = False
|
||||
# disables CI await for a given job
|
||||
disable_await: bool = False
|
||||
# if the job needs to be run on the release branch, including master (building packages, docker server).
|
||||
# NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able.
|
||||
required_on_release_branch: bool = False
|
||||
@ -395,6 +397,7 @@ class CommonJobConfigs:
|
||||
],
|
||||
),
|
||||
runner_type=Runners.STYLE_CHECKER_ARM,
|
||||
disable_await=True,
|
||||
)
|
||||
COMPATIBILITY_TEST = JobConfig(
|
||||
job_name_keyword="compatibility",
|
||||
|
@ -1,9 +1,16 @@
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import time
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator, List, Union, Optional, Tuple
|
||||
from typing import Any, Iterator, List, Union, Optional, Sequence, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class Envs:
|
||||
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
|
||||
|
||||
|
||||
LABEL_CATEGORIES = {
|
||||
@ -80,6 +87,71 @@ class GHActions:
|
||||
print(line)
|
||||
print("::endgroup::")
|
||||
|
||||
@staticmethod
def get_commit_status_by_name(
    token: str, commit_sha: str, status_name: Union[str, Sequence[str]]
) -> str:
    """Look up the state of a commit status by its context name.

    Args:
        token: GitHub token sent in the ``Authorization`` header.
        commit_sha: 40-character hexadecimal commit SHA to query.
        status_name: one status context name, or a sequence of accepted names.

    Returns:
        The ``state`` of the first status in the response whose ``context``
        is one of ``status_name``; an empty string if nothing matches or the
        request did not return HTTP 200.

    Raises:
        AssertionError: if the arguments fail the sanity checks, or if the
            response is paginated (a match could be hidden on a later page).
    """
    # NOTE(review): these checks presumably guard against swapped
    # token/commit_sha arguments - a SHA must be hex, the token must not be.
    assert len(token) == 40
    assert len(commit_sha) == 40
    assert is_hex(commit_sha)
    assert not is_hex(token)

    # GitHub caps per_page at 100, so asking for more has no effect.
    url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page=100"
    headers = {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github.v3+json",
    }
    response = requests.get(url, headers=headers, timeout=5)

    if isinstance(status_name, str):
        status_name = (status_name,)

    if response.status_code != 200:
        # Still best-effort (empty result), but no longer silent.
        print(
            f"ERROR: failed to get commit statuses, http code [{response.status_code}]"
        )
        return ""

    assert "next" not in response.links, "Response truncated"
    for status in response.json():
        if status["context"] in status_name:
            return status["state"]  # type: ignore
    return ""
|
||||
|
||||
@staticmethod
def check_wf_completed(token: str, commit_sha: str) -> bool:
    """Tell whether every check run of a commit has finished.

    All pages of the check-runs listing are inspected by following the
    ``next`` link of each response, so commits with more than 100 check
    runs are handled correctly.

    Args:
        token: GitHub token sent in the ``Authorization`` header.
        commit_sha: commit to inspect.

    Returns:
        True if at least one check run exists and all of them have status
        ``completed``. False if any check run is not completed, or if all
        three attempts failed with an exception.
    """
    headers = {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github.v3+json",
    }
    first_page_url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/check-runs?per_page=100"

    for i in range(3):
        try:
            page_url = first_page_url
            is_first_page = True
            while page_url:
                response = requests.get(page_url, headers=headers, timeout=5)
                response.raise_for_status()

                data = response.json()
                if is_first_page:
                    # No check runs at all is treated as an error and retried.
                    assert data["check_runs"], "?"
                    is_first_page = False

                for check in data["check_runs"]:
                    if check["status"] != "completed":
                        print(
                            f"   Check workflow status: Check not completed [{check['name']}]"
                        )
                        return False

                # requests parses the Link header; no "next" means last page.
                page_url = response.links.get("next", {}).get("url", "")
            return True
        except Exception as e:
            print(f"ERROR: exception after attempt [{i}]: {e}")
            time.sleep(1)

    return False
|
||||
|
||||
@staticmethod
def get_pr_url_by_branch(repo, branch):
    """Return the URL of the first pull request whose head is ``branch``.

    The lookup is done with the ``gh`` CLI (``gh pr list``).

    Args:
        repo: repository in ``owner/name`` form, passed to ``--repo``.
        branch: head branch name, passed to ``--head``.

    Returns:
        Whatever ``Shell.run`` returns for the command; a falsy value is
        reported as an error on stdout and returned as is.
    """
    get_url_cmd = (
        f"gh pr list --repo {repo} --head {branch} --json url --jq '.[0].url'"
    )
    url = Shell.run(get_url_cmd)
    if not url:
        print(f"ERROR: PR not found, branch [{branch}]")
    return url
|
||||
|
||||
|
||||
class Shell:
|
||||
@classmethod
|
||||
@ -95,7 +167,11 @@ class Shell:
|
||||
return res.stdout.strip()
|
||||
|
||||
@classmethod
|
||||
def run(cls, command):
|
||||
def run(cls, command, check=False, dry_run=False):
|
||||
if dry_run:
|
||||
print(f"Dry-ryn. Would run command [{command}]")
|
||||
return ""
|
||||
print(f"Run command [{command}]")
|
||||
res = ""
|
||||
result = subprocess.run(
|
||||
command,
|
||||
@ -106,13 +182,26 @@ class Shell:
|
||||
check=False,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
print(f"stdout: {result.stdout.strip()}")
|
||||
res = result.stdout
|
||||
else:
|
||||
print(
|
||||
f"ERROR: stdout: {result.stdout.strip()}, stderr: {result.stderr.strip()}"
|
||||
)
|
||||
if check:
|
||||
assert result.returncode == 0
|
||||
return res.strip()
|
||||
|
||||
@classmethod
def run_as_daemon(cls, command):
    """Start ``command`` in the background without waiting for it.

    The command line is tokenised with ``shlex.split`` so that quoted
    arguments stay intact and repeated spaces do not produce empty
    arguments. No shell is involved.

    Args:
        command: command line as a single string.

    Returns:
        Always the tuple ``(0, "")``; the child process is not tracked.
    """
    import shlex  # local import: keeps this block self-contained

    print(f"Run daemon command [{command}]")
    subprocess.Popen(shlex.split(command))  # pylint:disable=consider-using-with
    return 0, ""
|
||||
|
||||
@classmethod
|
||||
def check(cls, command):
|
||||
result = subprocess.run(
|
||||
command + " 2>&1",
|
||||
command,
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
|
@ -2,7 +2,6 @@ import argparse
|
||||
import dataclasses
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from contextlib import contextmanager
|
||||
from copy import copy
|
||||
@ -13,7 +12,8 @@ from git_helper import Git, GIT_PREFIX
|
||||
from ssh import SSHAgent
|
||||
from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET
|
||||
from s3_helper import S3Helper
|
||||
from ci_utils import Shell
|
||||
from ci_utils import Shell, GHActions
|
||||
from ci_buddy import CIBuddy
|
||||
from version_helper import (
|
||||
FILE_WITH_VERSION_PATH,
|
||||
GENERATED_CONTRIBUTORS,
|
||||
@ -27,34 +27,66 @@ from ci_config import CI
|
||||
|
||||
CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH)
|
||||
CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS)
|
||||
RELEASE_INFO_FILE = "/tmp/release_info.json"
|
||||
|
||||
|
||||
class ShellRunner:
|
||||
class ReleaseProgress:
    """String names of the release steps.

    One of these values is stored in ``ReleaseInfo.release_progress`` to
    record which step the release is currently at.
    """

    # first and last markers
    STARTED = "started"
    COMPLETED = "completed"

    # repository and GitHub steps
    DOWNLOAD_PACKAGES = "download packages"
    PUSH_RELEASE_TAG = "push release tag"
    PUSH_NEW_RELEASE_BRANCH = "push new release branch"
    BUMP_VERSION = "bump version"
    CREATE_GH_RELEASE = "create GH release"

    # package export steps
    EXPORT_TGZ = "export TGZ packages"
    EXPORT_RPM = "export RPM packages"
    EXPORT_DEB = "export DEB packages"

    # package test steps
    TEST_TGZ = "test TGZ packages"
    TEST_RPM = "test RPM packages"
    TEST_DEB = "test DEB packages"
|
||||
|
||||
@classmethod
|
||||
def run(
|
||||
cls, command, check_retcode=True, print_output=True, async_=False, dry_run=False
|
||||
):
|
||||
if dry_run:
|
||||
print(f"Dry-run: Would run shell command: [{command}]")
|
||||
return 0, ""
|
||||
print(f"Running shell command: [{command}]")
|
||||
if async_:
|
||||
subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with
|
||||
return 0, ""
|
||||
result = subprocess.run(
|
||||
command + " 2>&1",
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
if print_output:
|
||||
print(result.stdout)
|
||||
if check_retcode:
|
||||
assert result.returncode == 0, f"Return code [{result.returncode}]"
|
||||
return result.returncode, result.stdout
|
||||
|
||||
class ReleaseProgressDescription:
    """Outcome markers stored in ``ReleaseInfo.progress_description``."""

    # the step finished without an exception
    OK = "OK"
    # the step raised, or was explicitly marked as not yet finished
    FAILED = "FAILED"
|
||||
|
||||
|
||||
class ReleaseContextManager:
    """Context manager that records one release step in the release info file.

    On entry it creates (for the ``STARTED`` step) or loads the persisted
    ``ReleaseInfo`` and stamps it with the current step. On exit it stores
    whether the body raised (``FAILED``) or not (``OK``). Exceptions are
    never suppressed.
    """

    def __init__(self, release_progress):
        self.release_progress = release_progress
        self.release_info = None

    def __enter__(self):
        """Return the ``ReleaseInfo`` object for the step being entered."""
        if self.release_progress != ReleaseProgress.STARTED:
            # Any later step: continue from the info saved by the previous one.
            self.release_info = ReleaseInfo.from_file()
            loaded = self.release_info
            assert loaded
            assert (
                loaded.progress_description == ReleaseProgressDescription.OK
            ), "Must be OK on the start of new context"
            loaded.release_progress = self.release_progress
            loaded.dump()
            return self.release_info

        # Very first step: write placeholder info.
        # NOTE: relies on the module-level ``args`` parsed in ``__main__``.
        placeholder = ReleaseInfo(
            release_branch="NA",
            commit_sha=args.ref,
            release_tag="NA",
            version="NA",
            codename="NA",
            previous_release_tag="NA",
            previous_release_sha="NA",
            release_progress=ReleaseProgress.STARTED,
        )
        self.release_info = placeholder.dump()
        return self.release_info

    def __exit__(self, exc_type, exc_value, traceback):
        """Persist the step outcome; returns None so exceptions propagate."""
        assert self.release_info
        outcome = (
            ReleaseProgressDescription.OK
            if exc_type is None
            else ReleaseProgressDescription.FAILED
        )
        self.release_info.progress_description = outcome
        self.release_info.dump()
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
@ -67,31 +99,50 @@ class ReleaseInfo:
|
||||
codename: str
|
||||
previous_release_tag: str
|
||||
previous_release_sha: str
|
||||
changelog_pr: str = ""
|
||||
version_bump_pr: str = ""
|
||||
release_url: str = ""
|
||||
debian_command: str = ""
|
||||
rpm_command: str = ""
|
||||
tgz_command: str = ""
|
||||
docker_command: str = ""
|
||||
release_progress: str = ""
|
||||
progress_description: str = ""
|
||||
|
||||
def is_patch(self):
    """Return True when the release branch is anything other than master."""
    on_master = self.release_branch == "master"
    return not on_master
|
||||
|
||||
def is_new_release_branch(self):
    """Return True when the release is being made from the master branch."""
    branch = self.release_branch
    return branch == "master"
|
||||
|
||||
@staticmethod
|
||||
def from_file(file_path: str) -> "ReleaseInfo":
|
||||
with open(file_path, "r", encoding="utf-8") as json_file:
|
||||
def from_file() -> "ReleaseInfo":
|
||||
with open(RELEASE_INFO_FILE, "r", encoding="utf-8") as json_file:
|
||||
res = json.load(json_file)
|
||||
return ReleaseInfo(**res)
|
||||
|
||||
@staticmethod
|
||||
def prepare(commit_ref: str, release_type: str, outfile: str) -> None:
|
||||
Path(outfile).parent.mkdir(parents=True, exist_ok=True)
|
||||
Path(outfile).unlink(missing_ok=True)
|
||||
def dump(self):
    """Write this object as indented JSON to RELEASE_INFO_FILE.

    Returns:
        ``self``, so the call can be chained.
    """
    print(f"Dump release info into [{RELEASE_INFO_FILE}]")
    serialized = json.dumps(dataclasses.asdict(self), indent=2)
    with open(RELEASE_INFO_FILE, "w", encoding="utf-8") as out:
        # trailing newline kept, as print() would add it
        out.write(serialized + "\n")
    return self
|
||||
|
||||
def prepare(self, commit_ref: str, release_type: str) -> "ReleaseInfo":
|
||||
version = None
|
||||
release_branch = None
|
||||
release_tag = None
|
||||
previous_release_tag = None
|
||||
previous_release_sha = None
|
||||
codename = None
|
||||
codename = ""
|
||||
assert release_type in ("patch", "new")
|
||||
if release_type == "new":
|
||||
# check commit_ref is right and on a right branch
|
||||
ShellRunner.run(
|
||||
f"git merge-base --is-ancestor origin/{commit_ref} origin/master"
|
||||
Shell.run(
|
||||
f"git merge-base --is-ancestor {commit_ref} origin/master",
|
||||
check=True,
|
||||
)
|
||||
with checkout(commit_ref):
|
||||
_, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}")
|
||||
commit_sha = Shell.run(f"git rev-parse {commit_ref}", check=True)
|
||||
# Git() must be inside "with checkout" contextmanager
|
||||
git = Git()
|
||||
version = get_version_from_repo(git=git)
|
||||
@ -102,9 +153,6 @@ class ReleaseInfo:
|
||||
git.latest_tag == expected_prev_tag
|
||||
), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]"
|
||||
release_tag = version.describe
|
||||
codename = (
|
||||
VersionType.STABLE
|
||||
) # dummy value (artifactory won't be updated for new release)
|
||||
previous_release_tag = expected_prev_tag
|
||||
previous_release_sha = Shell.run_strict(
|
||||
f"git rev-parse {previous_release_tag}"
|
||||
@ -112,7 +160,7 @@ class ReleaseInfo:
|
||||
assert previous_release_sha
|
||||
if release_type == "patch":
|
||||
with checkout(commit_ref):
|
||||
_, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}")
|
||||
commit_sha = Shell.run(f"git rev-parse {commit_ref}", check=True)
|
||||
# Git() must be inside "with checkout" contextmanager
|
||||
git = Git()
|
||||
version = get_version_from_repo(git=git)
|
||||
@ -120,10 +168,11 @@ class ReleaseInfo:
|
||||
version.with_description(codename)
|
||||
release_branch = f"{version.major}.{version.minor}"
|
||||
release_tag = version.describe
|
||||
ShellRunner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags")
|
||||
Shell.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags", check=True)
|
||||
# check commit is right and on a right branch
|
||||
ShellRunner.run(
|
||||
f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}"
|
||||
Shell.run(
|
||||
f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}",
|
||||
check=True,
|
||||
)
|
||||
if version.patch == 1:
|
||||
expected_version = copy(version)
|
||||
@ -160,24 +209,24 @@ class ReleaseInfo:
|
||||
and commit_sha
|
||||
and release_tag
|
||||
and version
|
||||
and codename in ("lts", "stable")
|
||||
and (codename in ("lts", "stable") or release_type == "new")
|
||||
)
|
||||
res = ReleaseInfo(
|
||||
release_branch=release_branch,
|
||||
commit_sha=commit_sha,
|
||||
release_tag=release_tag,
|
||||
version=version.string,
|
||||
codename=codename,
|
||||
previous_release_tag=previous_release_tag,
|
||||
previous_release_sha=previous_release_sha,
|
||||
)
|
||||
with open(outfile, "w", encoding="utf-8") as f:
|
||||
print(json.dumps(dataclasses.asdict(res), indent=2), file=f)
|
||||
|
||||
self.release_branch = release_branch
|
||||
self.commit_sha = commit_sha
|
||||
self.release_tag = release_tag
|
||||
self.version = version.string
|
||||
self.codename = codename
|
||||
self.previous_release_tag = previous_release_tag
|
||||
self.previous_release_sha = previous_release_sha
|
||||
self.release_progress = ReleaseProgress.STARTED
|
||||
self.progress_description = ReleaseProgressDescription.OK
|
||||
return self
|
||||
|
||||
def push_release_tag(self, dry_run: bool) -> None:
|
||||
if dry_run:
|
||||
# remove locally created tag from prev run
|
||||
ShellRunner.run(
|
||||
Shell.run(
|
||||
f"{GIT_PREFIX} tag -l | grep -q {self.release_tag} && git tag -d {self.release_tag} ||:"
|
||||
)
|
||||
# Create release tag
|
||||
@ -185,16 +234,17 @@ class ReleaseInfo:
|
||||
f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]"
|
||||
)
|
||||
tag_message = f"Release {self.release_tag}"
|
||||
ShellRunner.run(
|
||||
f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}"
|
||||
Shell.run(
|
||||
f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}",
|
||||
check=True,
|
||||
)
|
||||
cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}"
|
||||
ShellRunner.run(cmd_push_tag, dry_run=dry_run)
|
||||
Shell.run(cmd_push_tag, dry_run=dry_run, check=True)
|
||||
|
||||
@staticmethod
|
||||
def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None:
|
||||
cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}"
|
||||
ShellRunner.run(cmd, dry_run=dry_run)
|
||||
Shell.run(cmd, dry_run=dry_run, check=True)
|
||||
|
||||
def push_new_release_branch(self, dry_run: bool) -> None:
|
||||
assert (
|
||||
@ -211,8 +261,8 @@ class ReleaseInfo:
|
||||
), f"Unexpected current version in git, must precede [{self.version}] by one step, actual [{version.string}]"
|
||||
if dry_run:
|
||||
# remove locally created branch from prev run
|
||||
ShellRunner.run(
|
||||
f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch} ||:"
|
||||
Shell.run(
|
||||
f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch}"
|
||||
)
|
||||
print(
|
||||
f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]"
|
||||
@ -225,7 +275,7 @@ class ReleaseInfo:
|
||||
cmd_push_branch = (
|
||||
f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}"
|
||||
)
|
||||
ShellRunner.run(cmd_push_branch, dry_run=dry_run)
|
||||
Shell.run(cmd_push_branch, dry_run=dry_run, check=True)
|
||||
|
||||
print("Create and push backport tags for new release branch")
|
||||
ReleaseInfo._create_gh_label(
|
||||
@ -234,12 +284,13 @@ class ReleaseInfo:
|
||||
ReleaseInfo._create_gh_label(
|
||||
f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run
|
||||
)
|
||||
ShellRunner.run(
|
||||
Shell.run(
|
||||
f"""gh pr create --repo {GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}'
|
||||
--head {new_release_branch} {pr_labels}
|
||||
--body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.'
|
||||
""",
|
||||
dry_run=dry_run,
|
||||
check=True,
|
||||
)
|
||||
|
||||
def update_version_and_contributors_list(self, dry_run: bool) -> None:
|
||||
@ -265,32 +316,55 @@ class ReleaseInfo:
|
||||
body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md")
|
||||
actor = os.getenv("GITHUB_ACTOR", "") or "me"
|
||||
cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}"
|
||||
ShellRunner.run(cmd_commit_version_upd, dry_run=dry_run)
|
||||
ShellRunner.run(cmd_push_branch, dry_run=dry_run)
|
||||
ShellRunner.run(cmd_create_pr, dry_run=dry_run)
|
||||
Shell.run(cmd_commit_version_upd, check=True, dry_run=dry_run)
|
||||
Shell.run(cmd_push_branch, check=True, dry_run=dry_run)
|
||||
Shell.run(cmd_create_pr, check=True, dry_run=dry_run)
|
||||
if dry_run:
|
||||
ShellRunner.run(
|
||||
f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'"
|
||||
)
|
||||
ShellRunner.run(
|
||||
Shell.run(f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'")
|
||||
Shell.run(
|
||||
f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'"
|
||||
)
|
||||
self.version_bump_pr = "dry-run"
|
||||
else:
|
||||
self.version_bump_pr = GHActions.get_pr_url_by_branch(
|
||||
repo=GITHUB_REPOSITORY, branch=branch_upd_version_contributors
|
||||
)
|
||||
|
||||
def update_release_info(self, dry_run: bool) -> "ReleaseInfo":
    """Fill in the links that describe the release and persist the result.

    For a non-master release branch the changelog PR URL is looked up by
    its ``auto/<release_tag>`` branch (or set to ``"dry-run"``). The GitHub
    release URL is always set, and the docker command is set only once the
    release progress is ``COMPLETED``.

    Args:
        dry_run: when True, GitHub is not queried for the changelog PR.

    Returns:
        ``self`` after the updated info has been dumped to disk.
    """
    if self.release_branch != "master":
        # Use this object's own tag rather than a module-level variable.
        branch = f"auto/{self.release_tag}"
        if not dry_run:
            url = GHActions.get_pr_url_by_branch(
                repo=GITHUB_REPOSITORY, branch=branch
            )
        else:
            url = "dry-run"
        print(f"ChangeLog PR url [{url}]")
        self.changelog_pr = url
    # Compute the release URL first so the message shows the actual value
    # and never touches a variable that is only bound for patch releases.
    self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}"
    print(f"Release url [{self.release_url}]")
    if self.release_progress == ReleaseProgress.COMPLETED:
        self.docker_command = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version"
    self.dump()
    return self
|
||||
|
||||
def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None:
|
||||
repo = os.getenv("GITHUB_REPOSITORY")
|
||||
assert repo
|
||||
cmds = []
|
||||
cmds.append(
|
||||
cmds = [
|
||||
f"gh release create --repo {repo} --title 'Release {self.release_tag}' {self.release_tag}"
|
||||
)
|
||||
]
|
||||
for file in packages_files:
|
||||
cmds.append(f"gh release upload {self.release_tag} {file}")
|
||||
if not dry_run:
|
||||
for cmd in cmds:
|
||||
ShellRunner.run(cmd)
|
||||
Shell.run(cmd, check=True)
|
||||
self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}"
|
||||
else:
|
||||
print("Dry-run, would run commands:")
|
||||
print("\n * ".join(cmds))
|
||||
self.release_url = f"dry-run"
|
||||
self.dump()
|
||||
|
||||
|
||||
class RepoTypes:
|
||||
@ -350,7 +424,7 @@ class PackageDownloader:
|
||||
self.macos_package_files = ["clickhouse-macos", "clickhouse-macos-aarch64"]
|
||||
self.file_to_type = {}
|
||||
|
||||
ShellRunner.run(f"mkdir -p {self.LOCAL_DIR}")
|
||||
Shell.run(f"mkdir -p {self.LOCAL_DIR}")
|
||||
|
||||
for package_type in self.PACKAGE_TYPES:
|
||||
for package in self.package_names:
|
||||
@ -400,7 +474,7 @@ class PackageDownloader:
|
||||
return res
|
||||
|
||||
def run(self):
|
||||
ShellRunner.run(f"rm -rf {self.LOCAL_DIR}/*")
|
||||
Shell.run(f"rm -rf {self.LOCAL_DIR}/*")
|
||||
for package_file in (
|
||||
self.deb_package_files + self.rpm_package_files + self.tgz_package_files
|
||||
):
|
||||
@ -473,6 +547,37 @@ class PackageDownloader:
|
||||
return True
|
||||
|
||||
|
||||
@contextmanager
def checkout(ref: str) -> Iterator[None]:
    """Temporarily check out ``ref`` and return to the original branch.

    The current branch is read with ``git symbolic-ref --short HEAD``. If
    ``ref`` differs from it, ``ref`` is checked out and the checkout must
    succeed. The original branch is restored when the ``with`` body ends,
    whether it finished normally or raised.

    Args:
        ref: git reference to check out for the duration of the block.

    Raises:
        AssertionError: if the current branch cannot be determined or the
            checkout of ``ref`` fails.
    """
    orig_ref = Shell.run(f"{GIT_PREFIX} symbolic-ref --short HEAD", check=True)
    assert orig_ref
    rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
    if ref != orig_ref:
        # check=True: never continue on the wrong ref if the checkout failed
        Shell.run(f"{GIT_PREFIX} checkout {ref}", check=True)
    try:
        yield
    except (Exception, KeyboardInterrupt) as e:
        print(f"ERROR: Exception [{e}]")
        raise
    finally:
        Shell.run(rollback_cmd)
|
||||
|
||||
|
||||
@contextmanager
def checkout_new(ref: str) -> Iterator[None]:
    """Create and check out branch ``ref``, then return to the original branch.

    The current branch is read with ``git symbolic-ref --short HEAD`` and
    ``git checkout -b`` creates the new branch. The original branch is
    restored when the ``with`` body ends, whether it finished normally or
    raised. The new branch itself is not deleted here.

    Args:
        ref: name of the branch to create.

    Raises:
        AssertionError: if the current branch cannot be determined or the
            new branch cannot be created.
    """
    orig_ref = Shell.run(f"{GIT_PREFIX} symbolic-ref --short HEAD", check=True)
    assert orig_ref
    rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
    Shell.run(f"{GIT_PREFIX} checkout -b {ref}", check=True)
    try:
        yield
    except (Exception, KeyboardInterrupt) as e:
        print(f"ERROR: Exception [{e}]")
        raise
    finally:
        # single rollback path, also reached for SystemExit / GeneratorExit
        Shell.run(rollback_cmd)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
@ -508,6 +613,11 @@ def parse_args() -> argparse.Namespace:
|
||||
action="store_true",
|
||||
help="Create GH Release object and attach all packages",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--post-status",
|
||||
action="store_true",
|
||||
help="Post release status into Slack",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ref",
|
||||
type=str,
|
||||
@ -526,55 +636,25 @@ def parse_args() -> argparse.Namespace:
|
||||
help="do not make any actual changes in the repo, just show what will be done",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--outfile",
|
||||
default="",
|
||||
type=str,
|
||||
help="output file to write json result to, if not set - stdout",
|
||||
"--set-progress-started",
|
||||
action="store_true",
|
||||
help="Set new progress step, --progress <PROGRESS STEP> must be set",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--infile",
|
||||
default="",
|
||||
"--progress",
|
||||
type=str,
|
||||
help="input file with release info",
|
||||
help="Progress step name, see @ReleaseProgress",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--set-progress-completed",
|
||||
action="store_true",
|
||||
help="Set current progress step to OK (completed)",
|
||||
)
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@contextmanager
|
||||
def checkout(ref: str) -> Iterator[None]:
|
||||
_, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD")
|
||||
rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
|
||||
assert orig_ref
|
||||
if ref not in (orig_ref,):
|
||||
ShellRunner.run(f"{GIT_PREFIX} checkout {ref}")
|
||||
try:
|
||||
yield
|
||||
except (Exception, KeyboardInterrupt) as e:
|
||||
print(f"ERROR: Exception [{e}]")
|
||||
ShellRunner.run(rollback_cmd)
|
||||
raise
|
||||
ShellRunner.run(rollback_cmd)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def checkout_new(ref: str) -> Iterator[None]:
|
||||
_, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD")
|
||||
rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
|
||||
assert orig_ref
|
||||
ShellRunner.run(f"{GIT_PREFIX} checkout -b {ref}")
|
||||
try:
|
||||
yield
|
||||
except (Exception, KeyboardInterrupt) as e:
|
||||
print(f"ERROR: Exception [{e}]")
|
||||
ShellRunner.run(rollback_cmd)
|
||||
raise
|
||||
ShellRunner.run(rollback_cmd)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
assert args.dry_run
|
||||
|
||||
# prepare ssh for git if needed
|
||||
_ssh_agent = None
|
||||
@ -586,43 +666,91 @@ if __name__ == "__main__":
|
||||
_ssh_agent.print_keys()
|
||||
|
||||
if args.prepare_release_info:
|
||||
assert (
|
||||
args.ref and args.release_type and args.outfile
|
||||
), "--ref, --release-type and --outfile must be provided with --prepare-release-info"
|
||||
ReleaseInfo.prepare(
|
||||
commit_ref=args.ref, release_type=args.release_type, outfile=args.outfile
|
||||
)
|
||||
if args.push_release_tag:
|
||||
assert args.infile, "--infile <release info file path> must be provided"
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
release_info.push_release_tag(dry_run=args.dry_run)
|
||||
if args.push_new_release_branch:
|
||||
assert args.infile, "--infile <release info file path> must be provided"
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
release_info.push_new_release_branch(dry_run=args.dry_run)
|
||||
if args.create_bump_version_pr:
|
||||
# TODO: store link to PR in release info
|
||||
assert args.infile, "--infile <release info file path> must be provided"
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
release_info.update_version_and_contributors_list(dry_run=args.dry_run)
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.STARTED
|
||||
) as release_info:
|
||||
assert (
|
||||
args.ref and args.release_type
|
||||
), "--ref and --release-type must be provided with --prepare-release-info"
|
||||
release_info.prepare(commit_ref=args.ref, release_type=args.release_type)
|
||||
|
||||
if args.download_packages:
|
||||
assert args.infile, "--infile <release info file path> must be provided"
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
p = PackageDownloader(
|
||||
release=release_info.release_branch,
|
||||
commit_sha=release_info.commit_sha,
|
||||
version=release_info.version,
|
||||
)
|
||||
p.run()
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.DOWNLOAD_PACKAGES
|
||||
) as release_info:
|
||||
p = PackageDownloader(
|
||||
release=release_info.release_branch,
|
||||
commit_sha=release_info.commit_sha,
|
||||
version=release_info.version,
|
||||
)
|
||||
p.run()
|
||||
|
||||
if args.push_release_tag:
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.PUSH_RELEASE_TAG
|
||||
) as release_info:
|
||||
release_info.push_release_tag(dry_run=args.dry_run)
|
||||
|
||||
if args.push_new_release_branch:
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.PUSH_NEW_RELEASE_BRANCH
|
||||
) as release_info:
|
||||
release_info.push_new_release_branch(dry_run=args.dry_run)
|
||||
|
||||
if args.create_bump_version_pr:
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.BUMP_VERSION
|
||||
) as release_info:
|
||||
release_info.update_version_and_contributors_list(dry_run=args.dry_run)
|
||||
|
||||
if args.create_gh_release:
|
||||
assert args.infile, "--infile <release info file path> must be provided"
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
p = PackageDownloader(
|
||||
release=release_info.release_branch,
|
||||
commit_sha=release_info.commit_sha,
|
||||
version=release_info.version,
|
||||
)
|
||||
release_info.create_gh_release(p.get_all_packages_files(), args.dry_run)
|
||||
with ReleaseContextManager(
|
||||
release_progress=ReleaseProgress.CREATE_GH_RELEASE
|
||||
) as release_info:
|
||||
p = PackageDownloader(
|
||||
release=release_info.release_branch,
|
||||
commit_sha=release_info.commit_sha,
|
||||
version=release_info.version,
|
||||
)
|
||||
release_info.create_gh_release(
|
||||
packages_files=p.get_all_packages_files(), dry_run=args.dry_run
|
||||
)
|
||||
|
||||
if args.post_status:
|
||||
release_info = ReleaseInfo.from_file()
|
||||
release_info.update_release_info(dry_run=args.dry_run)
|
||||
if release_info.is_new_release_branch():
|
||||
title = "New release branch"
|
||||
else:
|
||||
title = "New release"
|
||||
if (
|
||||
release_info.progress_description == ReleaseProgressDescription.OK
|
||||
and release_info.release_progress == ReleaseProgress.COMPLETED
|
||||
):
|
||||
title = "Completed: " + title
|
||||
CIBuddy(dry_run=args.dry_run).post_done(
|
||||
title, dataclasses.asdict(release_info)
|
||||
)
|
||||
else:
|
||||
title = "Failed: " + title
|
||||
CIBuddy(dry_run=args.dry_run).post_critical(
|
||||
title, dataclasses.asdict(release_info)
|
||||
)
|
||||
|
||||
if args.set_progress_started:
    # Validate the step name before touching the release info file, so a
    # missing --progress cannot overwrite the stored step with an empty value.
    assert args.progress, "Progress step name must be provided"
    ri = ReleaseInfo.from_file()
    ri.release_progress = args.progress
    # The step is recorded as FAILED until --set-progress-completed
    # switches it to OK.
    ri.progress_description = ReleaseProgressDescription.FAILED
    ri.dump()
|
||||
|
||||
if args.set_progress_completed:
|
||||
ri = ReleaseInfo.from_file()
|
||||
assert (
|
||||
ri.progress_description == ReleaseProgressDescription.FAILED
|
||||
), "Must be FAILED before set to OK"
|
||||
ri.progress_description = ReleaseProgressDescription.OK
|
||||
ri.dump()
|
||||
|
||||
# tear down ssh
|
||||
if _ssh_agent and _key_pub:
|
||||
|
@ -254,11 +254,14 @@ def main():
|
||||
statuses = get_commit_filtered_statuses(commit)
|
||||
|
||||
has_failed_statuses = False
|
||||
has_native_failed_status = False
|
||||
for status in statuses:
|
||||
print(f"Check status [{status.context}], [{status.state}]")
|
||||
if CI.is_required(status.context) and status.state != SUCCESS:
|
||||
print(f"WARNING: Failed status [{status.context}], [{status.state}]")
|
||||
has_failed_statuses = True
|
||||
if status.context != CI.StatusNames.SYNC:
|
||||
has_native_failed_status = True
|
||||
|
||||
if args.wf_status == SUCCESS or has_failed_statuses:
|
||||
# set Mergeable check if workflow is successful (green)
|
||||
@ -280,7 +283,7 @@ def main():
|
||||
print(
|
||||
"Workflow failed but no failed statuses found (died runner?) - cannot set Mergeable Check status"
|
||||
)
|
||||
if args.wf_status == SUCCESS and not has_failed_statuses:
|
||||
if args.wf_status == SUCCESS and not has_native_failed_status:
|
||||
sys.exit(0)
|
||||
else:
|
||||
sys.exit(1)
|
||||
|
@ -296,13 +296,16 @@ class PRInfo:
|
||||
else:
|
||||
if "schedule" in github_event:
|
||||
self.event_type = EventType.SCHEDULE
|
||||
else:
|
||||
elif "inputs" in github_event:
|
||||
# assume this is a dispatch
|
||||
self.event_type = EventType.DISPATCH
|
||||
logging.warning(
|
||||
"event.json does not match pull_request or push:\n%s",
|
||||
json.dumps(github_event, sort_keys=True, indent=4),
|
||||
)
|
||||
print("PR Info:")
|
||||
print(self)
|
||||
else:
|
||||
logging.warning(
|
||||
"event.json does not match pull_request or push:\n%s",
|
||||
json.dumps(github_event, sort_keys=True, indent=4),
|
||||
)
|
||||
self.sha = os.getenv(
|
||||
"GITHUB_SHA", "0000000000000000000000000000000000000000"
|
||||
)
|
||||
|
@ -587,11 +587,11 @@ class TestCIConfig(unittest.TestCase):
|
||||
for job, job_config in ci_cache.jobs_to_do.items():
|
||||
if job in MOCK_AFFECTED_JOBS:
|
||||
MOCK_REQUIRED_BUILDS += job_config.required_builds
|
||||
elif job not in MOCK_AFFECTED_JOBS:
|
||||
elif job not in MOCK_AFFECTED_JOBS and not job_config.disable_await:
|
||||
ci_cache.jobs_to_wait[job] = job_config
|
||||
|
||||
for job, job_config in ci_cache.jobs_to_do.items():
|
||||
if job_config.reference_job_name:
|
||||
if job_config.reference_job_name or job_config.disable_await:
|
||||
# jobs with reference_job_name in config are not supposed to have records in the cache - continue
|
||||
continue
|
||||
if job in MOCK_AFFECTED_JOBS:
|
||||
@ -624,11 +624,76 @@ class TestCIConfig(unittest.TestCase):
|
||||
+ MOCK_AFFECTED_JOBS
|
||||
+ MOCK_REQUIRED_BUILDS
|
||||
)
|
||||
self.assertTrue(
|
||||
CI.JobNames.BUILD_CHECK not in ci_cache.jobs_to_wait,
|
||||
"We must never await on Builds Report",
|
||||
)
|
||||
self.assertCountEqual(
|
||||
list(ci_cache.jobs_to_wait),
|
||||
[
|
||||
CI.JobNames.BUILD_CHECK,
|
||||
]
|
||||
+ MOCK_REQUIRED_BUILDS,
|
||||
MOCK_REQUIRED_BUILDS,
|
||||
)
|
||||
self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do)
|
||||
|
||||
def test_ci_py_filters_not_affected_jobs_in_prs_no_builds(self):
|
||||
"""
|
||||
checks ci.py filters not affected jobs in PRs, no builds required
|
||||
"""
|
||||
settings = CiSettings()
|
||||
settings.no_ci_cache = True
|
||||
pr_info = PRInfo(github_event=_TEST_EVENT_JSON)
|
||||
pr_info.event_type = EventType.PULL_REQUEST
|
||||
pr_info.number = 123
|
||||
assert pr_info.is_pr
|
||||
ci_cache = CIPY._configure_jobs(
|
||||
S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True
|
||||
)
|
||||
self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list")
|
||||
assert not ci_cache.jobs_to_wait
|
||||
assert not ci_cache.jobs_to_skip
|
||||
|
||||
MOCK_AFFECTED_JOBS = [
|
||||
CI.JobNames.FAST_TEST,
|
||||
]
|
||||
MOCK_REQUIRED_BUILDS = []
|
||||
|
||||
# pretend there are pending jobs that we need to wait
|
||||
for job, job_config in ci_cache.jobs_to_do.items():
|
||||
if job in MOCK_AFFECTED_JOBS:
|
||||
if job_config.required_builds:
|
||||
MOCK_REQUIRED_BUILDS += job_config.required_builds
|
||||
elif job not in MOCK_AFFECTED_JOBS and not job_config.disable_await:
|
||||
ci_cache.jobs_to_wait[job] = job_config
|
||||
|
||||
for job, job_config in ci_cache.jobs_to_do.items():
|
||||
if job_config.reference_job_name or job_config.disable_await:
|
||||
# jobs with reference_job_name in config are not supposed to have records in the cache - continue
|
||||
continue
|
||||
if job in MOCK_AFFECTED_JOBS:
|
||||
continue
|
||||
for batch in range(job_config.num_batches):
|
||||
# add any record into cache
|
||||
record = CiCache.Record(
|
||||
record_type=random.choice(
|
||||
[
|
||||
CiCache.RecordType.FAILED,
|
||||
CiCache.RecordType.PENDING,
|
||||
CiCache.RecordType.SUCCESSFUL,
|
||||
]
|
||||
),
|
||||
job_name=job,
|
||||
job_digest=ci_cache.job_digests[job],
|
||||
batch=batch,
|
||||
num_batches=job_config.num_batches,
|
||||
release_branch=True,
|
||||
)
|
||||
for record_t_, records_ in ci_cache.records.items():
|
||||
if record_t_.value == CiCache.RecordType.FAILED.value:
|
||||
records_[record.to_str_key()] = record
|
||||
|
||||
ci_cache.filter_out_not_affected_jobs()
|
||||
expected_to_do = MOCK_AFFECTED_JOBS + MOCK_REQUIRED_BUILDS
|
||||
self.assertCountEqual(
|
||||
list(ci_cache.jobs_to_wait),
|
||||
MOCK_REQUIRED_BUILDS,
|
||||
)
|
||||
self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do)
|
||||
|
@ -50,7 +50,7 @@ set -uo pipefail
|
||||
# set accordingly to a runner role #
|
||||
####################################
|
||||
|
||||
echo "Running init v1"
|
||||
echo "Running init v1.1"
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
export RUNNER_HOME=/home/ubuntu/actions-runner
|
||||
|
||||
@ -66,6 +66,14 @@ bash /usr/local/share/scripts/init-network.sh
|
||||
RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text)
|
||||
LABELS="self-hosted,Linux,$(uname -m),$RUNNER_TYPE"
|
||||
export LABELS
|
||||
echo "Instance Labels: $LABELS"
|
||||
|
||||
LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle)
|
||||
export LIFE_CYCLE
|
||||
echo "Instance lifecycle: $LIFE_CYCLE"
|
||||
|
||||
INSTANCE_TYPE=$(ec2metadata --instance-type)
|
||||
echo "Instance type: $INSTANCE_TYPE"
|
||||
|
||||
# Refresh CloudWatch agent config
|
||||
aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json
|
||||
@ -124,10 +132,6 @@ terminate_decrease_and_exit() {
|
||||
declare -f terminate_and_exit >> /tmp/actions-hooks/common.sh
|
||||
|
||||
check_spot_instance_is_old() {
|
||||
# This function should be executed ONLY BETWEEN runnings.
|
||||
# It's unsafe to execute while the runner is working!
|
||||
local LIFE_CYCLE
|
||||
LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle)
|
||||
if [ "$LIFE_CYCLE" == "spot" ]; then
|
||||
local UPTIME
|
||||
UPTIME=$(< /proc/uptime)
|
||||
|
@ -208,13 +208,21 @@ def test_merge_tree_custom_disk_setting(start_cluster):
|
||||
secret_access_key='minio123');
|
||||
"""
|
||||
)
|
||||
count = len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
|
||||
|
||||
list1 = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))
|
||||
count1 = len(list1)
|
||||
|
||||
node1.query(f"INSERT INTO {TABLE_NAME}_3 SELECT number FROM numbers(100)")
|
||||
assert int(node1.query(f"SELECT count() FROM {TABLE_NAME}_3")) == 100
|
||||
assert (
|
||||
len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
|
||||
== count
|
||||
)
|
||||
|
||||
list2 = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))
|
||||
count2 = len(list2)
|
||||
|
||||
if count1 != count2:
|
||||
print("list1: ", list1)
|
||||
print("list2: ", list2)
|
||||
|
||||
assert count1 == count2
|
||||
assert (
|
||||
len(list(minio.list_objects(cluster.minio_bucket, "data2/", recursive=True)))
|
||||
> 0
|
||||
|
@ -43,15 +43,10 @@ def started_cluster():
|
||||
config = """<clickhouse>
|
||||
<openSSL>
|
||||
<client>
|
||||
<verificationMode>none</verificationMode>
|
||||
|
||||
<verificationMode>strict</verificationMode>
|
||||
<certificateFile>{certificateFile}</certificateFile>
|
||||
<privateKeyFile>{privateKeyFile}</privateKeyFile>
|
||||
<caConfig>{caConfig}</caConfig>
|
||||
|
||||
<invalidCertificateHandler>
|
||||
<name>AcceptCertificateHandler</name>
|
||||
</invalidCertificateHandler>
|
||||
</client>
|
||||
</openSSL>
|
||||
</clickhouse>"""
|
||||
|
@ -2220,13 +2220,11 @@ def test_rabbitmq_commit_on_block_write(rabbitmq_cluster):
|
||||
|
||||
|
||||
def test_rabbitmq_no_connection_at_startup_1(rabbitmq_cluster):
|
||||
# no connection when table is initialized
|
||||
rabbitmq_cluster.pause_container("rabbitmq1")
|
||||
instance.query_and_get_error(
|
||||
error = instance.query_and_get_error(
|
||||
"""
|
||||
CREATE TABLE test.cs (key UInt64, value UInt64)
|
||||
ENGINE = RabbitMQ
|
||||
SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
|
||||
SETTINGS rabbitmq_host_port = 'no_connection_at_startup:5672',
|
||||
rabbitmq_exchange_name = 'cs',
|
||||
rabbitmq_format = 'JSONEachRow',
|
||||
rabbitmq_flush_interval_ms=1000,
|
||||
@ -2234,7 +2232,7 @@ def test_rabbitmq_no_connection_at_startup_1(rabbitmq_cluster):
|
||||
rabbitmq_row_delimiter = '\\n';
|
||||
"""
|
||||
)
|
||||
rabbitmq_cluster.unpause_container("rabbitmq1")
|
||||
assert "CANNOT_CONNECT_RABBITMQ" in error
|
||||
|
||||
|
||||
def test_rabbitmq_no_connection_at_startup_2(rabbitmq_cluster):
|
||||
|
@ -10,8 +10,8 @@
|
||||
PARTITION BY toYYYYMM(d) ORDER BY key
|
||||
</create_query>
|
||||
|
||||
<fill_query>INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(5000000)</fill_query>
|
||||
<fill_query>INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(5000000)</fill_query>
|
||||
<fill_query>INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(2500000)</fill_query>
|
||||
<fill_query>INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(2500000)</fill_query>
|
||||
|
||||
<query>SELECT * FROM optimized_select_final FINAL FORMAT Null SETTINGS max_threads = 8</query>
|
||||
<query>SELECT * FROM optimized_select_final FINAL WHERE key % 10 = 0 FORMAT Null</query>
|
||||
|
@ -1,5 +1,5 @@
|
||||
<test>
|
||||
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByChar(' ', materialize(s)) as w from numbers(1000000)</query>
|
||||
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(1000000)</query>
|
||||
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(100000)</query>
|
||||
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(200000)</query>
|
||||
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(20000)</query>
|
||||
</test>
|
||||
|
@ -24,10 +24,10 @@
|
||||
<min_insert_block_size_rows>1</min_insert_block_size_rows>
|
||||
</settings>
|
||||
|
||||
<!-- 100 parts -->
|
||||
<query>INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100)</query>
|
||||
<query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(1000)</query>
|
||||
<query>INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100)</query>
|
||||
<!-- 50 parts -->
|
||||
<query>INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(50)</query>
|
||||
<query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(500)</query>
|
||||
<query>INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50)</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS hits_wide</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS hits_compact</drop_query>
|
||||
|
@ -555,7 +555,7 @@ if args.report == "main":
|
||||
"Total client time for measured query runs, s", # 2
|
||||
"Queries", # 3
|
||||
"Longest query, total for measured runs, s", # 4
|
||||
"Wall clock time per query, s", # 5
|
||||
"Average query wall clock time, s", # 5
|
||||
"Shortest query, total for measured runs, s", # 6
|
||||
"", # Runs #7
|
||||
]
|
||||
|
@ -8,13 +8,13 @@
|
||||
40
|
||||
41
|
||||
|
||||
0
|
||||
41
|
||||
2 42
|
||||
|
||||
2 42
|
||||
43
|
||||
|
||||
0
|
||||
43
|
||||
11
|
||||
|
||||
11
|
||||
|
@ -1,13 +1,36 @@
|
||||
-- { echoOn }
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
1 104 4 4 104 1
|
||||
1 105 5 4 104 1
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
1 104 4 4 104 1
|
||||
1 105 5 4 104 1
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
1 104 4 4 104 1
|
||||
1 105 5 4 104 1
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
1 104 4 4 104 1
|
||||
1 105 5 4 104 1
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
1 104 4 4 104 1
|
||||
1 105 5 4 104 1
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
|
@ -4,20 +4,29 @@ DROP TABLE IF EXISTS B;
|
||||
CREATE TABLE A(k UInt32, t UInt32, a UInt64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO A(k,t,a) VALUES (1,101,1),(1,102,2),(1,103,3),(1,104,4),(1,105,5);
|
||||
|
||||
CREATE TABLE B(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
DROP TABLE B;
|
||||
CREATE TABLE B1(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B1(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
|
||||
CREATE TABLE B2(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B2(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
|
||||
CREATE TABLE B(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
DROP TABLE B;
|
||||
CREATE TABLE B3(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B3(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
|
||||
CREATE TABLE B(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
DROP TABLE B;
|
||||
-- { echoOn }
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
|
||||
-- { echoOff }
|
||||
|
||||
DROP TABLE B1;
|
||||
DROP TABLE B2;
|
||||
DROP TABLE B3;
|
||||
|
||||
DROP TABLE A;
|
||||
|
@ -1 +1,2 @@
|
||||
3000000
|
||||
3000000
|
||||
|
@ -2,15 +2,28 @@
|
||||
|
||||
DROP TABLE IF EXISTS tvs;
|
||||
|
||||
-- to use different algorithms for in subquery
|
||||
SET allow_experimental_analyzer = 1;
|
||||
|
||||
CREATE TABLE tvs(k UInt32, t UInt32, tv UInt64) ENGINE = Memory;
|
||||
INSERT INTO tvs(k,t,tv) SELECT k, t, t
|
||||
FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
|
||||
CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times;
|
||||
CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times
|
||||
SETTINGS join_algorithm = 'hash';
|
||||
|
||||
SELECT SUM(trades.price - tvs.tv) FROM
|
||||
(SELECT k, t, t as price
|
||||
FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
|
||||
CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times) trades
|
||||
CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times
|
||||
SETTINGS join_algorithm = 'hash') trades
|
||||
ASOF LEFT JOIN tvs USING(k,t);
|
||||
|
||||
SELECT SUM(trades.price - tvs.tv) FROM
|
||||
(SELECT k, t, t as price
|
||||
FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
|
||||
CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times
|
||||
SETTINGS join_algorithm = 'hash') trades
|
||||
ASOF LEFT JOIN tvs USING(k,t)
|
||||
SETTINGS join_algorithm = 'full_sorting_merge';
|
||||
|
||||
DROP TABLE tvs;
|
||||
|
@ -27,3 +27,32 @@
|
||||
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
|
||||
1 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
|
||||
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
|
||||
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
|
||||
2 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
|
||||
2 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0
|
||||
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
|
||||
3 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
|
||||
3 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0
|
||||
3 1970-01-01 00:00:03 3 0 1970-01-01 00:00:00 0
|
||||
3 1970-01-01 00:00:04 4 0 1970-01-01 00:00:00 0
|
||||
3 1970-01-01 00:00:05 5 0 1970-01-01 00:00:00 0
|
||||
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
|
||||
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
|
||||
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
|
||||
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
|
||||
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
|
||||
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
|
||||
|
@ -11,9 +11,12 @@ INSERT INTO B(k,t,b) VALUES (1,2,2),(1,4,4);
|
||||
INSERT INTO B(k,t,b) VALUES (2,3,3);
|
||||
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t);
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t);
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
|
||||
DROP TABLE A;
|
||||
|
@ -1,27 +1,72 @@
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1970-01-01 02:00:01 1 0
|
||||
2 1970-01-01 02:00:03 3 3
|
||||
2 1970-01-01 02:00:05 5 3
|
||||
-
|
||||
2 1970-01-01 02:00:01 1 0
|
||||
2 1970-01-01 02:00:03 3 3
|
||||
2 1970-01-01 02:00:05 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1970-01-01 02:00:00.001 1 0
|
||||
2 1970-01-01 02:00:00.003 3 3
|
||||
2 1970-01-01 02:00:00.005 5 3
|
||||
-
|
||||
2 1970-01-01 02:00:00.001 1 0
|
||||
2 1970-01-01 02:00:00.003 3 3
|
||||
2 1970-01-01 02:00:00.005 5 3
|
||||
|
@ -1,27 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Asia/Istanbul')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Asia/Istanbul')"
|
||||
do
|
||||
$CLICKHOUSE_CLIENT -mn <<EOF
|
||||
DROP TABLE IF EXISTS A;
|
||||
DROP TABLE IF EXISTS B;
|
||||
|
||||
CREATE TABLE A(k UInt32, t ${typename}, a Float64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO A(k,t,a) VALUES (2,1,1),(2,3,3),(2,5,5);
|
||||
|
||||
CREATE TABLE B(k UInt32, t ${typename}, b Float64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B(k,t,b) VALUES (2,3,3);
|
||||
|
||||
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t);
|
||||
|
||||
DROP TABLE A;
|
||||
DROP TABLE B;
|
||||
EOF
|
||||
|
||||
done
|
27
tests/queries/0_stateless/00927_asof_join_other_types.sql.j2
Normal file
27
tests/queries/0_stateless/00927_asof_join_other_types.sql.j2
Normal file
@ -0,0 +1,27 @@
|
||||
|
||||
{% for typename in ["UInt32", "UInt64", "Float64", "Float32", "DateTime('Asia/Istanbul')", "Decimal32(5)", "Decimal64(5)", "Decimal128(5)", "DateTime64(3, 'Asia/Istanbul')"] -%}
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS A;
|
||||
DROP TABLE IF EXISTS B;
|
||||
|
||||
CREATE TABLE A(k UInt32, t {{ typename }}, a Float64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO A(k,t,a) VALUES (2,1,1),(2,3,3),(2,5,5);
|
||||
|
||||
CREATE TABLE B(k UInt32, t {{ typename }}, b Float64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B(k,t,b) VALUES (2,3,3);
|
||||
|
||||
SELECT '-';
|
||||
|
||||
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t)
|
||||
SETTINGS join_algorithm = 'full_sorting_merge';
|
||||
|
||||
SELECT '-';
|
||||
|
||||
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t)
|
||||
SETTINGS join_algorithm = 'hash';
|
||||
|
||||
DROP TABLE A;
|
||||
DROP TABLE B;
|
||||
|
||||
{% endfor %}
|
@ -12,3 +12,18 @@
|
||||
2 1970-01-01 00:00:15 5 6.5 6
|
||||
2 1970-01-01 00:00:16 5 5.6 6
|
||||
2 1970-01-01 00:00:20 17 8.5 18
|
||||
-
|
||||
1 1970-01-01 00:00:05 1 1.5 2
|
||||
1 1970-01-01 00:00:06 1 1.51 2
|
||||
1 1970-01-01 00:00:10 11 11.5 12
|
||||
1 1970-01-01 00:00:11 11 11.51 12
|
||||
1 1970-01-01 00:00:15 5 5.5 6
|
||||
1 1970-01-01 00:00:16 5 5.6 6
|
||||
1 1970-01-01 00:00:20 7 7.5 8
|
||||
2 1970-01-01 00:00:05 11 2.5 12
|
||||
2 1970-01-01 00:00:06 11 2.51 12
|
||||
2 1970-01-01 00:00:10 21 12.5 22
|
||||
2 1970-01-01 00:00:11 21 12.51 22
|
||||
2 1970-01-01 00:00:15 5 6.5 6
|
||||
2 1970-01-01 00:00:16 5 5.6 6
|
||||
2 1970-01-01 00:00:20 17 8.5 18
|
||||
|
@ -9,7 +9,13 @@ CREATE TABLE tv(key UInt32, t DateTime, tv Float64) ENGINE = MergeTree() ORDER B
|
||||
INSERT INTO tv(key,t,tv) VALUES (1,5,1.5),(1,6,1.51),(1,10,11.5),(1,11,11.51),(1,15,5.5),(1,16,5.6),(1,20,7.5);
|
||||
INSERT INTO tv(key,t,tv) VALUES (2,5,2.5),(2,6,2.51),(2,10,12.5),(2,11,12.51),(2,15,6.5),(2,16,5.6),(2,20,8.5);
|
||||
|
||||
SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t);
|
||||
SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t)
|
||||
;
|
||||
|
||||
SELECT '-';
|
||||
|
||||
SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t)
|
||||
SETTINGS join_algorithm = 'full_sorting_merge';
|
||||
|
||||
DROP TABLE md;
|
||||
DROP TABLE tv;
|
||||
|
@ -1,3 +1,4 @@
|
||||
- default / join_use_nulls = 0 -
|
||||
1 1 0 0
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
@ -34,3 +35,114 @@
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
1 2 1 2
|
||||
- full_sorting_merge / join_use_nulls = 0 -
|
||||
1 1 0 0
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 1 0 0
|
||||
2 2 0 0
|
||||
2 3 2 3
|
||||
3 1 0 0
|
||||
3 2 0 0
|
||||
3 3 0 0
|
||||
9
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 3 1 2
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 4
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
1 2 1 2
|
||||
- default / join_use_nulls = 1 -
|
||||
1 1 \N \N
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 1 \N \N
|
||||
2 2 \N \N
|
||||
2 3 2 3
|
||||
3 1 \N \N
|
||||
3 2 \N \N
|
||||
3 3 \N \N
|
||||
9
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 3 1 2
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 4
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
1 2 1 2
|
||||
- full_sorting_merge / join_use_nulls = 1 -
|
||||
1 1 \N \N
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 1 \N \N
|
||||
2 2 \N \N
|
||||
2 3 2 3
|
||||
3 1 \N \N
|
||||
3 2 \N \N
|
||||
3 3 \N \N
|
||||
9
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 3 1 2
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 4
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
1 2 1 2
|
||||
|
@ -7,6 +7,14 @@ CREATE TABLE B(b UInt32, t UInt32) ENGINE = Memory;
|
||||
INSERT INTO A (a,t) VALUES (1,1),(1,2),(1,3), (2,1),(2,2),(2,3), (3,1),(3,2),(3,3);
|
||||
INSERT INTO B (b,t) VALUES (1,2),(1,4),(2,3);
|
||||
|
||||
{% for join_use_nulls in [0, 1] -%}
|
||||
{% for join_algorithm in ['default', 'full_sorting_merge'] -%}
|
||||
|
||||
SET join_algorithm = '{{ join_algorithm }}';
|
||||
|
||||
SELECT '- {{ join_algorithm }} / join_use_nulls = {{ join_use_nulls }} -';
|
||||
set join_use_nulls = {{ join_use_nulls }};
|
||||
|
||||
SELECT A.a, A.t, B.b, B.t FROM A ASOF LEFT JOIN B ON A.a == B.b AND A.t >= B.t ORDER BY (A.a, A.t);
|
||||
SELECT count() FROM A ASOF LEFT JOIN B ON A.a == B.b AND B.t <= A.t;
|
||||
SELECT A.a, A.t, B.b, B.t FROM A ASOF INNER JOIN B ON B.t <= A.t AND A.a == B.b ORDER BY (A.a, A.t);
|
||||
@ -28,5 +36,8 @@ ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A
|
||||
WHERE B.t != 3 ORDER BY (A.a, A.t)
|
||||
;
|
||||
|
||||
{% endfor -%}
|
||||
{% endfor -%}
|
||||
|
||||
DROP TABLE A;
|
||||
DROP TABLE B;
|
@ -12,10 +12,10 @@ ORDER BY (primary_key);
|
||||
|
||||
INSERT INTO set_array
|
||||
select
|
||||
toString(intDiv(number, 1000000)) as primary_key,
|
||||
toString(intDiv(number, 100000)) as primary_key,
|
||||
array(number) as index_array
|
||||
from system.numbers
|
||||
limit 10000000;
|
||||
limit 1000000;
|
||||
|
||||
OPTIMIZE TABLE set_array FINAL;
|
||||
|
||||
|
@ -1,3 +1,6 @@
|
||||
v1 o1 ['s2','s1']
|
||||
v1 o2 ['s4']
|
||||
v2 o3 ['s5','s3']
|
||||
v1 o1 ['s2','s1']
|
||||
v1 o2 ['s4']
|
||||
v2 o3 ['s5','s3']
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user