Merge remote-tracking branch 'origin/master' into pr-local-plan

Commit 9900abade6 by Igor Nikonov, 2024-07-22 13:45:31 +00:00
132 changed files with 4201 additions and 1003 deletions

.github/actions/release/action.yml (new file, 168 lines)

@ -0,0 +1,168 @@
name: Release
description: Makes patch releases and creates new release branch
inputs:
ref:
description: 'Git reference (branch or commit sha) from which to create the release'
required: true
type: string
type:
description: 'The type of release: "new" for a new release or "patch" for a patch release'
required: true
type: choice
options:
- patch
- new
dry-run:
description: 'Dry run'
required: false
default: true
type: boolean
token:
required: true
type: string
runs:
using: "composite"
steps:
- name: Prepare Release Info
shell: bash
run: |
python3 ./tests/ci/create_release.py --prepare-release-info \
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
${{ inputs.dry-run && '--dry-run' || '' }}
echo "::group::Release Info"
python3 -m json.tool /tmp/release_info.json
echo "::endgroup::"
release_tag=$(jq -r '.release_tag' /tmp/release_info.json)
commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json)
echo "Release Tag: $release_tag"
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
- name: Download All Release Artifacts
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push Git Tag for the Release
shell: bash
run: |
python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push New Release Branch
if: ${{ inputs.type == 'new' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump CH Version and Update Contributors' List
shell: bash
run: |
python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump Docker versions, Changelog, Security
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
git checkout master
python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security"
echo "List versions"
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
echo "Update docker version"
./utils/list-versions/update-docker-version.sh
echo "Generate ChangeLog"
export CI=1
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
--volume=".:/ClickHouse" clickhouse/style-test \
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token=${{ inputs.token }} --jobs=5 \
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
echo "Generate Security"
python3 ./utils/security-generator/generate_security.py > SECURITY.md
git diff HEAD
- name: Create ChangeLog PR
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
uses: peter-evans/create-pull-request@v6
with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
token: ${{ inputs.token }}
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
branch: auto/${{ env.RELEASE_TAG }}
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
delete-branch: true
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
labels: do not test
body: |
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)
- name: Complete previous steps and Restore git state
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --set-progress-completed
git reset --hard HEAD
git checkout "$GITHUB_REF_NAME"
- name: Create GH Release
shell: bash
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export TGZ Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test TGZ Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export RPM Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test RPM Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export Debian Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test Debian Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Docker clickhouse/clickhouse-server building
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
cd "./tests/ci"
python3 ./create_release.py --set-progress-started --progress "docker server release"
export CHECK_NAME="Docker server image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
python3 ./create_release.py --set-progress-completed
- name: Docker clickhouse/clickhouse-keeper building
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
cd "./tests/ci"
python3 ./create_release.py --set-progress-started --progress "docker keeper release"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
python3 ./create_release.py --set-progress-completed
- name: Set current Release progress to Completed with OK
shell: bash
run: |
python3 ./tests/ci/create_release.py --set-progress-started --progress "completed"
python3 ./tests/ci/create_release.py --set-progress-completed
- name: Post Slack Message
if: ${{ !cancelled() }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run && '--dry-run' || '' }}
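For reference, a caller workflow would invoke this composite action roughly as in the sketch below. This is a minimal, hedged example modeled on the `Call Release Action` step of the reworked CreateRelease workflow later in this commit; the job name is illustrative and the `release-maker` runner label is borrowed from the AutoRelease workflow.

```yaml
jobs:
  CreateRelease:
    runs-on: [self-hosted, release-maker]   # illustrative runner label
    steps:
      # The composite action lives in the repository, so it must be checked out first.
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
          fetch-depth: 0
      - name: Call Release Action
        uses: ./.github/actions/release
        with:
          ref: ${{ inputs.ref }}        # branch or commit sha to release from
          type: ${{ inputs.type }}      # "patch" or "new"
          dry-run: ${{ inputs.dry-run }}
          token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
```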


@ -1,44 +1,110 @@
name: AutoRelease name: AutoRelease
env: env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1 PYTHONUNBUFFERED: 1
DRY_RUN: true
concurrency: concurrency:
group: auto-release group: release
on: # yamllint disable-line rule:truthy on: # yamllint disable-line rule:truthy
# schedule: # Workflow uses a test bucket for packages and dry run mode (no real releases)
# - cron: '0 10-16 * * 1-5' schedule:
- cron: '0 9 * * *'
- cron: '0 15 * * *'
workflow_dispatch: workflow_dispatch:
inputs:
dry-run:
description: 'Dry run'
required: false
default: true
type: boolean
jobs: jobs:
CherryPick: AutoRelease:
runs-on: [self-hosted, style-checker-aarch64] runs-on: [self-hosted, release-maker]
steps: steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Set envs - name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: | run: |
cat >> "$GITHUB_ENV" << 'EOF' cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/cherry_pick
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}} ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK RCSK
REPO_OWNER=ClickHouse
REPO_NAME=ClickHouse
REPO_TEAM=core
EOF EOF
- name: Set DRY_RUN for schedule
if: ${{ github.event_name == 'schedule' }}
run: echo "DRY_RUN=true" >> "$GITHUB_ENV"
- name: Set DRY_RUN for dispatch
if: ${{ github.event_name == 'workflow_dispatch' }}
run: echo "DRY_RUN=${{ github.event.inputs.dry-run }}" >> "$GITHUB_ENV"
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0 fetch-depth: 0
- name: Auto-release - name: Auto Release Prepare
run: | run: |
cd "$GITHUB_WORKSPACE/tests/ci" cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --release-after-days=3 python3 auto_release.py --prepare
- name: Cleanup echo "::group::Auto Release Info"
if: always() python3 -m json.tool /tmp/autorelease_info.json
echo "::endgroup::"
{
echo 'AUTO_RELEASE_PARAMS<<EOF'
cat /tmp/autorelease_info.json
echo 'EOF'
} >> "$GITHUB_ENV"
- name: Post Release Branch statuses
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-status
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[0].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[1].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2] && fromJson(env.AUTO_RELEASE_PARAMS).releases[2].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3] && fromJson(env.AUTO_RELEASE_PARAMS).releases[3].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4] && fromJson(env.AUTO_RELEASE_PARAMS).releases[4].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Post Slack Message
if: ${{ !cancelled() }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }}
- name: Clean up
run: | run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
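The `Release ...` steps above index into `AUTO_RELEASE_PARAMS`, the JSON that `auto_release.py --prepare` writes to `/tmp/autorelease_info.json`. A hypothetical illustration of the shape implied by the workflow's `fromJson(...)` accesses is sketched below in YAML form; only the fields the workflow actually reads are shown, and every value is invented.

```yaml
# Hypothetical shape of /tmp/autorelease_info.json (rendered as YAML for readability).
# Only fields referenced through fromJson(env.AUTO_RELEASE_PARAMS) are listed; values are made up.
releases:
  - release_branch: "24.6"            # used in the step name
    commit_sha: "0123456789abcdef"    # passed as `ref` to the release action
    ready: true                       # gates whether the release step runs
  - release_branch: "24.5"
    commit_sha: "fedcba9876543210"
    ready: false
```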


@ -2,7 +2,6 @@ name: CreateRelease
concurrency: concurrency:
group: release group: release
'on': 'on':
workflow_dispatch: workflow_dispatch:
inputs: inputs:
@ -31,136 +30,15 @@ jobs:
steps: steps:
- name: DebugInfo - name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: |
cat >> "$GITHUB_ENV" << 'EOF'
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
RELEASE_INFO_FILE=${{ runner.temp }}/release_info.json
EOF
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0 fetch-depth: 0
- name: Prepare Release Info - name: Call Release Action
run: | uses: ./.github/actions/release
python3 ./tests/ci/create_release.py --prepare-release-info \
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
--outfile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
echo "::group::Release Info"
python3 -m json.tool "$RELEASE_INFO_FILE"
echo "::endgroup::"
release_tag=$(jq -r '.release_tag' "$RELEASE_INFO_FILE")
commit_sha=$(jq -r '.commit_sha' "$RELEASE_INFO_FILE")
echo "Release Tag: $release_tag"
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
- name: Download All Release Artifacts
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push Git Tag for the Release
run: |
python3 ./tests/ci/create_release.py --push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push New Release Branch
if: ${{ inputs.type == 'new' }}
run: |
python3 ./tests/ci/create_release.py --push-new-release-branch --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump CH Version and Update Contributors' List
run: |
python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Checkout master
run: |
git checkout master
- name: Bump Docker versions, Changelog, Security
if: ${{ inputs.type == 'patch' }}
run: |
[ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1
echo "List versions"
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
echo "Update docker version"
./utils/list-versions/update-docker-version.sh
echo "Generate ChangeLog"
export CI=1
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
--volume=".:/ClickHouse" clickhouse/style-test \
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token="$GH_TOKEN" --jobs=5 \
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
echo "Generate Security"
python3 ./utils/security-generator/generate_security.py > SECURITY.md
git diff HEAD
- name: Create ChangeLog PR
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
uses: peter-evans/create-pull-request@v6
with: with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>" ref: ${{ inputs.ref }}
token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} type: ${{ inputs.type }}
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>" dry-run: ${{ inputs.dry-run }}
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
branch: auto/${{ env.RELEASE_TAG }}
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
delete-branch: true
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
labels: do not test
body: |
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)
- name: Reset changes if Dry-run
if: ${{ inputs.dry-run }}
run: |
git reset --hard HEAD
- name: Checkout back to GITHUB_REF
run: |
git checkout "$GITHUB_REF_NAME"
- name: Create GH Release
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --create-gh-release \
--infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export TGZ Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test TGZ Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export RPM Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test RPM Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export Debian Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test Debian Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Docker clickhouse/clickhouse-server building
if: ${{ inputs.type == 'patch' }}
run: |
cd "./tests/ci"
export CHECK_NAME="Docker server image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
- name: Docker clickhouse/clickhouse-keeper building
if: ${{ inputs.type == 'patch' }}
run: |
cd "./tests/ci"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
- name: Post Slack Message
if: always()
run: |
echo Slack Message


@ -297,7 +297,7 @@ Algorithm requires the special column in tables. This column:
- Must contain an ordered sequence. - Must contain an ordered sequence.
- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md). - Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md).
- Can't be the only column in the `JOIN` clause. - For `hash` join algorithm it can't be the only column in the `JOIN` clause.
Syntax `ASOF JOIN ... ON`: Syntax `ASOF JOIN ... ON`:
@ -337,7 +337,8 @@ For example, consider the following tables:
`ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can't be joined. `ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can't be joined.
:::note :::note
`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. `ASOF JOIN` is supported only by `hash` and `full_sorting_merge` join algorithms.
It's **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
::: :::
## PASTE JOIN Usage ## PASTE JOIN Usage


@ -6,38 +6,38 @@ sidebar_label: Playground
# ClickHouse Playground {#clickhouse-playground} # ClickHouse Playground {#clickhouse-playground}
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. [ClickHouse Playground](https://play.clickhouse.com/play?user=play) позволяет пользователям экспериментировать с ClickHouse, выполняя запросы мгновенно, без необходимости настройки сервера или кластера.
Several example datasets are available in Playground. В Playground доступны несколько примеров наборов данных.
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). Вы можете выполнять запросы к Playground, используя любой HTTP-клиент, например [curl](https://curl.haxx.se) или [wget](https://www.gnu.org/software/wget/), или настроить соединение, используя драйверы [JDBC](../interfaces/jdbc.md) или [ODBC](../interfaces/odbc.md). Дополнительную информацию о программных продуктах, поддерживающих ClickHouse, можно найти [здесь](../interfaces/index.md).
## Credentials {#credentials} ## Учетные данные {#credentials}
| Parameter | Value | | Параметр | Значение |
|:--------------------|:-----------------------------------| |:--------------------|:-----------------------------------|
| HTTPS endpoint | `https://play.clickhouse.com:443/` | | HTTPS-адрес | `https://play.clickhouse.com:443/` |
| Native TCP endpoint | `play.clickhouse.com:9440` | | TCP-адрес | `play.clickhouse.com:9440` |
| User | `explorer` or `play` | | Пользователь | `explorer` или `play` |
| Password | (empty) | | Пароль | (пусто) |
## Limitations {#limitations} ## Ограничения {#limitations}
The queries are executed as a read-only user. It implies some limitations: Запросы выполняются от имени пользователя с правами только на чтение. Это предполагает некоторые ограничения:
- DDL queries are not allowed - DDL-запросы не разрешены
- INSERT queries are not allowed - INSERT-запросы не разрешены
The service also have quotas on its usage. Сервис также имеет квоты на использование.
## Examples {#examples} ## Примеры {#examples}
HTTPS endpoint example with `curl`: Пример использования HTTPS-адреса с `curl`:
``` bash ```bash
curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'"
``` ```
TCP endpoint example with [CLI](../interfaces/cli.md): Пример использования TCP-адреса с [CLI](../interfaces/cli.md):
``` bash ``` bash
clickhouse client --secure --host play.clickhouse.com --user explorer clickhouse client --secure --host play.clickhouse.com --user explorer


@ -68,6 +68,41 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes)
return nullptr; return nullptr;
} }
/// Checks if the nodes are a combination of isNull and notEquals functions of the same two arguments
bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs)
{
QueryTreeNodePtrWithHashSet all_arguments;
for (const auto & node : nodes)
{
const auto * func_node = node->as<FunctionNode>();
if (!func_node)
return false;
const auto & arguments = func_node->getArguments().getNodes();
if (func_node->getFunctionName() == "isNull" && arguments.size() == 1)
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2)
{
if (arguments[0]->isEqual(*arguments[1]))
return false;
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[1]));
}
else
return false;
if (all_arguments.size() > 2)
return false;
}
if (all_arguments.size() != 2)
return false;
lhs = all_arguments.begin()->node;
rhs = std::next(all_arguments.begin())->node;
return true;
}
bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value) bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value)
{ {
const auto * constant_node = node->as<ConstantNode>(); const auto * constant_node = node->as<ConstantNode>();
@ -213,11 +248,14 @@ private:
else if (func_name == "and") else if (func_name == "and")
{ {
const auto & and_arguments = argument_function->getArguments().getNodes(); const auto & and_arguments = argument_function->getArguments().getNodes();
bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull");
if (all_are_is_null) QueryTreeNodePtr is_null_lhs_arg;
QueryTreeNodePtr is_null_rhs_arg;
if (matchIsNullOfTwoArgs(and_arguments, is_null_lhs_arg, is_null_rhs_arg))
{ {
is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1); is_null_argument_to_indices[is_null_lhs_arg].push_back(or_operands.size() - 1);
is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1); is_null_argument_to_indices[is_null_rhs_arg].push_back(or_operands.size() - 1);
continue;
} }
/// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` can be replaced with `a = b` /// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` can be replaced with `a = b`


@ -4124,7 +4124,9 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo
auto * column_to_interpolate = interpolate_node_typed.getExpression()->as<IdentifierNode>(); auto * column_to_interpolate = interpolate_node_typed.getExpression()->as<IdentifierNode>();
if (!column_to_interpolate) if (!column_to_interpolate)
throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found", throw Exception(
ErrorCodes::LOGICAL_ERROR,
"INTERPOLATE can work only for identifiers, but {} is found",
interpolate_node_typed.getExpression()->formatASTForErrorMessage()); interpolate_node_typed.getExpression()->formatASTForErrorMessage());
auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName(); auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName();


@ -366,13 +366,10 @@ void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) cons
hash.update(wbuf.str().c_str(), wbuf.str().size()); hash.update(wbuf.str().c_str(), wbuf.str().size());
} }
void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnAggregateFunction::getWeakHash32() const
{ {
auto s = data.size(); auto s = data.size();
if (hash.getData().size() != data.size()) WeakHash32 hash(s);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), hash.getData().size());
auto & hash_data = hash.getData(); auto & hash_data = hash.getData();
std::vector<UInt8> v; std::vector<UInt8> v;
@ -383,6 +380,8 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
wbuf.finalize(); wbuf.finalize();
hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]); hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]);
} }
return hash;
} }
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const void ColumnAggregateFunction::updateHashFast(SipHash & hash) const


@ -177,7 +177,7 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;


@ -271,15 +271,12 @@ void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const
getData().updateHashWithValue(offset + i, hash); getData().updateHashWithValue(offset + i, hash);
} }
void ColumnArray::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnArray::getWeakHash32() const
{ {
auto s = offsets->size(); auto s = offsets->size();
if (hash.getData().size() != s) WeakHash32 hash(s);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", s, hash.getData().size());
WeakHash32 internal_hash(data->size()); WeakHash32 internal_hash = data->getWeakHash32();
data->updateWeakHash32(internal_hash);
Offset prev_offset = 0; Offset prev_offset = 0;
const auto & offsets_data = getOffsets(); const auto & offsets_data = getOffsets();
@ -300,6 +297,8 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
prev_offset = offsets_data[i]; prev_offset = offsets_data[i];
} }
return hash;
} }
void ColumnArray::updateHashFast(SipHash & hash) const void ColumnArray::updateHashFast(SipHash & hash) const


@ -82,7 +82,7 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override; const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;


@ -3,6 +3,7 @@
#include <optional> #include <optional>
#include <Core/Field.h> #include <Core/Field.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/WeakHash.h>
#include <IO/BufferWithOwnMemory.h> #include <IO/BufferWithOwnMemory.h>
@ -98,7 +99,7 @@ public:
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); } const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); } const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); } WeakHash32 getWeakHash32() const override { throwMustBeDecompressed(); }
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); } void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); } ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
void expand(const Filter &, bool) override { throwMustBeDecompressed(); } void expand(const Filter &, bool) override { throwMustBeDecompressed(); }


@ -137,18 +137,10 @@ void ColumnConst::updatePermutation(PermutationSortDirection /*direction*/, Perm
{ {
} }
void ColumnConst::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnConst::getWeakHash32() const
{ {
if (hash.getData().size() != s) WeakHash32 element_hash = data->getWeakHash32();
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " return WeakHash32(s, element_hash.getData()[0]);
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 element_hash(1);
data->updateWeakHash32(element_hash);
size_t data_hash = element_hash.getData()[0];
for (auto & value : hash.getData())
value = static_cast<UInt32>(intHashCRC32(data_hash, value));
} }
void ColumnConst::compareColumn( void ColumnConst::compareColumn(


@ -204,7 +204,7 @@ public:
data->updateHashWithValue(0, hash); data->updateHashWithValue(0, hash);
} }
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override void updateHashFast(SipHash & hash) const override
{ {


@ -28,7 +28,6 @@ namespace ErrorCodes
extern const int PARAMETER_OUT_OF_BOUND; extern const int PARAMETER_OUT_OF_BOUND;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
} }
template <is_decimal T> template <is_decimal T>
@ -76,13 +75,10 @@ void ColumnDecimal<T>::updateHashWithValue(size_t n, SipHash & hash) const
} }
template <is_decimal T> template <is_decimal T>
void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnDecimal<T>::getWeakHash32() const
{ {
auto s = data.size(); auto s = data.size();
WeakHash32 hash(s);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const T * begin = data.data(); const T * begin = data.data();
const T * end = begin + s; const T * end = begin + s;
@ -94,6 +90,8 @@ void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
++begin; ++begin;
++hash_data; ++hash_data;
} }
return hash;
} }
template <is_decimal T> template <is_decimal T>


@ -102,7 +102,7 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override; const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;


@ -4,6 +4,7 @@
#include <Columns/ColumnVector.h> #include <Columns/ColumnVector.h>
#include <Columns/ColumnVariant.h> #include <Columns/ColumnVariant.h>
#include <DataTypes/IDataType.h> #include <DataTypes/IDataType.h>
#include <Common/WeakHash.h>
namespace DB namespace DB
@ -174,9 +175,9 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override WeakHash32 getWeakHash32() const override
{ {
variant_column->updateWeakHash32(hash); return variant_column->getWeakHash32();
} }
void updateHashFast(SipHash & hash) const override void updateHashFast(SipHash & hash) const override


@ -137,14 +137,10 @@ void ColumnFixedString::updateHashWithValue(size_t index, SipHash & hash) const
hash.update(reinterpret_cast<const char *>(&chars[n * index]), n); hash.update(reinterpret_cast<const char *>(&chars[n * index]), n);
} }
void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnFixedString::getWeakHash32() const
{ {
auto s = size(); auto s = size();
WeakHash32 hash(s);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, "
"hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const UInt8 * pos = chars.data(); const UInt8 * pos = chars.data();
UInt32 * hash_data = hash.getData().data(); UInt32 * hash_data = hash.getData().data();
@ -156,6 +152,8 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
pos += n; pos += n;
++hash_data; ++hash_data;
} }
return hash;
} }
void ColumnFixedString::updateHashFast(SipHash & hash) const void ColumnFixedString::updateHashFast(SipHash & hash) const


@ -133,7 +133,7 @@ public:
void updateHashWithValue(size_t index, SipHash & hash) const override; void updateHashWithValue(size_t index, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;


@ -4,6 +4,7 @@
#include <Core/NamesAndTypes.h> #include <Core/NamesAndTypes.h>
#include <Core/ColumnsWithTypeAndName.h> #include <Core/ColumnsWithTypeAndName.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB namespace DB
@ -130,9 +131,9 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName()); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName());
} }
void updateWeakHash32(WeakHash32 &) const override WeakHash32 getWeakHash32() const override
{ {
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateWeakHash32 is not implemented for {}", getName()); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getWeakHash32 is not implemented for {}", getName());
} }
void updateHashFast(SipHash &) const override void updateHashFast(SipHash &) const override


@ -7,8 +7,7 @@
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
#include <Common/WeakHash.h> #include <Common/WeakHash.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include "Storages/IndicesDescription.h" #include <base/types.h>
#include "base/types.h"
#include <base/sort.h> #include <base/sort.h>
#include <base/scope_guard.h> #include <base/scope_guard.h>
@ -320,19 +319,10 @@ const char * ColumnLowCardinality::skipSerializedInArena(const char * pos) const
return getDictionary().skipSerializedInArena(pos); return getDictionary().skipSerializedInArena(pos);
} }
void ColumnLowCardinality::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnLowCardinality::getWeakHash32() const
{ {
auto s = size(); WeakHash32 dict_hash = getDictionary().getNestedColumn()->getWeakHash32();
return idx.getWeakHash(dict_hash);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const auto & dict = getDictionary().getNestedColumn();
WeakHash32 dict_hash(dict->size());
dict->updateWeakHash32(dict_hash);
idx.updateWeakHash(hash, dict_hash);
} }
void ColumnLowCardinality::updateHashFast(SipHash & hash) const void ColumnLowCardinality::updateHashFast(SipHash & hash) const
@ -832,10 +822,11 @@ bool ColumnLowCardinality::Index::containsDefault() const
return contains; return contains;
} }
void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const WeakHash32 ColumnLowCardinality::Index::getWeakHash(const WeakHash32 & dict_hash) const
{ {
WeakHash32 hash(positions->size());
auto & hash_data = hash.getData(); auto & hash_data = hash.getData();
auto & dict_hash_data = dict_hash.getData(); const auto & dict_hash_data = dict_hash.getData();
auto update_weak_hash = [&](auto x) auto update_weak_hash = [&](auto x)
{ {
@ -844,10 +835,11 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 &
auto size = data.size(); auto size = data.size();
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
hash_data[i] = static_cast<UInt32>(intHashCRC32(dict_hash_data[data[i]], hash_data[i])); hash_data[i] = dict_hash_data[data[i]];
}; };
callForType(std::move(update_weak_hash), size_of_type); callForType(std::move(update_weak_hash), size_of_type);
return hash;
} }
void ColumnLowCardinality::Index::collectSerializedValueSizes( void ColumnLowCardinality::Index::collectSerializedValueSizes(


@ -111,7 +111,7 @@ public:
getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash); getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash);
} }
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash &) const override; void updateHashFast(SipHash &) const override;
@ -325,7 +325,7 @@ public:
bool containsDefault() const; bool containsDefault() const;
void updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const; WeakHash32 getWeakHash(const WeakHash32 & dict_hash) const;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const; void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const;


@ -143,9 +143,9 @@ void ColumnMap::updateHashWithValue(size_t n, SipHash & hash) const
nested->updateHashWithValue(n, hash); nested->updateHashWithValue(n, hash);
} }
void ColumnMap::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnMap::getWeakHash32() const
{ {
nested->updateWeakHash32(hash); return nested->getWeakHash32();
} }
void ColumnMap::updateHashFast(SipHash & hash) const void ColumnMap::updateHashFast(SipHash & hash) const


@ -64,7 +64,7 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override; const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(ABORT_ON_LOGICAL_ERROR)


@ -56,25 +56,21 @@ void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
getNestedColumn().updateHashWithValue(n, hash); getNestedColumn().updateHashWithValue(n, hash);
} }
void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnNullable::getWeakHash32() const
{ {
auto s = size(); auto s = size();
if (hash.getData().size() != s) WeakHash32 hash = nested_column->getWeakHash32();
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 old_hash = hash;
nested_column->updateWeakHash32(hash);
const auto & null_map_data = getNullMapData(); const auto & null_map_data = getNullMapData();
auto & hash_data = hash.getData(); auto & hash_data = hash.getData();
auto & old_hash_data = old_hash.getData();
/// Use old data for nulls. /// Use default for nulls.
for (size_t row = 0; row < s; ++row) for (size_t row = 0; row < s; ++row)
if (null_map_data[row]) if (null_map_data[row])
hash_data[row] = old_hash_data[row]; hash_data[row] = WeakHash32::kDefaultInitialValue;
return hash;
} }
void ColumnNullable::updateHashFast(SipHash & hash) const void ColumnNullable::updateHashFast(SipHash & hash) const


@ -133,7 +133,7 @@ public:
void protect() override; void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override; void getExtremes(Field & min, Field & max) const override;
// Special function for nullable minmax index // Special function for nullable minmax index


@ -5,6 +5,7 @@
#include <Core/Names.h> #include <Core/Names.h>
#include <DataTypes/Serializations/SubcolumnsTree.h> #include <DataTypes/Serializations/SubcolumnsTree.h>
#include <Common/PODArray.h> #include <Common/PODArray.h>
#include <Common/WeakHash.h>
#include <DataTypes/IDataType.h> #include <DataTypes/IDataType.h>
@ -252,7 +253,7 @@ public:
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); } const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } WeakHash32 getWeakHash32() const override { throwMustBeConcrete(); }
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
void expand(const Filter &, bool) override { throwMustBeConcrete(); } void expand(const Filter &, bool) override { throwMustBeConcrete(); }
bool hasEqualValues() const override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); }


@ -678,20 +678,22 @@ void ColumnSparse::updateHashWithValue(size_t n, SipHash & hash) const
values->updateHashWithValue(getValueIndex(n), hash); values->updateHashWithValue(getValueIndex(n), hash);
} }
void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnSparse::getWeakHash32() const
{ {
if (hash.getData().size() != _size) WeakHash32 values_hash = values->getWeakHash32();
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " WeakHash32 hash(size());
"column size is {}, hash size is {}", _size, hash.getData().size());
auto & hash_data = hash.getData();
auto & values_hash_data = values_hash.getData();
auto offset_it = begin(); auto offset_it = begin();
auto & hash_data = hash.getData();
for (size_t i = 0; i < _size; ++i, ++offset_it) for (size_t i = 0; i < _size; ++i, ++offset_it)
{ {
size_t value_index = offset_it.getValueIndex(); size_t value_index = offset_it.getValueIndex();
auto data_ref = values->getDataAt(value_index); hash_data[i] = values_hash_data[value_index];
hash_data[i] = ::updateWeakHash32(reinterpret_cast<const UInt8 *>(data_ref.data), data_ref.size, hash_data[i]);
} }
return hash;
} }
void ColumnSparse::updateHashFast(SipHash & hash) const void ColumnSparse::updateHashFast(SipHash & hash) const


@ -139,7 +139,7 @@ public:
void protect() override; void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override; void getExtremes(Field & min, Field & max) const override;


@ -108,13 +108,10 @@ MutableColumnPtr ColumnString::cloneResized(size_t to_size) const
return res; return res;
} }
void ColumnString::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnString::getWeakHash32() const
{ {
auto s = offsets.size(); auto s = offsets.size();
WeakHash32 hash(s);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const UInt8 * pos = chars.data(); const UInt8 * pos = chars.data();
UInt32 * hash_data = hash.getData().data(); UInt32 * hash_data = hash.getData().data();
@ -130,6 +127,8 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const
prev_offset = offset; prev_offset = offset;
++hash_data; ++hash_data;
} }
return hash;
} }


@ -212,7 +212,7 @@ public:
hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size); hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size);
} }
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override void updateHashFast(SipHash & hash) const override
{ {


@ -310,16 +310,15 @@ void ColumnTuple::updateHashWithValue(size_t n, SipHash & hash) const
column->updateHashWithValue(n, hash); column->updateHashWithValue(n, hash);
} }
void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnTuple::getWeakHash32() const
{ {
auto s = size(); auto s = size();
WeakHash32 hash(s);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
for (const auto & column : columns) for (const auto & column : columns)
column->updateWeakHash32(hash); hash.update(column->getWeakHash32());
return hash;
} }
void ColumnTuple::updateHashFast(SipHash & hash) const void ColumnTuple::updateHashFast(SipHash & hash) const


@ -81,7 +81,7 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override; const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;


@ -789,36 +789,26 @@ void ColumnVariant::updateHashWithValue(size_t n, SipHash & hash) const
variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash); variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash);
} }
void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnVariant::getWeakHash32() const
{ {
auto s = size(); auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
/// If we have only NULLs, keep hash unchanged. /// If we have only NULLs, keep hash unchanged.
if (hasOnlyNulls()) if (hasOnlyNulls())
return; return WeakHash32(s);
/// Optimization for case when there is only 1 non-empty variant and no NULLs. /// Optimization for case when there is only 1 non-empty variant and no NULLs.
/// In this case we can just calculate weak hash for this variant. /// In this case we can just calculate weak hash for this variant.
if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
{ return variants[*non_empty_local_discr]->getWeakHash32();
variants[*non_empty_local_discr]->updateWeakHash32(hash);
return;
}
/// Calculate weak hash for all variants. /// Calculate weak hash for all variants.
std::vector<WeakHash32> nested_hashes; std::vector<WeakHash32> nested_hashes;
for (const auto & variant : variants) for (const auto & variant : variants)
{ nested_hashes.emplace_back(variant->getWeakHash32());
WeakHash32 nested_hash(variant->size());
variant->updateWeakHash32(nested_hash);
nested_hashes.emplace_back(std::move(nested_hash));
}
/// For each row hash is a hash of corresponding row from corresponding variant. /// For each row hash is a hash of corresponding row from corresponding variant.
WeakHash32 hash(s);
auto & hash_data = hash.getData(); auto & hash_data = hash.getData();
const auto & local_discriminators_data = getLocalDiscriminators(); const auto & local_discriminators_data = getLocalDiscriminators();
const auto & offsets_data = getOffsets(); const auto & offsets_data = getOffsets();
@ -827,11 +817,10 @@ void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
Discriminator discr = local_discriminators_data[i]; Discriminator discr = local_discriminators_data[i];
/// Update hash only for non-NULL values /// Update hash only for non-NULL values
if (discr != NULL_DISCRIMINATOR) if (discr != NULL_DISCRIMINATOR)
{ hash_data[i] = nested_hashes[discr].getData()[offsets_data[i]];
auto nested_hash = nested_hashes[local_discriminators_data[i]].getData()[offsets_data[i]];
hash_data[i] = static_cast<UInt32>(hashCRC32(nested_hash, hash_data[i]));
}
} }
return hash;
} }
void ColumnVariant::updateHashFast(SipHash & hash) const void ColumnVariant::updateHashFast(SipHash & hash) const


@ -213,7 +213,7 @@ public:
const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos); const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos);
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override; void expand(const Filter & mask, bool inverted) override;


@ -73,13 +73,10 @@ void ColumnVector<T>::updateHashWithValue(size_t n, SipHash & hash) const
} }
template <typename T> template <typename T>
void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnVector<T>::getWeakHash32() const
{ {
auto s = data.size(); auto s = data.size();
WeakHash32 hash(s);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const T * begin = data.data(); const T * begin = data.data();
const T * end = begin + s; const T * end = begin + s;
@ -91,6 +88,8 @@ void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
++begin; ++begin;
++hash_data; ++hash_data;
} }
return hash;
} }
template <typename T> template <typename T>


@ -114,7 +114,7 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;


@ -300,10 +300,10 @@ public:
/// passed bytes to hash must identify sequence of values unambiguously. /// passed bytes to hash must identify sequence of values unambiguously.
virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0; virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0;
/// Update hash function value. Hash is calculated for each element. /// Get hash function value. Hash is calculated for each element.
/// It's a fast weak hash function. Mainly need to scatter data between threads. /// It's a fast weak hash function. Mainly need to scatter data between threads.
/// WeakHash32 must have the same size as column. /// WeakHash32 must have the same size as column.
virtual void updateWeakHash32(WeakHash32 & hash) const = 0; virtual WeakHash32 getWeakHash32() const = 0;
/// Update state of hash with all column. /// Update state of hash with all column.
virtual void updateHashFast(SipHash & hash) const = 0; virtual void updateHashFast(SipHash & hash) const = 0;
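The interface change above replaces the update-style call with one that returns a hash sized by the column itself. A minimal standalone C++ sketch of that contract, using hypothetical simplified stand-ins rather than the real IColumn/WeakHash32 classes (the mixing function is a toy, not intHashCRC32):
#include <cstddef>
#include <cstdint>
#include <vector>

/// Hypothetical stand-ins for WeakHash32 and IColumn, only to show the shape of the new contract.
struct WeakHash32Sketch
{
    explicit WeakHash32Sketch(size_t size) : data(size, ~uint32_t(0)) {}
    std::vector<uint32_t> data;
};

struct IColumnSketch
{
    virtual ~IColumnSketch() = default;
    virtual size_t size() const = 0;
    /// The column allocates and sizes the hash itself, so a caller can no longer
    /// pass a buffer of the wrong length, which the old update-style API allowed.
    virtual WeakHash32Sketch getWeakHash32() const = 0;
};

struct UInt64ColumnSketch : IColumnSketch
{
    std::vector<uint64_t> values;

    size_t size() const override { return values.size(); }

    WeakHash32Sketch getWeakHash32() const override
    {
        WeakHash32Sketch hash(values.size());
        for (size_t i = 0; i < values.size(); ++i)
            hash.data[i] = static_cast<uint32_t>((values[i] * 0x9E3779B97F4A7C15ULL) >> 32); /// toy mix, not intHashCRC32
        return hash;
    }
};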


@ -1,6 +1,7 @@
#pragma once #pragma once
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB namespace DB
@ -63,8 +64,9 @@ public:
{ {
} }
void updateWeakHash32(WeakHash32 & /*hash*/) const override WeakHash32 getWeakHash32() const override
{ {
return WeakHash32(s);
} }
void updateHashFast(SipHash & /*hash*/) const override void updateHashFast(SipHash & /*hash*/) const override


@ -1,6 +1,7 @@
#pragma once #pragma once
#include <optional> #include <optional>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB namespace DB
{ {
@ -166,9 +167,9 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique."); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique.");
} }
void updateWeakHash32(WeakHash32 &) const override WeakHash32 getWeakHash32() const override
{ {
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method updateWeakHash32 is not supported for ColumnUnique."); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getWeakHash32 is not supported for ColumnUnique.");
} }
void updateHashFast(SipHash &) const override void updateHashFast(SipHash &) const override


@ -60,8 +60,7 @@ TEST(WeakHash32, ColumnVectorU8)
data.push_back(i); data.push_back(i);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -77,8 +76,7 @@ TEST(WeakHash32, ColumnVectorI8)
data.push_back(i); data.push_back(i);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -94,8 +92,7 @@ TEST(WeakHash32, ColumnVectorU16)
data.push_back(i); data.push_back(i);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -111,8 +108,7 @@ TEST(WeakHash32, ColumnVectorI16)
data.push_back(i); data.push_back(i);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -128,8 +124,7 @@ TEST(WeakHash32, ColumnVectorU32)
data.push_back(i << 16u); data.push_back(i << 16u);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -145,8 +140,7 @@ TEST(WeakHash32, ColumnVectorI32)
data.push_back(i << 16); data.push_back(i << 16);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -162,8 +156,7 @@ TEST(WeakHash32, ColumnVectorU64)
data.push_back(i << 32u); data.push_back(i << 32u);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -179,8 +172,7 @@ TEST(WeakHash32, ColumnVectorI64)
data.push_back(i << 32); data.push_back(i << 32);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -204,8 +196,7 @@ TEST(WeakHash32, ColumnVectorU128)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), eq_data); checkColumn(hash.getData(), eq_data);
} }
@ -221,8 +212,7 @@ TEST(WeakHash32, ColumnVectorI128)
data.push_back(i << 32); data.push_back(i << 32);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -238,8 +228,7 @@ TEST(WeakHash32, ColumnDecimal32)
data.push_back(i << 16); data.push_back(i << 16);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -255,8 +244,7 @@ TEST(WeakHash32, ColumnDecimal64)
data.push_back(i << 32); data.push_back(i << 32);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -272,8 +260,7 @@ TEST(WeakHash32, ColumnDecimal128)
data.push_back(i << 32); data.push_back(i << 32);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -294,8 +281,7 @@ TEST(WeakHash32, ColumnString1)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -331,8 +317,7 @@ TEST(WeakHash32, ColumnString2)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -369,8 +354,7 @@ TEST(WeakHash32, ColumnString3)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -397,8 +381,7 @@ TEST(WeakHash32, ColumnFixedString)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -444,8 +427,7 @@ TEST(WeakHash32, ColumnArray)
auto col_arr = ColumnArray::create(std::move(val), std::move(off)); auto col_arr = ColumnArray::create(std::move(val), std::move(off));
WeakHash32 hash(col_arr->size()); WeakHash32 hash = col_arr->getWeakHash32();
col_arr->updateWeakHash32(hash);
checkColumn(hash.getData(), eq_data); checkColumn(hash.getData(), eq_data);
} }
@ -479,8 +461,7 @@ TEST(WeakHash32, ColumnArray2)
auto col_arr = ColumnArray::create(std::move(val), std::move(off)); auto col_arr = ColumnArray::create(std::move(val), std::move(off));
WeakHash32 hash(col_arr->size()); WeakHash32 hash = col_arr->getWeakHash32();
col_arr->updateWeakHash32(hash);
checkColumn(hash.getData(), eq_data); checkColumn(hash.getData(), eq_data);
} }
@ -536,8 +517,7 @@ TEST(WeakHash32, ColumnArrayArray)
auto col_arr = ColumnArray::create(std::move(val), std::move(off)); auto col_arr = ColumnArray::create(std::move(val), std::move(off));
auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2)); auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2));
WeakHash32 hash(col_arr_arr->size()); WeakHash32 hash = col_arr_arr->getWeakHash32();
col_arr_arr->updateWeakHash32(hash);
checkColumn(hash.getData(), eq_data); checkColumn(hash.getData(), eq_data);
} }
@ -555,8 +535,7 @@ TEST(WeakHash32, ColumnConst)
auto col_const = ColumnConst::create(std::move(inner_col), 256); auto col_const = ColumnConst::create(std::move(inner_col), 256);
WeakHash32 hash(col_const->size()); WeakHash32 hash = col_const->getWeakHash32();
col_const->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -576,8 +555,7 @@ TEST(WeakHash32, ColumnLowcardinality)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -602,8 +580,7 @@ TEST(WeakHash32, ColumnNullable)
auto col_null = ColumnNullable::create(std::move(col), std::move(mask)); auto col_null = ColumnNullable::create(std::move(col), std::move(mask));
WeakHash32 hash(col_null->size()); WeakHash32 hash = col_null->getWeakHash32();
col_null->updateWeakHash32(hash);
checkColumn(hash.getData(), eq); checkColumn(hash.getData(), eq);
} }
@ -633,8 +610,7 @@ TEST(WeakHash32, ColumnTupleUInt64UInt64)
columns.emplace_back(std::move(col2)); columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns)); auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size()); WeakHash32 hash = col_tuple->getWeakHash32();
col_tuple->updateWeakHash32(hash);
checkColumn(hash.getData(), eq); checkColumn(hash.getData(), eq);
} }
@ -671,8 +647,7 @@ TEST(WeakHash32, ColumnTupleUInt64String)
columns.emplace_back(std::move(col2)); columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns)); auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size()); WeakHash32 hash = col_tuple->getWeakHash32();
col_tuple->updateWeakHash32(hash);
checkColumn(hash.getData(), eq); checkColumn(hash.getData(), eq);
} }
@ -709,8 +684,7 @@ TEST(WeakHash32, ColumnTupleUInt64FixedString)
columns.emplace_back(std::move(col2)); columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns)); auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size()); WeakHash32 hash = col_tuple->getWeakHash32();
col_tuple->updateWeakHash32(hash);
checkColumn(hash.getData(), eq); checkColumn(hash.getData(), eq);
} }
@ -756,8 +730,7 @@ TEST(WeakHash32, ColumnTupleUInt64Array)
columns.emplace_back(ColumnArray::create(std::move(val), std::move(off))); columns.emplace_back(ColumnArray::create(std::move(val), std::move(off)));
auto col_tuple = ColumnTuple::create(std::move(columns)); auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size()); WeakHash32 hash = col_tuple->getWeakHash32();
col_tuple->updateWeakHash32(hash);
checkColumn(hash.getData(), eq_data); checkColumn(hash.getData(), eq_data);
} }


@ -206,7 +206,7 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
} }
else else
{ {
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(old_size); [[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size);
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed); ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
} }
} }
@ -239,7 +239,7 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
void * new_buf = ::realloc(buf, new_size); void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf) if (nullptr == new_buf)
{ {
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(old_size); [[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size);
throw DB::ErrnoException( throw DB::ErrnoException(
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY,
"Allocator: Cannot realloc from {} to {}", "Allocator: Cannot realloc from {} to {}",


@ -244,6 +244,15 @@ private:
const char * className() const noexcept override { return "DB::ErrnoException"; } const char * className() const noexcept override { return "DB::ErrnoException"; }
}; };
/// An exception to use in unit tests to test interfaces.
/// It is distinguished from others, so it does not have to be logged.
class TestException : public Exception
{
public:
using Exception::Exception;
};
using Exceptions = std::vector<std::exception_ptr>; using Exceptions = std::vector<std::exception_ptr>;
/** Try to write an exception to the log (and forget about it). /** Try to write an exception to the log (and forget about it).


@ -23,8 +23,20 @@ namespace DB
LazyPipeFDs TraceSender::pipe; LazyPipeFDs TraceSender::pipe;
static thread_local bool inside_send = false;
void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Extras extras) void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Extras extras)
{ {
/** The method shouldn't be called recursively or throw exceptions.
* There are several reasons:
* - avoid infinite recursion when some of subsequent functions invoke tracing;
* - avoid inconsistent writes if the method was interrupted by a signal handler in the middle of writing,
* and then another tracing is invoked (e.g., from query profiler).
*/
if (unlikely(inside_send))
return;
inside_send = true;
DENY_ALLOCATIONS_IN_SCOPE;
constexpr size_t buf_size = sizeof(char) /// TraceCollector stop flag constexpr size_t buf_size = sizeof(char) /// TraceCollector stop flag
+ sizeof(UInt8) /// String size + sizeof(UInt8) /// String size
+ QUERY_ID_MAX_LEN /// Maximum query_id length + QUERY_ID_MAX_LEN /// Maximum query_id length
@ -80,6 +92,8 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext
writePODBinary(extras.increment, out); writePODBinary(extras.increment, out);
out.next(); out.next();
inside_send = false;
} }
} }
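The new inside_send flag is a plain thread-local reentrancy guard. A minimal standalone sketch of the pattern (hypothetical names, writing to stdout instead of the trace pipe):
#include <iostream>

static thread_local bool inside_send = false;

void sendTraceSketch(const char * what)
{
    /// If tracing itself triggers tracing (e.g. a signal-based profiler fires while we are
    /// in the middle of writing), drop the nested call instead of recursing or interleaving writes.
    if (inside_send)
        return;
    inside_send = true;

    std::cout << "trace: " << what << '\n';   /// stands in for writing the record to the pipe

    inside_send = false;
}

int main()
{
    sendTraceSketch("query profiler sample");
}
The flag is reset manually rather than with a scope guard because, per the comment above, the method is expected not to throw.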


@ -1,2 +1,24 @@
#include <Common/WeakHash.h> #include <Common/WeakHash.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
void WeakHash32::update(const WeakHash32 & other)
{
size_t size = data.size();
if (size != other.data.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match:"
"left size is {}, right size is {}", size, other.data.size());
for (size_t i = 0; i < size; ++i)
data[i] = static_cast<UInt32>(intHashCRC32(other.data[i], data[i]));
}
}
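WeakHash32::update folds another equally sized per-row hash into this one, element by element. A standalone sketch of the same shape, with a toy two-argument mixer standing in for intHashCRC32:
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

/// Toy mixer standing in for intHashCRC32(x, updated_value).
static uint32_t combineSketch(uint64_t x, uint64_t seed)
{
    uint64_t h = seed ^ (x * 0x9E3779B97F4A7C15ULL);
    h ^= h >> 33;
    return static_cast<uint32_t>(h);
}

struct WeakHashSketch
{
    std::vector<uint32_t> data;

    void update(const WeakHashSketch & other)
    {
        if (data.size() != other.data.size())
            throw std::logic_error("WeakHash32 sizes do not match");
        /// Fold the other per-row hash into this one, row by row.
        for (size_t i = 0; i < data.size(); ++i)
            data[i] = combineSketch(other.data[i], data[i]);
    }
};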


@ -11,9 +11,8 @@ namespace DB
/// The main purpose why this class needed is to support data initialization. Initially, every bit is 1. /// The main purpose why this class needed is to support data initialization. Initially, every bit is 1.
class WeakHash32 class WeakHash32
{ {
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
public: public:
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
using Container = PaddedPODArray<UInt32>; using Container = PaddedPODArray<UInt32>;
@ -22,6 +21,8 @@ public:
void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); } void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); }
void update(const WeakHash32 & other);
const Container & getData() const { return data; } const Container & getData() const { return data; }
Container & getData() { return data; } Container & getData() { return data; }


@ -6,12 +6,17 @@ namespace DB
{ {
String getRandomASCIIString(size_t length) String getRandomASCIIString(size_t length)
{
return getRandomASCIIString(length, thread_local_rng);
}
String getRandomASCIIString(size_t length, pcg64 & rng)
{ {
std::uniform_int_distribution<int> distribution('a', 'z'); std::uniform_int_distribution<int> distribution('a', 'z');
String res; String res;
res.resize(length); res.resize(length);
for (auto & c : res) for (auto & c : res)
c = distribution(thread_local_rng); c = distribution(rng);
return res; return res;
} }
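Passing the generator in explicitly is what makes the helper usable deterministically in tests. A standalone sketch of the same idea, with std::mt19937_64 standing in for pcg64 purely to keep the example self-contained:
#include <cassert>
#include <cstddef>
#include <random>
#include <string>

/// Same idea as getRandomASCIIString(length, rng), with std::mt19937_64 standing in for pcg64.
std::string randomLowercaseString(size_t length, std::mt19937_64 & rng)
{
    std::uniform_int_distribution<int> distribution('a', 'z');
    std::string res(length, ' ');
    for (auto & c : res)
        c = static_cast<char>(distribution(rng));
    return res;
}

int main()
{
    std::mt19937_64 rng_a(42);
    std::mt19937_64 rng_b(42);
    /// Injecting a seeded generator makes the "random" name reproducible, which is the point of the overload.
    assert(randomLowercaseString(16, rng_a) == randomLowercaseString(16, rng_b));
}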


@ -2,11 +2,14 @@
#include <Core/Types.h> #include <Core/Types.h>
#include <pcg_random.hpp>
namespace DB namespace DB
{ {
/// Slow random string. Useful for random names and things like this. Not for generating data. /// Slow random string. Useful for random names and things like this. Not for generating data.
String getRandomASCIIString(size_t length); String getRandomASCIIString(size_t length);
String getRandomASCIIString(size_t length, pcg64 & rng);
} }


@ -54,16 +54,3 @@ TEST(ShellCommand, ExecuteWithInput)
EXPECT_EQ(res, "Hello, world!\n"); EXPECT_EQ(res, "Hello, world!\n");
} }
TEST(ShellCommand, AutoWait)
{
// <defunct> hunting:
for (int i = 0; i < 1000; ++i)
{
auto command = ShellCommand::execute("echo " + std::to_string(i));
//command->wait(); // now automatic
}
// std::cerr << "inspect me: ps auxwwf\n";
// std::this_thread::sleep_for(std::chrono::seconds(100));
}


@ -47,54 +47,85 @@ bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args)
return true; return true;
} }
/// Replaces single low cardinality column in a function call by its dictionary
/// This can only happen after the arguments have been adapted in IFunctionOverloadResolver::getReturnType
/// as it's only possible if there is one low cardinality column and, optionally, const columns
ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count) ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count)
{ {
size_t num_rows = input_rows_count; /// We return the LC indexes so the LC can be reconstructed with the function result
ColumnPtr indexes; ColumnPtr indexes;
/// Find first LowCardinality column and replace it to nested dictionary. size_t number_low_cardinality_columns = 0;
for (auto & column : args) size_t last_low_cardinality = 0;
size_t number_const_columns = 0;
size_t number_full_columns = 0;
for (size_t i = 0; i < args.size(); i++)
{ {
if (const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get())) auto const & arg = args[i];
if (checkAndGetColumn<ColumnLowCardinality>(arg.column.get()))
{ {
/// Single LowCardinality column is supported now. number_low_cardinality_columns++;
if (indexes) last_low_cardinality = i;
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single dictionary argument for function.");
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible type for LowCardinality column: {}",
column.type->getName());
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values' default.
column.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
else
{
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
column.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
num_rows = column.column->size();
column.type = low_cardinality_type->getDictionaryType();
} }
else if (checkAndGetColumn<ColumnConst>(arg.column.get()))
number_const_columns++;
else
number_full_columns++;
} }
/// Change size of constants. if (!number_low_cardinality_columns && !number_const_columns)
return nullptr;
if (number_full_columns > 0 || number_low_cardinality_columns > 1)
{
/// This should not be possible but currently there are multiple tests in CI failing because of it
/// TODO: Fix those cases, then enable this exception
#if 0
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected low cardinality types found. Low cardinality: {}. Full {}. Const {}",
number_low_cardinality_columns, number_full_columns, number_const_columns);
#else
return nullptr;
#endif
}
else if (number_low_cardinality_columns == 1)
{
auto & lc_arg = args[last_low_cardinality];
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(lc_arg.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", lc_arg.type->getName());
const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(lc_arg.column.get());
chassert(low_cardinality_column);
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values' default.
lc_arg.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
else
{
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
lc_arg.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
/// The new column will have a different number of rows, normally less but occasionally it might be more (NULL)
input_rows_count = lc_arg.column->size();
lc_arg.type = low_cardinality_type->getDictionaryType();
}
/// Change size of constants
for (auto & column : args) for (auto & column : args)
{ {
if (const auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get())) if (const auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get()))
{ {
column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), num_rows); column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), input_rows_count);
column.type = recursiveRemoveLowCardinality(column.type); column.type = recursiveRemoveLowCardinality(column.type);
} }
} }
@ -270,6 +301,8 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments(); bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments();
const auto & dictionary_type = res_low_cardinality_type->getDictionaryType(); const auto & dictionary_type = res_low_cardinality_type->getDictionaryType();
/// The arguments should have been adapted in IFunctionOverloadResolver::getReturnType
/// So there is only one low cardinality column (and optionally some const columns) and no full column
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count); columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);
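The rewritten helper above relies on the usual LowCardinality trick: run the function once per dictionary entry and keep the row-to-dictionary indexes so the full result can be rebuilt afterwards. A standalone toy sketch of that decomposition (simplified types, not the real ColumnLowCardinality API):
#include <cctype>
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

/// Toy "low cardinality" column: a dictionary of distinct values plus per-row indexes into it.
struct LowCardinalitySketch
{
    std::vector<std::string> dictionary;
    std::vector<uint32_t> indexes;
};

/// Run a per-value function once per dictionary entry, then rebuild the full result via the indexes.
std::vector<std::string> mapOverDictionary(
    const LowCardinalitySketch & col, const std::function<std::string(const std::string &)> & f)
{
    std::vector<std::string> mapped_dictionary;
    mapped_dictionary.reserve(col.dictionary.size());
    for (const auto & value : col.dictionary)
        mapped_dictionary.push_back(f(value));   /// executed once per distinct value, not once per row

    std::vector<std::string> result;
    result.reserve(col.indexes.size());
    for (uint32_t idx : col.indexes)
        result.push_back(mapped_dictionary[idx]);
    return result;
}

int main()
{
    LowCardinalitySketch col{{"ab", "cd"}, {0, 1, 1, 0, 0}};
    auto upper = [](const std::string & s)
    {
        std::string r = s;
        for (auto & c : r)
            c = static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
        return r;
    };
    for (const auto & v : mapOverDictionary(col, upper))
        std::cout << v << '\n';   /// AB CD CD AB AB
}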


@ -310,7 +310,7 @@ IColumn::Selector ConcurrentHashJoin::selectDispatchBlock(const Strings & key_co
{ {
const auto & key_col = from_block.getByName(key_name).column->convertToFullColumnIfConst(); const auto & key_col = from_block.getByName(key_name).column->convertToFullColumnIfConst();
const auto & key_col_no_lc = recursiveRemoveLowCardinality(recursiveRemoveSparse(key_col)); const auto & key_col_no_lc = recursiveRemoveLowCardinality(recursiveRemoveSparse(key_col));
key_col_no_lc->updateWeakHash32(hash); hash.update(key_col_no_lc->getWeakHash32());
} }
return hashToSelector(hash, num_shards); return hashToSelector(hash, num_shards);
} }
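The call sites now fold each key column's getWeakHash32() result into one per-row hash and then map it onto the shard count. A standalone sketch of that scatter step, with a plain modulo standing in for hashToSelector and a toy combiner instead of the real one:
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

/// Fold several per-row hash vectors into one value per row and pick a shard for each row.
std::vector<size_t> scatterByWeakHash(const std::vector<std::vector<uint32_t>> & per_column_hashes, size_t num_shards)
{
    const size_t rows = per_column_hashes.front().size();
    std::vector<uint32_t> combined(rows, ~uint32_t(0));
    for (const auto & column_hash : per_column_hashes)
        for (size_t row = 0; row < rows; ++row)
            combined[row] = combined[row] * 31u + column_hash[row];   /// toy combiner

    std::vector<size_t> selector(rows);
    for (size_t row = 0; row < rows; ++row)
        selector[row] = combined[row] % num_shards;
    return selector;
}

int main()
{
    for (size_t shard : scatterByWeakHash({{1, 2, 3, 4}, {10, 20, 30, 40}}, 2))
        std::cout << shard << ' ';
}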


@ -1726,7 +1726,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
const auto & join_clause = table_join.getOnlyClause(); const auto & join_clause = table_join.getOnlyClause();
auto join_kind = table_join.kind(); auto join_kind = table_join.kind();
bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind); auto join_strictness = table_join.strictness();
bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));
auto has_non_const = [](const Block & block, const auto & keys) auto has_non_const = [](const Block & block, const auto & keys)
{ {
@ -1745,7 +1748,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
bool has_non_const_keys = has_non_const(query_plan.getCurrentDataStream().header, join_clause.key_names_left) bool has_non_const_keys = has_non_const(query_plan.getCurrentDataStream().header, join_clause.key_names_left)
&& has_non_const(joined_plan->getCurrentDataStream().header, join_clause.key_names_right); && has_non_const(joined_plan->getCurrentDataStream().header, join_clause.key_names_right);
if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys) if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
{ {
auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left); auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left);
auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right); auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right);


@ -554,7 +554,7 @@ static Blocks scatterBlockByHashImpl(const Strings & key_columns_names, const Bl
for (const auto & key_name : key_columns_names) for (const auto & key_name : key_columns_names)
{ {
ColumnPtr key_col = materializeColumn(block, key_name); ColumnPtr key_col = materializeColumn(block, key_name);
key_col->updateWeakHash32(hash); hash.update(key_col->getWeakHash32());
} }
auto selector = hashToSelector(hash, sharder); auto selector = hashToSelector(hash, sharder);


@ -7,7 +7,6 @@
#include <Common/FieldVisitorToString.h> #include <Common/FieldVisitorToString.h>
#include <Common/KnownObjectNames.h> #include <Common/KnownObjectNames.h>
#include <Common/SipHash.h> #include <Common/SipHash.h>
#include <Common/typeid_cast.h>
#include <IO/Operators.h> #include <IO/Operators.h>
#include <IO/WriteBufferFromString.h> #include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
@ -19,9 +18,6 @@
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
#include <Parsers/ASTSetQuery.h> #include <Parsers/ASTSetQuery.h>
#include <Parsers/FunctionSecretArgumentsFinderAST.h> #include <Parsers/FunctionSecretArgumentsFinderAST.h>
#include <Core/QualifiedTableName.h>
#include <boost/algorithm/string.hpp>
using namespace std::literals; using namespace std::literals;
@ -632,6 +628,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
settings.ostr << ", "; settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>()) if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS "; settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
} }
settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : "");
@ -642,12 +639,14 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
{ {
settings.ostr << (settings.hilite ? hilite_operator : "") << ((frame.need_parens && !alias.empty()) ? "tuple" : "") << '(' settings.ostr << (settings.hilite ? hilite_operator : "") << ((frame.need_parens && !alias.empty()) ? "tuple" : "") << '('
<< (settings.hilite ? hilite_none : ""); << (settings.hilite ? hilite_none : "");
for (size_t i = 0; i < arguments->children.size(); ++i) for (size_t i = 0; i < arguments->children.size(); ++i)
{ {
if (i != 0) if (i != 0)
settings.ostr << ", "; settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>()) if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS "; settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
} }
settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : "");
@ -663,6 +662,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
settings.ostr << ", "; settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>()) if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS "; settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
} }
settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : "");


@ -745,7 +745,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan,
{ {
auto & interpolate_node_typed = interpolate_node->as<InterpolateNode &>(); auto & interpolate_node_typed = interpolate_node->as<InterpolateNode &>();
PlannerActionsVisitor planner_actions_visitor(planner_context); PlannerActionsVisitor planner_actions_visitor(
planner_context,
/* use_column_identifier_as_action_node_name_, (default value)*/ true,
/// Prefer the INPUT to CONSTANT nodes (actions must be non constant)
/* always_use_const_column_for_constant_nodes */ false);
auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag,
interpolate_node_typed.getExpression()); interpolate_node_typed.getExpression());
if (expression_to_interpolate_expression_nodes.size() != 1) if (expression_to_interpolate_expression_nodes.size() != 1)


@ -487,16 +487,33 @@ public:
return node; return node;
} }
const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column) [[nodiscard]] String addConstantIfNecessary(
const std::string & node_name, const ColumnWithTypeAndName & column, bool always_use_const_column_for_constant_nodes)
{ {
chassert(column.column != nullptr);
auto it = node_name_to_node.find(node_name); auto it = node_name_to_node.find(node_name);
if (it != node_name_to_node.end() && (!always_use_const_column_for_constant_nodes || it->second->column))
return {node_name};
if (it != node_name_to_node.end()) if (it != node_name_to_node.end())
return it->second; {
/// There is a node with this name, but it doesn't have a column
/// This likely happens because we executed the query until WithMergeableState with a const node in the
/// WHERE clause and, as the results of headers are materialized, the column was removed
/// Let's add a new column and keep this
String dupped_name{node_name + "_dupped"};
if (node_name_to_node.find(dupped_name) != node_name_to_node.end())
return dupped_name;
const auto * node = &actions_dag.addColumn(column);
node_name_to_node[dupped_name] = node;
return dupped_name;
}
const auto * node = &actions_dag.addColumn(column); const auto * node = &actions_dag.addColumn(column);
node_name_to_node[node->result_name] = node; node_name_to_node[node->result_name] = node;
return node; return {node_name};
} }
template <typename FunctionOrOverloadResolver> template <typename FunctionOrOverloadResolver>
@ -525,7 +542,7 @@ public:
} }
private: private:
std::unordered_map<std::string_view, const ActionsDAG::Node *> node_name_to_node; std::unordered_map<String, const ActionsDAG::Node *> node_name_to_node;
ActionsDAG & actions_dag; ActionsDAG & actions_dag;
QueryTreeNodePtr scope_node; QueryTreeNodePtr scope_node;
}; };
@ -533,9 +550,11 @@ private:
class PlannerActionsVisitorImpl class PlannerActionsVisitorImpl
{ {
public: public:
PlannerActionsVisitorImpl(ActionsDAG & actions_dag, PlannerActionsVisitorImpl(
ActionsDAG & actions_dag,
const PlannerContextPtr & planner_context_, const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_); bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_);
ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node); ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node);
@ -595,14 +614,18 @@ private:
const PlannerContextPtr planner_context; const PlannerContextPtr planner_context;
ActionNodeNameHelper action_node_name_helper; ActionNodeNameHelper action_node_name_helper;
bool use_column_identifier_as_action_node_name; bool use_column_identifier_as_action_node_name;
bool always_use_const_column_for_constant_nodes;
}; };
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAG & actions_dag, PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(
ActionsDAG & actions_dag,
const PlannerContextPtr & planner_context_, const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_) bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_)
: planner_context(planner_context_) : planner_context(planner_context_)
, action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_) , action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
, always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_)
{ {
actions_stack.emplace_back(actions_dag, nullptr); actions_stack.emplace_back(actions_dag, nullptr);
} }
@ -725,17 +748,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
column.type = constant_type; column.type = constant_type;
column.column = column.type->createColumnConst(1, constant_literal); column.column = column.type->createColumnConst(1, constant_literal);
actions_stack[0].addConstantIfNecessary(constant_node_name, column); String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column, always_use_const_column_for_constant_nodes);
size_t actions_stack_size = actions_stack.size(); size_t actions_stack_size = actions_stack.size();
for (size_t i = 1; i < actions_stack_size; ++i) for (size_t i = 1; i < actions_stack_size; ++i)
{ {
auto & actions_stack_node = actions_stack[i]; auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column); actions_stack_node.addInputConstantColumnIfNecessary(final_name, column);
} }
return {constant_node_name, Levels(0)}; return {final_name, Levels(0)};
} }
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node) PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node)
@ -864,16 +886,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma
else else
column.column = std::move(column_set); column.column = std::move(column_set);
actions_stack[0].addConstantIfNecessary(column.name, column); String final_name = actions_stack[0].addConstantIfNecessary(column.name, column, always_use_const_column_for_constant_nodes);
size_t actions_stack_size = actions_stack.size(); size_t actions_stack_size = actions_stack.size();
for (size_t i = 1; i < actions_stack_size; ++i) for (size_t i = 1; i < actions_stack_size; ++i)
{ {
auto & actions_stack_node = actions_stack[i]; auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputConstantColumnIfNecessary(column.name, column); actions_stack_node.addInputConstantColumnIfNecessary(final_name, column);
} }
return {column.name, Levels(0)}; return {final_name, Levels(0)};
} }
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node) PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node)
@ -1010,14 +1032,19 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
} }
PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_) PlannerActionsVisitor::PlannerActionsVisitor(
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_)
: planner_context(planner_context_) : planner_context(planner_context_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
, always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_)
{} {}
ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node) ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node)
{ {
PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name); PlannerActionsVisitorImpl actions_visitor_impl(
actions_dag, planner_context, use_column_identifier_as_action_node_name, always_use_const_column_for_constant_nodes);
return actions_visitor_impl.visit(expression_node); return actions_visitor_impl.visit(expression_node);
} }


@ -27,11 +27,17 @@ using PlannerContextPtr = std::shared_ptr<PlannerContext>;
* During actions build, there is special handling for following functions: * During actions build, there is special handling for following functions:
* 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added. * 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added.
* 2. For function `in` and its variants, already collected sets from planner context are used. * 2. For function `in` and its variants, already collected sets from planner context are used.
* 3. When building actions that use CONSTANT nodes, by default we ignore pre-existing INPUTs if those don't have
* a column (a const column always has a column). This is for compatibility with previous headers. We disable this
* behaviour when we explicitly want to override CONSTANT nodes with the input (resolving InterpolateNode for example)
*/ */
class PlannerActionsVisitor class PlannerActionsVisitor
{ {
public: public:
explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_ = true); explicit PlannerActionsVisitor(
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_ = true,
bool always_use_const_column_for_constant_nodes_ = true);
/** Add actions necessary to calculate expression node into expression dag. /** Add actions necessary to calculate expression node into expression dag.
* Necessary actions are not added in actions dag output. * Necessary actions are not added in actions dag output.
@ -42,6 +48,7 @@ public:
private: private:
const PlannerContextPtr planner_context; const PlannerContextPtr planner_context;
bool use_column_identifier_as_action_node_name = true; bool use_column_identifier_as_action_node_name = true;
bool always_use_const_column_for_constant_nodes = true;
}; };
/** Calculate query tree expression node action dag name and add them into node to name map. /** Calculate query tree expression node action dag name and add them into node to name map.
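The dupped-name logic above boils down to: reuse an existing node for a constant only if it can actually serve as a constant (or if we deliberately prefer the existing input); otherwise register the constant under "<name>_dupped". A standalone sketch of that lookup, with a plain map standing in for the actions DAG (hypothetical simplified types):
#include <iostream>
#include <map>
#include <string>

/// Toy model of an actions-DAG node: all that matters here is whether it carries a constant column.
struct NodeSketch
{
    bool has_const_column;
};

/// Mirrors the idea of addConstantIfNecessary: reuse an existing node when allowed,
/// otherwise register the constant under "<name>_dupped" and return that name.
std::string addConstantIfNecessarySketch(
    std::map<std::string, NodeSketch> & nodes, const std::string & name, bool always_use_const_column)
{
    auto it = nodes.find(name);
    if (it != nodes.end() && (!always_use_const_column || it->second.has_const_column))
        return name;

    if (it != nodes.end())
    {
        std::string dupped = name + "_dupped";
        nodes.emplace(dupped, NodeSketch{true});
        return dupped;
    }

    nodes.emplace(name, NodeSketch{true});
    return name;
}

int main()
{
    std::map<std::string, NodeSketch> nodes{{"x_plus_1", NodeSketch{false}}};   /// existing INPUT without a column
    std::cout << addConstantIfNecessarySketch(nodes, "x_plus_1", /*always_use_const_column=*/ true) << '\n';   /// x_plus_1_dupped
}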


@ -77,7 +77,6 @@ namespace ErrorCodes
extern const int INVALID_JOIN_ON_EXPRESSION; extern const int INVALID_JOIN_ON_EXPRESSION;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
extern const int SYNTAX_ERROR;
extern const int ACCESS_DENIED; extern const int ACCESS_DENIED;
extern const int PARAMETER_OUT_OF_BOUND; extern const int PARAMETER_OUT_OF_BOUND;
extern const int TOO_MANY_COLUMNS; extern const int TOO_MANY_COLUMNS;
@ -1417,12 +1416,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
{ {
if (!join_clause.hasASOF()) if (!join_clause.hasASOF())
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"JOIN {} no inequality in ASOF JOIN ON section.", "JOIN {} no inequality in ASOF JOIN ON section",
join_node.formatASTForErrorMessage());
if (table_join_clause.key_names_left.size() <= 1)
throw Exception(ErrorCodes::SYNTAX_ERROR,
"JOIN {} ASOF join needs at least one equi-join column",
join_node.formatASTForErrorMessage()); join_node.formatASTForErrorMessage());
} }
@ -1544,7 +1538,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
{ {
const auto & join_clause = table_join->getOnlyClause(); const auto & join_clause = table_join->getOnlyClause();
bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind); bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));
auto has_non_const = [](const Block & block, const auto & keys) auto has_non_const = [](const Block & block, const auto & keys)
{ {
@ -1564,7 +1560,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left) bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left)
&& has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right); && has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right);
if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys) if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
{ {
auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left); auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left);
auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right); auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right);


@ -34,13 +34,20 @@ namespace ErrorCodes
namespace namespace
{ {
FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns) FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, JoinStrictness strictness)
{ {
SortDescription desc; SortDescription desc;
desc.reserve(columns.size()); desc.reserve(columns.size());
for (const auto & name : columns) for (const auto & name : columns)
desc.emplace_back(name); desc.emplace_back(name);
return std::make_unique<FullMergeJoinCursor>(block, desc); return std::make_unique<FullMergeJoinCursor>(block, desc, strictness == JoinStrictness::Asof);
}
bool ALWAYS_INLINE isNullAt(const IColumn & column, size_t row)
{
if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(&column))
return nullable_column->isNullAt(row);
return false;
} }
template <bool has_left_nulls, bool has_right_nulls> template <bool has_left_nulls, bool has_right_nulls>
@ -54,7 +61,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
if (left_nullable && right_nullable) if (left_nullable && right_nullable)
{ {
int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint); int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint);
if (res) if (res != 0)
return res; return res;
/// NULL != NULL case /// NULL != NULL case
@ -90,9 +97,10 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos, int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
const SortCursorImpl & rhs, size_t rpos, const SortCursorImpl & rhs, size_t rpos,
size_t key_length,
int null_direction_hint) int null_direction_hint)
{ {
for (size_t i = 0; i < lhs.sort_columns_size; ++i) for (size_t i = 0; i < key_length; ++i)
{ {
/// TODO(@vdimir): use nullableCompareAt only if there's nullable columns /// TODO(@vdimir): use nullableCompareAt only if there's nullable columns
int cmp = nullableCompareAt<true, true>(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint); int cmp = nullableCompareAt<true, true>(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint);
@ -104,13 +112,18 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint) int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint)
{ {
return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), null_direction_hint); return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
}
int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs, int null_direction_hint)
{
return nullableCompareAt<true, true>(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow(), null_direction_hint);
} }
bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint) bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint)
{ {
/// The last row of left cursor is less than the current row of the right cursor. /// The last row of left cursor is less than the current row of the right cursor.
int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), null_direction_hint); int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
return cmp < 0; return cmp < 0;
} }
@ -222,25 +235,136 @@ Chunk getRowFromChunk(const Chunk & chunk, size_t pos)
return result; return result;
} }
void inline addRange(PaddedPODArray<UInt64> & left_map, size_t start, size_t end) void inline addRange(PaddedPODArray<UInt64> & values, UInt64 start, UInt64 end)
{ {
assert(end > start); assert(end > start);
for (size_t i = start; i < end; ++i) for (UInt64 i = start; i < end; ++i)
left_map.push_back(i); values.push_back(i);
} }
void inline addMany(PaddedPODArray<UInt64> & left_or_right_map, size_t idx, size_t num) void inline addMany(PaddedPODArray<UInt64> & values, UInt64 value, size_t num)
{ {
for (size_t i = 0; i < num; ++i) values.resize_fill(values.size() + num, value);
left_or_right_map.push_back(idx);
} }
} }
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos)
: sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_)
{ {
row.reserve(cursor->sort_columns.size());
for (const auto & col : cursor->sort_columns)
{
auto new_col = col->cloneEmpty();
new_col->insertFrom(*col, pos);
row.push_back(std::move(new_col));
}
if (const IColumn * asof_column = cursor.getAsofColumn())
{
if (const auto * nullable_asof_column = checkAndGetColumn<ColumnNullable>(asof_column))
{
/// We save matched column, and since NULL do not match anything, we can't use it as a key
chassert(!nullable_asof_column->isNullAt(pos));
asof_column = nullable_asof_column->getNestedColumnPtr().get();
}
auto new_col = asof_column->cloneEmpty();
new_col->insertFrom(*asof_column, pos);
row.push_back(std::move(new_col));
}
} }
void JoinKeyRow::reset()
{
row.clear();
}
bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const
{
if (row.empty())
return false;
for (size_t i = 0; i < cursor->sort_columns_size; ++i)
{
// int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction);
int cmp = nullableCompareAt<true, true>(*this->row[i], *cursor->sort_columns[i], 0, cursor->getRow(), cursor->desc[i].nulls_direction);
if (cmp != 0)
return false;
}
return true;
}
bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
{
chassert(this->row.size() == cursor->sort_columns_size + 1);
if (!equals(cursor))
return false;
const auto & asof_row = row.back();
if (isNullAt(*asof_row, 0) || isNullAt(*cursor.getAsofColumn(), cursor->getRow()))
return false;
int cmp = 0;
if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(cursor.getAsofColumn()))
cmp = nullable_column->getNestedColumn().compareAt(cursor->getRow(), 0, *asof_row, 1);
else
cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1);
return (asof_inequality == ASOFJoinInequality::Less && cmp < 0)
|| (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0)
|| (asof_inequality == ASOFJoinInequality::Greater && cmp > 0)
|| (asof_inequality == ASOFJoinInequality::GreaterOrEquals && cmp >= 0);
}
void AnyJoinState::set(size_t source_num, const FullMergeJoinCursor & cursor)
{
assert(cursor->rows);
keys[source_num] = JoinKeyRow(cursor, cursor->rows - 1);
}
void AnyJoinState::reset(size_t source_num)
{
keys[source_num].reset();
value.clear();
}
void AnyJoinState::setValue(Chunk value_)
{
value = std::move(value_);
}
bool AnyJoinState::empty() const { return keys[0].row.empty() && keys[1].row.empty(); }
void AsofJoinState::set(const FullMergeJoinCursor & rcursor, size_t rpos)
{
key = JoinKeyRow(rcursor, rpos);
value = rcursor.getCurrent().clone();
value_row = rpos;
}
void AsofJoinState::reset()
{
key.reset();
value.clear();
}
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof)
: sample_block(materializeBlock(sample_block_).cloneEmpty())
, desc(description_)
{
if (desc.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor");
if (is_asof)
{
/// For ASOF join prefix of sort description is used for equality comparison
/// and the last column is used for inequality comparison and is handled separately
auto asof_column_description = desc.back();
desc.pop_back();
chassert(asof_column_description.direction == 1 && asof_column_description.nulls_direction == 1);
asof_column_position = sample_block.getPositionByName(asof_column_description.column_name);
}
}
}
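For ASOF, the constructor peels the last sort-description entry off as the inequality column and keeps the remaining prefix as the equality key. A small standalone sketch of that split, with a list of column names standing in for SortDescription:
#include <cassert>
#include <string>
#include <utility>
#include <vector>

/// Toy sort description: just column names.
using SortDescriptionSketch = std::vector<std::string>;

/// Split "equality keys + trailing ASOF column" the way the cursor constructor does for ASOF joins.
std::pair<SortDescriptionSketch, std::string> splitAsofDescription(SortDescriptionSketch desc)
{
    assert(!desc.empty());
    std::string asof_column = desc.back();   /// compared with the inequality, handled separately
    desc.pop_back();                         /// the remaining prefix is compared for equality
    return {std::move(desc), std::move(asof_column)};
}

int main()
{
    auto [equality_keys, asof_column] = splitAsofDescription({"key1", "key2", "event_time"});
    assert(equality_keys.size() == 2 && asof_column == "event_time");
}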
const Chunk & FullMergeJoinCursor::getCurrent() const const Chunk & FullMergeJoinCursor::getCurrent() const
{ {
@ -278,48 +402,103 @@ bool FullMergeJoinCursor::fullyCompleted() const
return !cursor.isValid() && recieved_all_blocks; return !cursor.isValid() && recieved_all_blocks;
} }
String FullMergeJoinCursor::dump() const
{
Strings row_dump;
if (cursor.isValid())
{
Field val;
for (size_t i = 0; i < cursor.sort_columns_size; ++i)
{
cursor.sort_columns[i]->get(cursor.getRow(), val);
row_dump.push_back(val.dump());
}
if (const auto * asof_column = getAsofColumn())
{
asof_column->get(cursor.getRow(), val);
row_dump.push_back(val.dump());
}
}
return fmt::format("<{}/{}{}>[{}]",
cursor.getRow(), cursor.rows,
recieved_all_blocks ? "(finished)" : "",
fmt::join(row_dump, ", "));
}
MergeJoinAlgorithm::MergeJoinAlgorithm( MergeJoinAlgorithm::MergeJoinAlgorithm(
JoinPtr table_join_, JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers, const Blocks & input_headers,
size_t max_block_size_) size_t max_block_size_)
: table_join(table_join_) : kind(kind_)
, strictness(strictness_)
, max_block_size(max_block_size_) , max_block_size(max_block_size_)
, log(getLogger("MergeJoinAlgorithm")) , log(getLogger("MergeJoinAlgorithm"))
{ {
if (input_headers.size() != 2) if (input_headers.size() != 2)
throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs"); throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs");
auto strictness = table_join->getTableJoin().strictness(); if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All && strictness != JoinStrictness::Asof)
if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness);
auto kind = table_join->getTableJoin().kind(); if (strictness == JoinStrictness::Asof)
{
if (kind != JoinKind::Left && kind != JoinKind::Inner)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not implement ASOF {} join", kind);
}
if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind)) if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind);
const auto & join_on = table_join->getTableJoin().getOnlyClause(); if (on_clause_.on_filter_condition_left || on_clause_.on_filter_condition_right)
if (join_on.on_filter_condition_left || join_on.on_filter_condition_right)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions");
cursors = { cursors = {
createCursor(input_headers[0], join_on.key_names_left), createCursor(input_headers[0], on_clause_.key_names_left, strictness),
createCursor(input_headers[1], join_on.key_names_right) createCursor(input_headers[1], on_clause_.key_names_right, strictness),
}; };
}
for (const auto & [left_key, right_key] : table_join->getTableJoin().leftToRightKeyRemap()) MergeJoinAlgorithm::MergeJoinAlgorithm(
JoinPtr join_ptr,
const Blocks & input_headers,
size_t max_block_size_)
: MergeJoinAlgorithm(
join_ptr->getTableJoin().kind(),
join_ptr->getTableJoin().strictness(),
join_ptr->getTableJoin().getOnlyClause(),
input_headers,
max_block_size_)
{
for (const auto & [left_key, right_key] : join_ptr->getTableJoin().leftToRightKeyRemap())
{ {
size_t left_idx = input_headers[0].getPositionByName(left_key); size_t left_idx = input_headers[0].getPositionByName(left_key);
size_t right_idx = input_headers[1].getPositionByName(right_key); size_t right_idx = input_headers[1].getPositionByName(right_key);
left_to_right_key_remap[left_idx] = right_idx; left_to_right_key_remap[left_idx] = right_idx;
} }
const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(table_join.get()); const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(join_ptr.get());
if (smjPtr) if (smjPtr)
{ {
null_direction_hint = smjPtr->getNullDirection(); null_direction_hint = smjPtr->getNullDirection();
} }
if (strictness == JoinStrictness::Asof)
setAsofInequality(join_ptr->getTableJoin().getAsofInequality());
}
void MergeJoinAlgorithm::setAsofInequality(ASOFJoinInequality asof_inequality_)
{
if (strictness != JoinStrictness::Asof)
throw Exception(ErrorCodes::LOGICAL_ERROR, "setAsofInequality is only supported for ASOF joins");
if (asof_inequality_ == ASOFJoinInequality::None)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ASOF inequality cannot be None");
asof_inequality = asof_inequality_;
} }
void MergeJoinAlgorithm::logElapsed(double seconds) void MergeJoinAlgorithm::logElapsed(double seconds)
@ -407,7 +586,7 @@ struct AllJoinImpl
size_t lnum = nextDistinct(left_cursor.cursor);
size_t rnum = nextDistinct(right_cursor.cursor);

bool all_fit_in_block = !max_block_size || std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size;
bool have_all_ranges = left_cursor.cursor.isValid() && right_cursor.cursor.isValid();
if (all_fit_in_block && have_all_ranges)
{
@ -421,7 +600,7 @@ struct AllJoinImpl
else
{
assert(state == nullptr);
state = std::make_unique<AllJoinState>(left_cursor, lpos, right_cursor, rpos);
state->addRange(0, left_cursor.getCurrent().clone(), lpos, lnum);
state->addRange(1, right_cursor.getCurrent().clone(), rpos, rnum);
return;
@ -466,6 +645,17 @@ void dispatchKind(JoinKind kind, Args && ... args)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind);
} }
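/// Prepare empty result columns: left-side columns first, then right-side, matching the join output layout.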
MutableColumns MergeJoinAlgorithm::getEmptyResultColumns() const
{
MutableColumns result_cols;
for (size_t i = 0; i < 2; ++i)
{
for (const auto & col : cursors[i]->sampleColumns())
result_cols.push_back(col->cloneEmpty());
}
return result_cols;
}
std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState()
{
if (all_join_state && all_join_state->finished())
@ -479,7 +669,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
/// Accumulate blocks with same key in all_join_state
for (size_t i = 0; i < 2; ++i)
{
if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(*cursors[i]))
{
size_t pos = cursors[i]->cursor.getRow();
size_t num = nextDistinct(cursors[i]->cursor);
@ -499,15 +689,10 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
stat.max_blocks_loaded = std::max(stat.max_blocks_loaded, all_join_state->blocksStored());

/// join all rows with current key
MutableColumns result_cols = getEmptyResultColumns();

size_t total_rows = 0;
while (!max_block_size || total_rows < max_block_size)
{
const auto & left_range = all_join_state->getLeft();
const auto & right_range = all_join_state->getRight();

@ -532,7 +717,52 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
return {};
}
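/// Once the right input is exhausted, flush the remaining left rows: rows that still match the remembered
/// right row are joined with it, the rest get default right-side values for a LEFT join.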
std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAsofJoinState()
{
if (strictness != JoinStrictness::Asof)
return {};
if (!cursors[1]->fullyCompleted())
return {};
auto & left_cursor = *cursors[0];
const auto & left_columns = left_cursor.getCurrent().getColumns();
MutableColumns result_cols = getEmptyResultColumns();
while (left_cursor->isValid() && asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
left_cursor->next();
}
while (isLeft(kind) && left_cursor->isValid())
{
/// return row with default values at right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
for (; i < result_cols.size(); ++i)
result_cols[i]->insertDefault();
chassert(i == result_cols.size());
left_cursor->next();
}
size_t result_rows = result_cols.empty() ? 0 : result_cols.front()->size();
if (result_rows)
return Status(Chunk(std::move(result_cols), result_rows));
return {};
}
MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin()
{
PaddedPODArray<UInt64> idx_map[2];
@ -595,7 +825,7 @@ struct AnyJoinImpl
FullMergeJoinCursor & right_cursor,
PaddedPODArray<UInt64> & left_map,
PaddedPODArray<UInt64> & right_map,
AnyJoinState & any_join_state,
int null_direction_hint)
{
assert(enabled);
@ -656,21 +886,21 @@ struct AnyJoinImpl
}
}
/// Remember last joined row to propagate it to next block
any_join_state.setValue({});
if (!left_cursor->isValid())
{
any_join_state.set(0, left_cursor);
if (cmp == 0 && isLeft(kind))
any_join_state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos));
}

if (!right_cursor->isValid())
{
any_join_state.set(1, right_cursor);
if (cmp == 0 && isRight(kind))
any_join_state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos));
}
}
};

@ -680,40 +910,34 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
if (any_join_state.empty())
return {};
Chunk result;

for (size_t source_num = 0; source_num < 2; ++source_num)
{
auto & current = *cursors[source_num];
if (any_join_state.keys[source_num].equals(current))
{
size_t start_pos = current->getRow();
size_t length = nextDistinct(current.cursor);

if (length && isLeft(kind) && source_num == 0)
{
if (any_join_state.value)
result = copyChunkResized(current.getCurrent(), any_join_state.value, start_pos, length);
else
result = createBlockWithDefaults(source_num, start_pos, length);
}

if (length && isRight(kind) && source_num == 1)
{
if (any_join_state.value)
result = copyChunkResized(any_join_state.value, current.getCurrent(), start_pos, length);
else
result = createBlockWithDefaults(source_num, start_pos, length);
}

/// We've found row with other key, no need to skip more rows with current key
if (current->isValid())
any_join_state.keys[source_num].reset();
}
else
{
@ -726,7 +950,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
return {};
}

MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin()
{
if (auto result = handleAnyJoinState())
return std::move(*result);

@ -771,10 +995,151 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind)
return Status(std::move(result));
}
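/// Core ASOF merge loop: cursors are compared by the equality key first; on a key match the ASOF column
/// decides whether to emit a row, remember a candidate right row, or advance one of the cursors.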
MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin()
{
auto & left_cursor = *cursors[0];
if (!left_cursor->isValid())
return Status(0);
auto & right_cursor = *cursors[1];
if (!right_cursor->isValid())
return Status(1);
const auto & left_columns = left_cursor.getCurrent().getColumns();
const auto & right_columns = right_cursor.getCurrent().getColumns();
MutableColumns result_cols = getEmptyResultColumns();
while (left_cursor->isValid() && right_cursor->isValid())
{
auto lpos = left_cursor->getRow();
auto rpos = right_cursor->getRow();
auto cmp = compareCursors(*left_cursor, *right_cursor, null_direction_hint);
if (cmp == 0)
{
if (isNullAt(*left_cursor.getAsofColumn(), lpos))
cmp = -1;
if (isNullAt(*right_cursor.getAsofColumn(), rpos))
cmp = 1;
}
if (cmp == 0)
{
auto asof_cmp = compareAsofCursors(left_cursor, right_cursor, null_direction_hint);
if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1)
|| (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0))
{
/// The first row in the right table that is greater than (or equal to) the current row in the left table
/// is the best match for the ASOF join condition
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : right_columns)
result_cols[i++]->insertFrom(*col, rpos);
chassert(i == result_cols.size());
left_cursor->next();
continue;
}
if (asof_inequality == ASOFJoinInequality::Less || asof_inequality == ASOFJoinInequality::LessOrEquals)
{
/// Asof condition is not (yet) satisfied, skip row in right table
right_cursor->next();
continue;
}
if ((asof_inequality == ASOFJoinInequality::Greater && asof_cmp >= 1)
|| (asof_inequality == ASOFJoinInequality::GreaterOrEquals && asof_cmp >= 0))
{
/// Condition is satisfied, remember this row and move on to try to find a better match
asof_join_state.set(right_cursor, rpos);
right_cursor->next();
continue;
}
if (asof_inequality == ASOFJoinInequality::Greater || asof_inequality == ASOFJoinInequality::GreaterOrEquals)
{
/// Asof condition is not satisfied anymore, use last matched row from right table
if (asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
}
else
{
asof_join_state.reset();
if (isLeft(kind))
{
/// return row with default values at right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (; i < result_cols.size(); ++i)
result_cols[i]->insertDefault();
chassert(i == result_cols.size());
}
}
left_cursor->next();
continue;
}
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO: implement ASOF equality join");
}
else if (cmp < 0)
{
if (asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
left_cursor->next();
continue;
}
else
{
asof_join_state.reset();
}
/// no matches for rows in left table, just pass them through
size_t num = nextDistinct(*left_cursor);
if (isLeft(kind) && num)
{
/// return them with default values at right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertRangeFrom(*col, lpos, num);
for (; i < result_cols.size(); ++i)
result_cols[i]->insertManyDefaults(num);
chassert(i == result_cols.size());
}
}
else
{
/// skip rows in right table until we find match for current row in left table
nextDistinct(*right_cursor);
}
}
size_t num_rows = result_cols.empty() ? 0 : result_cols.front()->size();
return Status(Chunk(std::move(result_cols), num_rows));
}
/// if `source_num == 0` get data from left cursor and fill defaults at right
/// otherwise - vice versa
Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const
{
ColumnRawPtrs cols;
{
const auto & columns_left = source_num == 0 ? cursors[0]->getCurrent().getColumns() : cursors[0]->sampleColumns();
@ -797,7 +1162,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t star
cols.push_back(col.get());
}
}

Chunk result_chunk;
copyColumnsResized(cols, start, num_rows, result_chunk);
return result_chunk;
@ -813,7 +1177,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num)

IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
{
if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted())
return Status(0);

@ -821,11 +1184,11 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted())
return Status(1);

if (auto result = handleAllJoinState())
return std::move(*result);

if (auto result = handleAsofJoinState())
return std::move(*result);

if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted())
{
@ -839,7 +1202,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
}

/// check if blocks are not intersecting at all
if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0 && strictness != JoinStrictness::Asof)
{
if (cmp < 0)
{
@ -858,13 +1221,14 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
}
}

if (strictness == JoinStrictness::Any)
return anyJoin();

if (strictness == JoinStrictness::All)
return allJoin();

if (strictness == JoinStrictness::Asof)
return asofJoin();

throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness);
}

@ -883,9 +1247,26 @@ MergeJoinTransform::MergeJoinTransform(
/* always_read_till_end_= */ false,
/* empty_chunk_on_finish_= */ true,
table_join, input_headers, max_block_size)
{
}
MergeJoinTransform::MergeJoinTransform(
JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
const Block & output_header,
size_t max_block_size,
UInt64 limit_hint_)
: IMergingTransform<MergeJoinAlgorithm>(
input_headers,
output_header,
/* have_all_inputs_= */ true,
limit_hint_,
/* always_read_till_end_= */ false,
/* empty_chunk_on_finish_= */ true,
kind_, strictness_, on_clause_, input_headers, max_block_size)
{
}

void MergeJoinTransform::onFinish()
@ -8,6 +8,7 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include <array>

#include <boost/core/noncopyable.hpp>

@ -19,6 +20,7 @@
#include <Processors/Chunk.h>
#include <Processors/Merges/Algorithms/IMergingAlgorithm.h>
#include <Processors/Merges/IMergingTransform.h>
#include <Interpreters/TableJoin.h>

namespace Poco { class Logger; }

@ -35,57 +37,28 @@ using FullMergeJoinCursorPtr = std::unique_ptr<FullMergeJoinCursor>;

/// Used instead of storing previous block
struct JoinKeyRow
{
JoinKeyRow() = default;

JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos);

bool equals(const FullMergeJoinCursor & cursor) const;
bool asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const;

void reset();

std::vector<ColumnPtr> row;
};

/// Remembers previous key if it was joined in previous block
class AnyJoinState : boost::noncopyable
{
public:
void set(size_t source_num, const FullMergeJoinCursor & cursor);
void setValue(Chunk value_);

void reset(size_t source_num);

bool empty() const;

/// current keys
JoinKeyRow keys[2];
@ -118,8 +91,8 @@ public:
Chunk chunk;
};

AllJoinState(const FullMergeJoinCursor & lcursor, size_t lpos,
const FullMergeJoinCursor & rcursor, size_t rpos)
: keys{JoinKeyRow(lcursor, lpos), JoinKeyRow(rcursor, rpos)}
{
}
@ -187,13 +160,32 @@ private:
size_t ridx = 0;
};
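/// Remembers the last right-table row that satisfied the ASOF condition, so it can be reused
/// for subsequent left rows with the same key.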
class AsofJoinState : boost::noncopyable
{
public:
void set(const FullMergeJoinCursor & rcursor, size_t rpos);
void reset();
bool hasMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
{
if (value.empty())
return false;
return key.asofMatch(cursor, asof_inequality);
}
JoinKeyRow key;
Chunk value;
size_t value_row = 0;
};
/*
* Wrapper for SortCursorImpl
*/
class FullMergeJoinCursor : boost::noncopyable
{
public:
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof = false);

bool fullyCompleted() const;
void setChunk(Chunk && chunk);
@ -203,17 +195,31 @@ public:
SortCursorImpl * operator-> () { return &cursor; }
const SortCursorImpl * operator-> () const { return &cursor; }

SortCursorImpl & operator* () { return cursor; }
const SortCursorImpl & operator* () const { return cursor; }

SortCursorImpl cursor;

const Block & sampleBlock() const { return sample_block; }
Columns sampleColumns() const { return sample_block.getColumns(); }
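/// Returns the ASOF column of the current block, or nullptr if the cursor was created without one.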
const IColumn * getAsofColumn() const
{
if (!asof_column_position)
return nullptr;
return cursor.all_columns[*asof_column_position];
}
String dump() const;
private:
Block sample_block;
SortDescription desc;

Chunk current_chunk;
bool recieved_all_blocks = false;

std::optional<size_t> asof_column_position;
};

/*
@ -223,22 +229,33 @@ private:
class MergeJoinAlgorithm final : public IMergingAlgorithm
{
public:
MergeJoinAlgorithm(JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
size_t max_block_size_);

MergeJoinAlgorithm(JoinPtr join_ptr, const Blocks & input_headers, size_t max_block_size_);

const char * getName() const override { return "MergeJoinAlgorithm"; }
void initialize(Inputs inputs) override;
void consume(Input & input, size_t source_num) override;
Status merge() override;

void setAsofInequality(ASOFJoinInequality asof_inequality_);

void logElapsed(double seconds);

private:
std::optional<Status> handleAnyJoinState();
Status anyJoin();

std::optional<Status> handleAllJoinState();
Status allJoin();

std::optional<Status> handleAsofJoinState();
Status asofJoin();

MutableColumns getEmptyResultColumns() const;

Chunk createBlockWithDefaults(size_t source_num);
Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const;
@ -246,12 +263,15 @@ private:
std::unordered_map<size_t, size_t> left_to_right_key_remap;

std::array<FullMergeJoinCursorPtr, 2> cursors;

ASOFJoinInequality asof_inequality = ASOFJoinInequality::None;

/// Keep some state to handle data from different blocks
AnyJoinState any_join_state;
std::unique_ptr<AllJoinState> all_join_state;
AsofJoinState asof_join_state;

JoinKind kind;
JoinStrictness strictness;

size_t max_block_size;
int null_direction_hint = 1;
@ -281,12 +301,21 @@ public:
size_t max_block_size,
UInt64 limit_hint = 0);
MergeJoinTransform(
JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
const Block & output_header,
size_t max_block_size,
UInt64 limit_hint_ = 0);
String getName() const override { return "MergeJoinTransform"; }

void setAsofInequality(ASOFJoinInequality asof_inequality_) { algorithm.setAsofInequality(asof_inequality_); }

protected:
void onFinish() override;
};

}
@ -109,7 +109,7 @@ void ScatterByPartitionTransform::generateOutputChunks()
hash.reset(num_rows);
for (const auto & column_number : key_columns)
hash.update(columns[column_number]->getWeakHash32());

const auto & hash_data = hash.getData();
IColumn::Selector selector(num_rows);
@ -0,0 +1,768 @@
#include <gtest/gtest.h>
#include <pcg_random.hpp>
#include <random>
#include <Poco/ConsoleChannel.h>
#include <Poco/Logger.h>
#include <Poco/AutoPtr.h>
#include <Columns/ColumnsNumber.h>
#include <Common/getRandomASCIIString.h>
#include <Common/randomSeed.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/TableJoin.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Sinks/NullSink.h>
#include <Processors/Sources/SourceFromChunks.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Transforms/MergeJoinTransform.h>
#include <Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <QueryPipeline/QueryPipeline.h>
using namespace DB;
namespace
{
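/// Builds a two-source pipeline joined by MergeJoinTransform; output columns are prefixed with "t1." and "t2.".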
QueryPipeline buildJoinPipeline(
std::shared_ptr<ISource> left_source,
std::shared_ptr<ISource> right_source,
size_t key_length = 1,
JoinKind kind = JoinKind::Inner,
JoinStrictness strictness = JoinStrictness::All,
ASOFJoinInequality asof_inequality = ASOFJoinInequality::None)
{
Blocks inputs;
inputs.emplace_back(left_source->getPort().getHeader());
inputs.emplace_back(right_source->getPort().getHeader());
Block out_header;
for (const auto & input : inputs)
{
for (ColumnWithTypeAndName column : input)
{
if (&input == &inputs.front())
column.name = "t1." + column.name;
else
column.name = "t2." + column.name;
out_header.insert(column);
}
}
TableJoin::JoinOnClause on_clause;
for (size_t i = 0; i < key_length; ++i)
{
on_clause.key_names_left.emplace_back(inputs[0].getByPosition(i).name);
on_clause.key_names_right.emplace_back(inputs[1].getByPosition(i).name);
}
auto joining = std::make_shared<MergeJoinTransform>(
kind,
strictness,
on_clause,
inputs, out_header, /* max_block_size = */ 0);
if (asof_inequality != ASOFJoinInequality::None)
joining->setAsofInequality(asof_inequality);
chassert(joining->getInputs().size() == 2);
connect(left_source->getPort(), joining->getInputs().front());
connect(right_source->getPort(), joining->getInputs().back());
auto * output_port = &joining->getOutputPort();
auto processors = std::make_shared<Processors>();
processors->emplace_back(std::move(left_source));
processors->emplace_back(std::move(right_source));
processors->emplace_back(std::move(joining));
QueryPipeline pipeline(QueryPlanResourceHolder{}, processors, output_port);
return pipeline;
}
std::shared_ptr<ISource> oneColumnSource(const std::vector<std::vector<UInt64>> & values)
{
Block header = {
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "key"),
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "idx"),
};
UInt64 idx = 0;
Chunks chunks;
for (const auto & chunk_values : values)
{
auto key_column = ColumnUInt64::create();
auto idx_column = ColumnUInt64::create();
for (auto n : chunk_values)
{
key_column->insertValue(n);
idx_column->insertValue(idx);
++idx;
}
chunks.emplace_back(Chunk(Columns{std::move(key_column), std::move(idx_column)}, chunk_values.size()));
}
return std::make_shared<SourceFromChunks>(header, std::move(chunks));
}
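/// Test helper that accumulates rows and splits them into chunks, optionally at random boundaries,
/// to exercise block-boundary handling in the join.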
class SourceChunksBuilder
{
public:
explicit SourceChunksBuilder(const Block & header_)
: header(header_)
{
current_chunk = header.cloneEmptyColumns();
chassert(!current_chunk.empty());
}
void setBreakProbability(pcg64 & rng_)
{
/// random probability with possibility to have exact 0.0 and 1.0 values
break_prob = std::uniform_int_distribution<size_t>(0, 5)(rng_) / static_cast<double>(5);
rng = &rng_;
}
void addRow(const std::vector<Field> & row)
{
chassert(row.size() == current_chunk.size());
for (size_t i = 0; i < current_chunk.size(); ++i)
current_chunk[i]->insert(row[i]);
if (rng && std::uniform_real_distribution<>(0.0, 1.0)(*rng) < break_prob)
addChunk();
}
void addChunk()
{
if (current_chunk.front()->empty())
return;
size_t rows = current_chunk.front()->size();
chunks.emplace_back(std::move(current_chunk), rows);
current_chunk = header.cloneEmptyColumns();
}
std::shared_ptr<ISource> getSource()
{
addChunk();
/// copy chunk to allow reusing same builder
Chunks chunks_copy;
chunks_copy.reserve(chunks.size());
for (const auto & chunk : chunks)
chunks_copy.emplace_back(chunk.clone());
return std::make_shared<SourceFromChunks>(header, std::move(chunks_copy));
}
private:
Block header;
Chunks chunks;
MutableColumns current_chunk;
pcg64 * rng = nullptr;
double break_prob = 0.0;
};
std::vector<std::vector<Field>> getValuesFromBlock(const Block & block, const Names & names)
{
std::vector<std::vector<Field>> result;
for (size_t i = 0; i < block.rows(); ++i)
{
auto & row = result.emplace_back();
for (const auto & name : names)
block.getByName(name).column->get(i, row.emplace_back());
}
return result;
}
Block executePipeline(QueryPipeline && pipeline)
{
PullingPipelineExecutor executor(pipeline);
Blocks result_blocks;
while (true)
{
Block block;
bool is_ok = executor.pull(block);
if (!is_ok)
break;
result_blocks.emplace_back(std::move(block));
}
return concatenateBlocks(result_blocks);
}
template <typename T>
void assertColumnVectorEq(const typename ColumnVector<T>::Container & expected, const Block & block, const std::string & name)
{
const auto * actual = typeid_cast<const ColumnVector<T> *>(block.getByName(name).column.get());
ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector<T>).name();
auto get_first_diff = [&]() -> String
{
const auto & actual_data = actual->getData();
size_t num_rows = std::min(expected.size(), actual_data.size());
for (size_t i = 0; i < num_rows; ++i)
{
if (expected[i] != actual_data[i])
return fmt::format(", expected: {}, actual: {} at row {}", expected[i], actual_data[i], i);
}
return "";
};
EXPECT_EQ(actual->getData().size(), expected.size());
ASSERT_EQ(actual->getData(), expected) << "column name: " << name << get_first_diff();
}
template <typename T>
void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name)
{
const ColumnPtr & actual = block.getByName(name).column;
ASSERT_TRUE(checkColumn<T>(*actual));
ASSERT_TRUE(checkColumn<T>(expected));
EXPECT_EQ(actual->size(), expected.size());
auto dump_val = [](const IColumn & col, size_t i) -> String
{
Field value;
col.get(i, value);
return value.dump();
};
size_t num_rows = std::min(actual->size(), expected.size());
for (size_t i = 0; i < num_rows; ++i)
ASSERT_EQ(actual->compareAt(i, i, expected, 1), 0) << dump_val(*actual, i) << " != " << dump_val(expected, i) << " at row " << i;
}
template <typename T>
T getRandomFrom(pcg64 & rng, const std::initializer_list<T> & opts)
{
std::vector<T> options(opts.begin(), opts.end());
size_t idx = std::uniform_int_distribution<size_t>(0, options.size() - 1)(rng);
return options[idx];
}
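/// Advances (k1, k2) to a key that compares strictly greater than the previous one in (k1, k2) order.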
void generateNextKey(pcg64 & rng, UInt64 & k1, String & k2)
{
size_t str_len = std::uniform_int_distribution<>(1, 10)(rng);
String new_k2 = getRandomASCIIString(str_len, rng);
if (new_k2.compare(k2) <= 0)
++k1;
k2 = new_k2;
}
bool isStrict(ASOFJoinInequality inequality)
{
return inequality == ASOFJoinInequality::Less || inequality == ASOFJoinInequality::Greater;
}
}
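/// Test fixture that configures logging and seeds the RNG; the seed is printed and can be fixed
/// via TEST_RANDOM_SEED to reproduce failures.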
class FullSortingJoinTest : public ::testing::Test
{
public:
FullSortingJoinTest() = default;
void SetUp() override
{
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
Poco::Logger::root().setChannel(channel);
if (const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe)
Poco::Logger::root().setLevel(test_log_level);
else
Poco::Logger::root().setLevel("none");
UInt64 seed = randomSeed();
if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe)
seed = std::stoull(random_seed);
std::cout << "TEST_RANDOM_SEED=" << seed << std::endl;
rng = pcg64(seed);
}
void TearDown() override
{
}
pcg64 rng;
};
TEST_F(FullSortingJoinTest, AllAnyOneKey)
try
{
{
SCOPED_TRACE("Inner All");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {1, 2, 3, 4, 5} }),
oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
1, JoinKind::Inner, JoinStrictness::All));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {1, 2, 3, 4, 5} }),
oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
1, JoinKind::Inner, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner All");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
1, JoinKind::Inner, JoinStrictness::All));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 0, 1, 2, 3, 3, 4, 5}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 4, 4, 4, 3, 4, 5, 5}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
1, JoinKind::Inner, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
1, JoinKind::Inner, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
}
{
SCOPED_TRACE("Left Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
1, JoinKind::Left, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
}
{
SCOPED_TRACE("Left Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
1, JoinKind::Left, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
}
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AnySimple)
try
{
JoinKind kind = getRandomFrom(rng, {JoinKind::Inner, JoinKind::Left, JoinKind::Right});
SourceChunksBuilder left_source({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeString>(), "attr"},
});
SourceChunksBuilder right_source({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeString>(), "attr"},
});
left_source.setBreakProbability(rng);
right_source.setBreakProbability(rng);
size_t num_keys = std::uniform_int_distribution<>(100, 1000)(rng);
auto expected_left = ColumnString::create();
auto expected_right = ColumnString::create();
UInt64 k1 = 1;
String k2;
auto get_attr = [&](const String & side, size_t idx) -> String
{
return toString(k1) + "_" + k2 + "_" + side + "_" + toString(idx);
};
for (size_t i = 0; i < num_keys; ++i)
{
generateNextKey(rng, k1, k2);
/// Key is present in the left table, the right table, or both; presence in both is more probable.
size_t key_presence = std::uniform_int_distribution<>(0, 10)(rng);
size_t num_rows_left = key_presence == 0 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
for (size_t j = 0; j < num_rows_left; ++j)
left_source.addRow({k1, k2, get_attr("left", j)});
size_t num_rows_right = key_presence == 1 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
for (size_t j = 0; j < num_rows_right; ++j)
right_source.addRow({k1, k2, get_attr("right", j)});
String left_attr = num_rows_left ? get_attr("left", 0) : "";
String right_attr = num_rows_right ? get_attr("right", 0) : "";
if (kind == JoinKind::Inner && num_rows_left && num_rows_right)
{
expected_left->insert(left_attr);
expected_right->insert(right_attr);
}
else if (kind == JoinKind::Left)
{
for (size_t j = 0; j < num_rows_left; ++j)
{
expected_left->insert(get_attr("left", j));
expected_right->insert(right_attr);
}
}
else if (kind == JoinKind::Right)
{
for (size_t j = 0; j < num_rows_right; ++j)
{
expected_left->insert(left_attr);
expected_right->insert(get_attr("right", j));
}
}
}
Block result_block = executePipeline(buildJoinPipeline(
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
kind, JoinStrictness::Any));
assertColumnEq<ColumnString>(*expected_left, result_block, "t1.attr");
assertColumnEq<ColumnString>(*expected_right, result_block, "t2.attr");
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AsofSimple)
try
{
SourceChunksBuilder left_source({
{std::make_shared<DataTypeString>(), "key"},
{std::make_shared<DataTypeUInt64>(), "t"},
});
left_source.addRow({"AMZN", 3});
left_source.addRow({"AMZN", 4});
left_source.addRow({"AMZN", 6});
left_source.addRow({"SBUX", 10});
SourceChunksBuilder right_source({
{std::make_shared<DataTypeString>(), "key"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeUInt64>(), "value"},
});
right_source.addRow({"AAPL", 1, 97});
right_source.addChunk();
right_source.addRow({"AAPL", 2, 98});
right_source.addRow({"AAPL", 3, 99});
right_source.addRow({"AMZN", 1, 100});
right_source.addRow({"AMZN", 2, 110});
right_source.addChunk();
right_source.addRow({"AMZN", 2, 110});
right_source.addChunk();
right_source.addRow({"AMZN", 4, 130});
right_source.addRow({"AMZN", 5, 140});
right_source.addRow({"SBUX", 8, 180});
right_source.addChunk();
right_source.addRow({"SBUX", 9, 190});
{
Block result_block = executePipeline(buildJoinPipeline(
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals));
auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});
ASSERT_EQ(values, (std::vector<std::vector<Field>>{
{"AMZN", 3u, 4u, 130u},
{"AMZN", 4u, 4u, 130u},
}));
}
{
Block result_block = executePipeline(buildJoinPipeline(
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals));
auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});
ASSERT_EQ(values, (std::vector<std::vector<Field>>{
{"AMZN", 3u, 2u, 110u},
{"AMZN", 4u, 4u, 130u},
{"AMZN", 6u, 5u, 140u},
{"SBUX", 10u, 9u, 190u},
}));
}
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AsofOnlyColumn)
try
{
auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} });
SourceChunksBuilder right_source_builder({
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeUInt64>(), "value"},
});
right_source_builder.setBreakProbability(rng);
for (const auto & row : std::vector<std::vector<Field>>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} })
right_source_builder.addRow(row);
auto right_source = right_source_builder.getSource();
auto pipeline = buildJoinPipeline(
left_source, right_source, /* key_length = */ 1,
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals);
Block result_block = executePipeline(std::move(pipeline));
ASSERT_EQ(
assert_cast<const ColumnUInt64 *>(result_block.getByName("t1.key").column.get())->getData(),
(ColumnUInt64::Container{3, 3, 3, 3, 3, 5, 5, 6, 9, 9, 10})
);
ASSERT_EQ(
assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.t").column.get())->getData(),
(ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 11})
);
ASSERT_EQ(
assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.value").column.get())->getData(),
(ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 111})
);
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AsofLessGeneratedTestData)
try
{
/// Generate random data and build the expected result at the same time.
/// Each run tests a specific combination of join kind and inequality.
auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals });
SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));
/// Key is complex, `k1, k2` for equality and `t` for asof
SourceChunksBuilder left_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
SourceChunksBuilder right_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
/// Controls how small the generated blocks are
left_source_builder.setBreakProbability(rng);
right_source_builder.setBreakProbability(rng);
/// We are going to generate sorted data and remember expected result
ColumnInt64::Container expected;
UInt64 k1 = 1;
String k2;
auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
for (size_t key_num = 0; key_num < key_num_total; ++key_num)
{
/// Generate new key greater than previous
generateNextKey(rng, k1, k2);
Int64 left_t = 0;
/// Generate several rows for the key
size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng);
for (size_t i = 0; i < num_left_rows; ++i)
{
/// t is strictly greater than previous
left_t += std::uniform_int_distribution<>(1, 10)(rng);
auto left_arrtibute_value = 10 * left_t;
left_source_builder.addRow({k1, k2, left_t, left_arrtibute_value});
expected.push_back(left_arrtibute_value);
auto num_matches = 1 + std::poisson_distribution<>(4)(rng);
/// Generate several matches in the right table
auto right_t = left_t;
for (size_t j = 0; j < num_matches; ++j)
{
int min_step = isStrict(asof_inequality) ? 1 : 0;
right_t += std::uniform_int_distribution<>(min_step, 3)(rng);
/// First row should match
bool is_match = j == 0;
right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * left_arrtibute_value : -1});
}
/// Next left_t should be greater than right_t so that it does not match the previous rows
left_t = right_t;
}
/// generate some rows with greater left_t to check that they are not matched
num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
for (size_t i = 0; i < num_left_rows; ++i)
{
left_t += std::uniform_int_distribution<>(1, 10)(rng);
left_source_builder.addRow({k1, k2, left_t, -10 * left_t});
if (join_kind == JoinKind::Left)
expected.push_back(-10 * left_t);
}
}
Block result_block = executePipeline(buildJoinPipeline(
left_source_builder.getSource(), right_source_builder.getSource(),
/* key_length = */ 3,
join_kind, JoinStrictness::Asof, asof_inequality));
assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");
for (auto & e : expected)
/// Non-matched rows from the left table have a negative attr;
/// the attribute value in the right table is 10 times greater than in the left table
e = e < 0 ? 0 : 10 * e;
assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AsofGreaterGeneratedTestData)
try
{
/// Generate random data and build the expected result at the same time.
/// Each run tests a specific combination of join kind and inequality.
auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals });
SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));
SourceChunksBuilder left_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
SourceChunksBuilder right_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
left_source_builder.setBreakProbability(rng);
right_source_builder.setBreakProbability(rng);
ColumnInt64::Container expected;
UInt64 k1 = 1;
String k2;
UInt64 left_t = 0;
auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
for (size_t key_num = 0; key_num < key_num_total; ++key_num)
{
/// Generate new key greater than previous
generateNextKey(rng, k1, k2);
/// Generate some rows with smaller left_t to check that they are not matched
size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
for (size_t i = 0; i < num_left_rows; ++i)
{
left_t += std::uniform_int_distribution<>(1, 10)(rng);
left_source_builder.addRow({k1, k2, left_t, -10 * left_t});
if (join_kind == JoinKind::Left)
expected.push_back(-10 * left_t);
}
if (std::bernoulli_distribution(0.1)(rng))
continue;
size_t num_right_matches = std::uniform_int_distribution<>(1, 10)(rng);
auto right_t = left_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 0 : 1, 10)(rng);
auto attribute_value = 10 * right_t;
for (size_t j = 0; j < num_right_matches; ++j)
{
right_t += std::uniform_int_distribution<>(0, 3)(rng);
bool is_match = j == num_right_matches - 1;
right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * attribute_value : -1});
}
/// Next left_t should be greater than (or equal to) right_t to match the previous rows
left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 100)(rng);
size_t num_left_matches = std::uniform_int_distribution<>(1, 100)(rng);
for (size_t j = 0; j < num_left_matches; ++j)
{
left_t += std::uniform_int_distribution<>(0, 3)(rng);
left_source_builder.addRow({k1, k2, left_t, attribute_value});
expected.push_back(attribute_value);
}
}
Block result_block = executePipeline(buildJoinPipeline(
left_source_builder.getSource(), right_source_builder.getSource(),
/* key_length = */ 3,
join_kind, JoinStrictness::Asof, asof_inequality));
assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");
for (auto & e : expected)
/// Non-matched rows from the left table have a negative attr;
/// the attribute value in the right table is 10 times greater than in the left table
e = e < 0 ? 0 : 10 * e;
assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
@ -155,6 +155,10 @@ void printExceptionWithRespectToAbort(LoggerPtr log, const String & query_id)
{
std::rethrow_exception(ex);
}
catch (const TestException &) // NOLINT
{
/// Exception from a unit test, ignore it.
}
catch (const Exception & e)
{
NOEXCEPT_SCOPE({
@ -34,7 +34,7 @@ public:
auto choice = distribution(generator);
if (choice == 0)
throw TestException();

return false;
}
@ -48,7 +48,7 @@ public:
{
auto choice = distribution(generator);
if (choice == 0)
throw TestException();
}

Priority getPriority() const override { return {}; }
@ -3,8 +3,13 @@ import time
from pathlib import Path
from typing import Optional
from shutil import copy2
from create_release import (
PackageDownloader,
ReleaseInfo,
ReleaseContextManager,
ReleaseProgress,
)
from ci_utils import WithIter, Shell

class MountPointApp(metaclass=WithIter):
@ -38,7 +43,6 @@ class R2MountPoint:
self.bucket_name = self._PROD_BUCKET_NAME
self.aux_mount_options = ""
if self.app == MountPointApp.S3FS:
self.cache_dir = "/home/ubuntu/s3fs_cache"
# self.aux_mount_options += "-o nomodtime " if self.NOMODTIME else "" not for s3fs
@ -52,7 +56,6 @@ class R2MountPoint:
self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}" self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}"
elif self.app == MountPointApp.RCLONE: elif self.app == MountPointApp.RCLONE:
# run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return # run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return
self.async_mount = True
self.cache_dir = "/home/ubuntu/rclone_cache" self.cache_dir = "/home/ubuntu/rclone_cache"
self.aux_mount_options += "--no-modtime " if self.NOMODTIME else "" self.aux_mount_options += "--no-modtime " if self.NOMODTIME else ""
self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose
@ -76,19 +79,22 @@ class R2MountPoint:
)
_TEST_MOUNT_CMD = f"mount | grep -q {self.MOUNT_POINT}"
Shell.run(_CLEAN_LOG_FILE_CMD)
Shell.run(_UNMOUNT_CMD)
Shell.run(_MKDIR_CMD)
Shell.run(_MKDIR_FOR_CACHE)
if self.app == MountPointApp.S3FS:
Shell.run(self.mount_cmd, check=True)
else:
# didn't manage to use a simple run() without blocking or failure
Shell.run_as_daemon(self.mount_cmd)
time.sleep(3)
Shell.run(_TEST_MOUNT_CMD, check=True)
@classmethod
def teardown(cls):
print(f"Unmount [{cls.MOUNT_POINT}]")
Shell.run(f"umount {cls.MOUNT_POINT}")

class RepoCodenames(metaclass=WithIter):
@ -101,6 +107,7 @@ class DebianArtifactory:
_PROD_REPO_URL = "https://packages.clickhouse.com/deb" _PROD_REPO_URL = "https://packages.clickhouse.com/deb"
def __init__(self, release_info: ReleaseInfo, dry_run: bool): def __init__(self, release_info: ReleaseInfo, dry_run: bool):
self.release_info = release_info
self.codename = release_info.codename self.codename = release_info.codename
self.version = release_info.version self.version = release_info.version
if dry_run: if dry_run:
@ -124,8 +131,8 @@ class DebianArtifactory:
cmd = f"{REPREPRO_CMD_PREFIX} includedeb {self.codename} {' '.join(paths)}" cmd = f"{REPREPRO_CMD_PREFIX} includedeb {self.codename} {' '.join(paths)}"
print("Running export command:") print("Running export command:")
print(f" {cmd}") print(f" {cmd}")
ShellRunner.run(cmd) Shell.run(cmd, check=True)
ShellRunner.run("sync") Shell.run("sync")
if self.codename == RepoCodenames.LTS: if self.codename == RepoCodenames.LTS:
packages_with_version = [ packages_with_version = [
@ -137,16 +144,19 @@ class DebianArtifactory:
cmd = f"{REPREPRO_CMD_PREFIX} copy {RepoCodenames.STABLE} {RepoCodenames.LTS} {' '.join(packages_with_version)}" cmd = f"{REPREPRO_CMD_PREFIX} copy {RepoCodenames.STABLE} {RepoCodenames.LTS} {' '.join(packages_with_version)}"
print("Running copy command:") print("Running copy command:")
print(f" {cmd}") print(f" {cmd}")
ShellRunner.run(cmd) Shell.run(cmd, check=True)
ShellRunner.run("sync") Shell.run("sync")
def test_packages(self): def test_packages(self):
ShellRunner.run("docker pull ubuntu:latest") Shell.run("docker pull ubuntu:latest")
print(f"Test packages installation, version [{self.version}]") print(f"Test packages installation, version [{self.version}]")
cmd = f"docker run --rm ubuntu:latest bash -c \"apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-client={self.version}\"" debian_command = f"echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-common-static={self.version} clickhouse-client={self.version}"
cmd = f'docker run --rm ubuntu:latest bash -c "apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; {debian_command}"'
print("Running test command:") print("Running test command:")
print(f" {cmd}") print(f" {cmd}")
ShellRunner.run(cmd) Shell.run(cmd, check=True)
self.release_info.debian_command = debian_command
self.release_info.dump()
def _copy_if_not_exists(src: Path, dst: Path) -> Path: def _copy_if_not_exists(src: Path, dst: Path) -> Path:
@ -167,6 +177,7 @@ class RpmArtifactory:
_SIGN_KEY = "885E2BDCF96B0B45ABF058453E4AD4719DDE9A38"

def __init__(self, release_info: ReleaseInfo, dry_run: bool):
self.release_info = release_info
self.codename = release_info.codename
self.version = release_info.version
if dry_run:
@ -202,23 +213,26 @@ class RpmArtifactory:
for command in commands:
print("Running command:")
print(f" {command}")
Shell.run(command, check=True)

update_public_key = f"gpg --armor --export {self._SIGN_KEY}"
pub_key_path = dest_dir / "repodata" / "repomd.xml.key"
print("Updating repomd.xml.key")
pub_key_path.write_text(Shell.run(update_public_key, check=True))

if codename == RepoCodenames.LTS:
self.export_packages(RepoCodenames.STABLE)
Shell.run("sync")

def test_packages(self):
Shell.run("docker pull fedora:latest")
print(f"Test package installation, version [{self.version}]")
rpm_command = f"dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1"
cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && {rpm_command}"'
print("Running test command:")
print(f" {cmd}")
Shell.run(cmd, check=True)
self.release_info.rpm_command = rpm_command
self.release_info.dump()

class TgzArtifactory:
@ -226,6 +240,7 @@ class TgzArtifactory:
_PROD_REPO_URL = "https://packages.clickhouse.com/tgz" _PROD_REPO_URL = "https://packages.clickhouse.com/tgz"
def __init__(self, release_info: ReleaseInfo, dry_run: bool): def __init__(self, release_info: ReleaseInfo, dry_run: bool):
self.release_info = release_info
self.codename = release_info.codename self.codename = release_info.codename
self.version = release_info.version self.version = release_info.version
if dry_run: if dry_run:
@ -256,23 +271,28 @@ class TgzArtifactory:
if codename == RepoCodenames.LTS: if codename == RepoCodenames.LTS:
self.export_packages(RepoCodenames.STABLE) self.export_packages(RepoCodenames.STABLE)
ShellRunner.run("sync") Shell.run("sync")
def test_packages(self): def test_packages(self):
tgz_file = "/tmp/tmp.tgz" tgz_file = "/tmp/tmp.tgz"
tgz_sha_file = "/tmp/tmp.tgz.sha512" tgz_sha_file = "/tmp/tmp.tgz.sha512"
ShellRunner.run( cmd = f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz"
f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz" Shell.run(
cmd,
check=True,
) )
ShellRunner.run( Shell.run(
f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512" f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512",
check=True,
) )
expected_checksum = ShellRunner.run(f"cut -d ' ' -f 1 {tgz_sha_file}") expected_checksum = Shell.run(f"cut -d ' ' -f 1 {tgz_sha_file}", check=True)
actual_checksum = ShellRunner.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1") actual_checksum = Shell.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1")
assert ( assert (
expected_checksum == actual_checksum expected_checksum == actual_checksum
), f"[{actual_checksum} != {expected_checksum}]" ), f"[{actual_checksum} != {expected_checksum}]"
ShellRunner.run("rm /tmp/tmp.tgz*") Shell.run("rm /tmp/tmp.tgz*")
self.release_info.tgz_command = cmd
self.release_info.dump()
def parse_args() -> argparse.Namespace: def parse_args() -> argparse.Namespace:
@ -280,12 +300,6 @@ def parse_args() -> argparse.Namespace:
formatter_class=argparse.ArgumentDefaultsHelpFormatter, formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Adds release packages to the repository", description="Adds release packages to the repository",
) )
parser.add_argument(
"--infile",
type=str,
required=True,
help="input file with release info",
)
parser.add_argument( parser.add_argument(
"--export-debian", "--export-debian",
action="store_true", action="store_true",
@ -326,9 +340,7 @@ def parse_args() -> argparse.Namespace:
if __name__ == "__main__": if __name__ == "__main__":
args = parse_args() args = parse_args()
assert args.dry_run
release_info = ReleaseInfo.from_file(args.infile)
""" """
Use S3FS. RCLONE has some errors with r2 remote which I didn't figure out how to resolve: Use S3FS. RCLONE has some errors with r2 remote which I didn't figure out how to resolve:
ERROR : IO error: NotImplemented: versionId not implemented ERROR : IO error: NotImplemented: versionId not implemented
@ -336,20 +348,38 @@ if __name__ == "__main__":
""" """
mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run) mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run)
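# Illustrative only (assumption, not taken from this diff): an s3fs mount of an R2 bucket
# typically looks like the command below; the real options are encapsulated in R2MountPoint,
# and the bucket name and endpoint here are placeholders.
#   s3fs <bucket> /mnt/r2 -o url=https://<account-id>.r2.cloudflarestorage.com -o use_path_request_style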
if args.export_debian: if args.export_debian:
mp.init() with ReleaseContextManager(
DebianArtifactory(release_info, dry_run=args.dry_run).export_packages() release_progress=ReleaseProgress.EXPORT_DEB
mp.teardown() ) as release_info:
mp.init()
DebianArtifactory(release_info, dry_run=args.dry_run).export_packages()
mp.teardown()
if args.export_rpm: if args.export_rpm:
mp.init() with ReleaseContextManager(
RpmArtifactory(release_info, dry_run=args.dry_run).export_packages() release_progress=ReleaseProgress.EXPORT_RPM
mp.teardown() ) as release_info:
mp.init()
RpmArtifactory(release_info, dry_run=args.dry_run).export_packages()
mp.teardown()
if args.export_tgz: if args.export_tgz:
mp.init() with ReleaseContextManager(
TgzArtifactory(release_info, dry_run=args.dry_run).export_packages() release_progress=ReleaseProgress.EXPORT_TGZ
mp.teardown() ) as release_info:
mp.init()
TgzArtifactory(release_info, dry_run=args.dry_run).export_packages()
mp.teardown()
if args.test_debian: if args.test_debian:
DebianArtifactory(release_info, dry_run=args.dry_run).test_packages() with ReleaseContextManager(
release_progress=ReleaseProgress.TEST_DEB
) as release_info:
DebianArtifactory(release_info, dry_run=args.dry_run).test_packages()
if args.test_tgz: if args.test_tgz:
TgzArtifactory(release_info, dry_run=args.dry_run).test_packages() with ReleaseContextManager(
release_progress=ReleaseProgress.TEST_TGZ
) as release_info:
TgzArtifactory(release_info, dry_run=args.dry_run).test_packages()
if args.test_rpm: if args.test_rpm:
RpmArtifactory(release_info, dry_run=args.dry_run).test_packages() with ReleaseContextManager(
release_progress=ReleaseProgress.TEST_RPM
) as release_info:
RpmArtifactory(release_info, dry_run=args.dry_run).test_packages()
View File
@ -1,17 +1,17 @@
import argparse import argparse
from datetime import timedelta, datetime import dataclasses
import logging import json
import os import os
from commit_status_helper import get_commit_filtered_statuses import sys
from typing import List
from get_robot_token import get_best_robot_token from get_robot_token import get_best_robot_token
from github_helper import GitHub from github_helper import GitHub
from release import Release, Repo as ReleaseRepo, RELEASE_READY_STATUS from ci_utils import Shell
from env_helper import GITHUB_REPOSITORY
from report import SUCCESS from report import SUCCESS
from ssh import SSHKey from ci_buddy import CIBuddy
from ci_config import CI
LOGGER_NAME = __name__
HELPER_LOGGERS = ["github_helper", LOGGER_NAME]
logger = logging.getLogger(LOGGER_NAME)
def parse_args(): def parse_args():
@ -21,120 +21,198 @@ def parse_args():
) )
parser.add_argument("--token", help="GitHub token, if not set, used from smm") parser.add_argument("--token", help="GitHub token, if not set, used from smm")
parser.add_argument( parser.add_argument(
"--repo", default="ClickHouse/ClickHouse", help="Repo owner/name" "--post-status",
)
parser.add_argument("--dry-run", action="store_true", help="Do not create anything")
parser.add_argument(
"--release-after-days",
type=int,
default=3,
help="Do automatic release on the latest green commit after the latest "
"release if the newest release is older than the specified days",
)
parser.add_argument(
"--debug-helpers",
action="store_true", action="store_true",
help="Add debug logging for this script and github_helper", help="Post release branch statuses",
) )
parser.add_argument( parser.add_argument(
"--remote-protocol", "--post-auto-release-complete",
"-p", action="store_true",
default="ssh", help="Post autorelease completion status",
choices=ReleaseRepo.VALID,
help="repo protocol for git commands remote, 'origin' is a special case and "
"uses 'origin' as a remote",
) )
parser.add_argument(
"--prepare",
action="store_true",
help="Prepare autorelease info",
)
parser.add_argument(
"--wf-status",
type=str,
default="",
help="overall workflow status [success|failure]",
)
return parser.parse_args(), parser
return parser.parse_args()
MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE = 5
AUTORELEASE_INFO_FILE = "/tmp/autorelease_info.json"
@dataclasses.dataclass
class ReleaseParams:
ready: bool
ci_status: str
num_patches: int
release_branch: str
commit_sha: str
commits_to_branch_head: int
latest: bool
def to_dict(self):
return dataclasses.asdict(self)
@dataclasses.dataclass
class AutoReleaseInfo:
releases: List[ReleaseParams]
def add_release(self, release_params: ReleaseParams) -> None:
self.releases.append(release_params)
def dump(self):
print(f"Dump release info into [{AUTORELEASE_INFO_FILE}]")
with open(AUTORELEASE_INFO_FILE, "w", encoding="utf-8") as f:
print(json.dumps(dataclasses.asdict(self), indent=2), file=f)
@staticmethod
def from_file() -> "AutoReleaseInfo":
with open(AUTORELEASE_INFO_FILE, "r", encoding="utf-8") as json_file:
res = json.load(json_file)
releases = [ReleaseParams(**release) for release in res["releases"]]
return AutoReleaseInfo(releases=releases)
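# A sketch of the JSON that dump() writes to /tmp/autorelease_info.json, i.e.
# dataclasses.asdict(AutoReleaseInfo) with one ReleaseParams entry; the values are hypothetical:
# {
#   "releases": [
#     {
#       "ready": true,
#       "ci_status": "success",
#       "num_patches": 4,
#       "release_branch": "24.6",
#       "commit_sha": "<commit sha>",
#       "commits_to_branch_head": 1,
#       "latest": true
#     }
#   ]
# }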
def _prepare(token):
assert len(token) > 10
os.environ["GH_TOKEN"] = token
Shell.run("gh auth status", check=True)
gh = GitHub(token)
prs = gh.get_release_pulls(GITHUB_REPOSITORY)
prs.sort(key=lambda x: x.head.ref)
branch_names = [pr.head.ref for pr in prs]
print(f"Found release branches [{branch_names}]")
repo = gh.get_repo(GITHUB_REPOSITORY)
autoRelease_info = AutoReleaseInfo(releases=[])
for pr in prs:
print(f"\nChecking PR [{pr.head.ref}]")
refs = list(repo.get_git_matching_refs(f"tags/v{pr.head.ref}"))
assert refs
refs.sort(key=lambda ref: ref.ref)
latest_release_tag_ref = refs[-1]
latest_release_tag = repo.get_git_tag(latest_release_tag_ref.object.sha)
commits = Shell.run(
f"git rev-list --first-parent {latest_release_tag.tag}..origin/{pr.head.ref}",
check=True,
).split("\n")
commit_num = len(commits)
print(
f"Previous release [{latest_release_tag.tag}] was [{commit_num}] commits ago, date [{latest_release_tag.tagger.date}]"
)
commits_to_check = commits[:-1] # Exclude the version bump commit
commit_sha = ""
commit_ci_status = ""
commits_to_branch_head = 0
for idx, commit in enumerate(
commits_to_check[:MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE]
):
print(
f"Check commit [{commit}] [{pr.head.ref}~{idx+1}] as release candidate"
)
commit_num -= 1
is_completed = CI.GHActions.check_wf_completed(
token=token, commit_sha=commit
)
if not is_completed:
print(f"CI is in progress for [{commit}] - check previous commit")
commits_to_branch_head += 1
continue
commit_ci_status = CI.GHActions.get_commit_status_by_name(
token=token,
commit_sha=commit,
status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"),
)
commit_sha = commit
if commit_ci_status == SUCCESS:
break
print(f"CI status [{commit_ci_status}] - skip")
commits_to_branch_head += 1
ready = False
if commit_ci_status == SUCCESS and commit_sha:
print(
f"Add release ready info for commit [{commit_sha}] and release branch [{pr.head.ref}]"
)
ready = True
else:
print(f"WARNING: No ready commits found for release branch [{pr.head.ref}]")
autoRelease_info.add_release(
ReleaseParams(
release_branch=pr.head.ref,
commit_sha=commit_sha,
ready=ready,
ci_status=commit_ci_status,
num_patches=commit_num,
commits_to_branch_head=commits_to_branch_head,
latest=False,
)
)
if autoRelease_info.releases:
autoRelease_info.releases[-1].latest = True
autoRelease_info.dump()
def main(): def main():
args = parse_args() args, parser = parse_args()
logging.basicConfig(level=logging.INFO)
if args.debug_helpers:
for logger_name in HELPER_LOGGERS:
logging.getLogger(logger_name).setLevel(logging.DEBUG)
token = args.token or get_best_robot_token() if args.post_status:
days_as_timedelta = timedelta(days=args.release_after_days) info = AutoReleaseInfo.from_file()
now = datetime.now() for release_info in info.releases:
if release_info.ready:
gh = GitHub(token) CIBuddy(dry_run=False).post_info(
prs = gh.get_release_pulls(args.repo) title=f"Auto Release Status for {release_info.release_branch}",
branch_names = [pr.head.ref for pr in prs] body=release_info.to_dict(),
)
logger.info("Found release branches: %s\n ", " \n".join(branch_names)) else:
repo = gh.get_repo(args.repo) CIBuddy(dry_run=False).post_warning(
title=f"Auto Release Status for {release_info.release_branch}",
# In general there is no guarantee on which order the refs/commits are body=release_info.to_dict(),
# returned from the API, so we have to order them. )
for pr in prs: elif args.post_auto_release_complete:
logger.info("Checking PR %s", pr.head.ref) assert args.wf_status, "--wf-status Required with --post-auto-release-complete"
if args.wf_status != SUCCESS:
refs = list(repo.get_git_matching_refs(f"tags/v{pr.head.ref}")) CIBuddy(dry_run=False).post_job_error(
refs.sort(key=lambda ref: ref.ref) error_description="Autorelease workflow failed",
job_name="Autorelease",
latest_release_tag_ref = refs[-1] with_instance_info=False,
latest_release_tag = repo.get_git_tag(latest_release_tag_ref.object.sha) with_wf_link=True,
logger.info("That last release was done at %s", latest_release_tag.tagger.date) critical=True,
if latest_release_tag.tagger.date + days_as_timedelta > now:
logger.info(
"Not enough days since the last release %s,"
" no automatic release can be done",
latest_release_tag.tag,
) )
continue else:
CIBuddy(dry_run=False).post_info(
unreleased_commits = list( title=f"Autorelease completed",
repo.get_commits(sha=pr.head.ref, since=latest_release_tag.tagger.date) body="",
) with_wf_link=True,
unreleased_commits.sort(
key=lambda commit: commit.commit.committer.date, reverse=True
)
for commit in unreleased_commits:
logger.info("Checking statuses of commit %s", commit.sha)
statuses = get_commit_filtered_statuses(commit)
all_success = all(st.state == SUCCESS for st in statuses)
passed_ready_for_release_check = any(
st.context == RELEASE_READY_STATUS and st.state == SUCCESS
for st in statuses
) )
if not (all_success and passed_ready_for_release_check): elif args.prepare:
logger.info("Commit is not green, thus not suitable for release") _prepare(token=args.token or get_best_robot_token())
continue else:
parser.print_help()
logger.info("Commit is ready for release, let's release!") sys.exit(2)
release = Release(
ReleaseRepo(args.repo, args.remote_protocol),
commit.sha,
"patch",
args.dry_run,
True,
)
try:
release.do(True, True, True)
except:
if release.has_rollback:
logging.error(
"!!The release process finished with error, read the output carefully!!"
)
logging.error(
"Probably, rollback finished with error. "
"If you don't see any of the following commands in the output, "
"execute them manually:"
)
release.log_rollback()
raise
logging.info("New release is done!")
break
if __name__ == "__main__": if __name__ == "__main__":
if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""): main()
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
main()
else:
main()
View File
@ -1110,13 +1110,14 @@ def main() -> int:
ci_cache.print_status() ci_cache.print_status()
if IS_CI and not pr_info.is_merge_queue: if IS_CI and not pr_info.is_merge_queue:
# wait for pending jobs to be finished, await_jobs is a long blocking call
ci_cache.await_pending_jobs(pr_info.is_release)
if pr_info.is_release: if pr_info.is_release:
print("Release/master: CI Cache add pending records for all todo jobs") print("Release/master: CI Cache add pending records for all todo jobs")
ci_cache.push_pending_all(pr_info.is_release) ci_cache.push_pending_all(pr_info.is_release)
# wait for pending jobs to be finished, await_jobs is a long blocking call
ci_cache.await_pending_jobs(pr_info.is_release)
# conclude results # conclude results
result["git_ref"] = git_ref result["git_ref"] = git_ref
result["version"] = version result["version"] = version
@ -1292,10 +1293,11 @@ def main() -> int:
pass pass
if Utils.is_killed_with_oom(): if Utils.is_killed_with_oom():
print("WARNING: OOM while job execution") print("WARNING: OOM while job execution")
print(subprocess.run("sudo dmesg -T", check=False))
error_description = f"Out Of Memory, exit_code {job_report.exit_code}" error_description = f"Out Of Memory, exit_code {job_report.exit_code}"
else: else:
error_description = f"Unknown, exit_code {job_report.exit_code}" error_description = f"Unknown, exit_code {job_report.exit_code}"
CIBuddy().post_error( CIBuddy().post_job_error(
error_description + f" after {int(job_report.duration)}s", error_description + f" after {int(job_report.duration)}s",
job_name=_get_ext_check_name(args.job_name), job_name=_get_ext_check_name(args.job_name),
) )
View File
@ -1,5 +1,6 @@
import json import json
import os import os
from typing import Union, Dict
import boto3 import boto3
import requests import requests
@ -60,14 +61,64 @@ class CIBuddy:
except Exception as e: except Exception as e:
print(f"ERROR: Failed to post message, ex {e}") print(f"ERROR: Failed to post message, ex {e}")
def post_error(self, error_description, job_name="", with_instance_info=True): def _post_formatted(
self, title: str, body: Union[Dict, str], with_wf_link: bool
) -> None:
message = title
if isinstance(body, dict):
for name, value in body.items():
if "commit_sha" in name:
value = (
f"<https://github.com/{self.repo}/commit/{value}|{value[:8]}>"
)
message += f" *{name}*: {value}\n"
else:
message += body + "\n"
run_id = os.getenv("GITHUB_RUN_ID", "")
if with_wf_link and run_id:
message += f" *workflow*: <https://github.com/{self.repo}/actions/runs/{run_id}|{run_id}>\n"
self.post(message)
def post_info(
self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
title_extended = f":white_circle: *{title}*\n\n"
self._post_formatted(title_extended, body, with_wf_link)
def post_done(
self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
title_extended = f":white_check_mark: *{title}*\n\n"
self._post_formatted(title_extended, body, with_wf_link)
def post_warning(
self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
title_extended = f":warning: *{title}*\n\n"
self._post_formatted(title_extended, body, with_wf_link)
def post_critical(
self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
title_extended = f":black_circle: *{title}*\n\n"
self._post_formatted(title_extended, body, with_wf_link)
def post_job_error(
self,
error_description: str,
job_name: str = "",
with_instance_info: bool = True,
with_wf_link: bool = True,
critical: bool = False,
) -> None:
instance_id, instance_type = "unknown", "unknown" instance_id, instance_type = "unknown", "unknown"
if with_instance_info: if with_instance_info:
instance_id = Shell.run("ec2metadata --instance-id") or instance_id instance_id = Shell.run("ec2metadata --instance-id") or instance_id
instance_type = Shell.run("ec2metadata --instance-type") or instance_type instance_type = Shell.run("ec2metadata --instance-type") or instance_type
if not job_name: if not job_name:
job_name = os.getenv("CHECK_NAME", "unknown") job_name = os.getenv("CHECK_NAME", "unknown")
line_err = f":red_circle: *Error: {error_description}*\n\n" sign = ":red_circle:" if not critical else ":black_circle:"
line_err = f"{sign} *Error: {error_description}*\n\n"
line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n"
line_job = f" *Job:* `{job_name}`\n" line_job = f" *Job:* `{job_name}`\n"
line_pr_ = f" *PR:* <https://github.com/{self.repo}/pull/{self.pr_number}|#{self.pr_number}>, <{self.commit_url}|{self.sha}>\n" line_pr_ = f" *PR:* <https://github.com/{self.repo}/pull/{self.pr_number}|#{self.pr_number}>, <{self.commit_url}|{self.sha}>\n"
@ -82,10 +133,13 @@ class CIBuddy:
message += line_pr_ message += line_pr_
else: else:
message += line_br_ message += line_br_
run_id = os.getenv("GITHUB_RUN_ID", "")
if with_wf_link and run_id:
message += f" *workflow*: <https://github.com/{self.repo}/actions/runs/{run_id}|{run_id}>\n"
self.post(message) self.post(message)
if __name__ == "__main__": if __name__ == "__main__":
# test # test
buddy = CIBuddy(dry_run=True) buddy = CIBuddy(dry_run=True)
buddy.post_error("TEst") buddy.post_job_error("TEst")
View File
@ -638,7 +638,14 @@ class CiCache:
pushes pending records for all jobs that are supposed to be run
""" """
for job, job_config in self.jobs_to_do.items(): for job, job_config in self.jobs_to_do.items():
if not job_config.has_digest(): if (
job in self.jobs_to_wait
or not job_config.has_digest()
or job_config.disable_await
):
# 1. "job in self.jobs_to_wait" - this job already has a pending record in cache
# 2. "not job_config.has_digest()" - cache is not used for these jobs
# 3. "job_config.disable_await" - await is explicitly disabled
continue continue
pending_state = PendingState(time.time(), run_url=GITHUB_RUN_URL) pending_state = PendingState(time.time(), run_url=GITHUB_RUN_URL)
assert job_config.batches assert job_config.batches
@ -708,7 +715,7 @@ class CiCache:
Filter is to be applied in PRs to remove jobs that are not affected by the change Filter is to be applied in PRs to remove jobs that are not affected by the change
:return: :return:
""" """
remove_from_to_do = [] remove_from_workflow = []
required_builds = [] required_builds = []
has_test_jobs_to_skip = False has_test_jobs_to_skip = False
for job_name, job_config in self.jobs_to_do.items(): for job_name, job_config in self.jobs_to_do.items():
@ -723,26 +730,41 @@ class CiCache:
job=reference_name, job=reference_name,
job_config=reference_config, job_config=reference_config,
): ):
remove_from_to_do.append(job_name) remove_from_workflow.append(job_name)
has_test_jobs_to_skip = True has_test_jobs_to_skip = True
else: else:
required_builds += ( required_builds += (
job_config.required_builds if job_config.required_builds else [] job_config.required_builds if job_config.required_builds else []
) )
if has_test_jobs_to_skip: if has_test_jobs_to_skip:
# If there are tests to skip, it means build digest has not been changed. # If there are tests to skip, it means builds are not affected as well.
# No need to test builds. Let's keep all builds required for test jobs and skip the others # No need to test builds. Let's keep all builds required for test jobs and skip the others
for job_name, job_config in self.jobs_to_do.items(): for job_name, job_config in self.jobs_to_do.items():
if CI.is_build_job(job_name): if CI.is_build_job(job_name):
if job_name not in required_builds: if job_name not in required_builds:
remove_from_to_do.append(job_name) remove_from_workflow.append(job_name)
for job in remove_from_to_do: for job in remove_from_workflow:
print(f"Filter job [{job}] - not affected by the change") print(f"Filter job [{job}] - not affected by the change")
if job in self.jobs_to_do: if job in self.jobs_to_do:
del self.jobs_to_do[job] del self.jobs_to_do[job]
if job in self.jobs_to_wait: if job in self.jobs_to_wait:
del self.jobs_to_wait[job] del self.jobs_to_wait[job]
if job in self.jobs_to_skip:
self.jobs_to_skip.remove(job)
# special handling for the special job: BUILD_CHECK
has_builds = False
for job in list(self.jobs_to_do) + self.jobs_to_skip:
if CI.is_build_job(job):
has_builds = True
break
if not has_builds:
if CI.JobNames.BUILD_CHECK in self.jobs_to_do:
print(
f"Filter job [{CI.JobNames.BUILD_CHECK}] - no builds are required in the workflow"
)
del self.jobs_to_do[CI.JobNames.BUILD_CHECK]
def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None:
""" """
@ -884,3 +906,87 @@ class CiCache:
self.jobs_to_wait[job] = job_config self.jobs_to_wait[job] = job_config
return self return self
if __name__ == "__main__":
# for testing
job_digest = {
"package_release": "bbbd3519d1",
"package_aarch64": "bbbd3519d1",
"package_asan": "bbbd3519d1",
"package_ubsan": "bbbd3519d1",
"package_tsan": "bbbd3519d1",
"package_msan": "bbbd3519d1",
"package_debug": "bbbd3519d1",
"package_release_coverage": "bbbd3519d1",
"binary_release": "bbbd3519d1",
"binary_tidy": "bbbd3519d1",
"binary_darwin": "bbbd3519d1",
"binary_aarch64": "bbbd3519d1",
"binary_aarch64_v80compat": "bbbd3519d1",
"binary_freebsd": "bbbd3519d1",
"binary_darwin_aarch64": "bbbd3519d1",
"binary_ppc64le": "bbbd3519d1",
"binary_amd64_compat": "bbbd3519d1",
"binary_amd64_musl": "bbbd3519d1",
"binary_riscv64": "bbbd3519d1",
"binary_s390x": "bbbd3519d1",
"binary_loongarch64": "bbbd3519d1",
"Builds": "f5dffeecb8",
"Install packages (release)": "ba0c89660e",
"Install packages (aarch64)": "ba0c89660e",
"Stateful tests (asan)": "32a9a1aba9",
"Stateful tests (tsan)": "32a9a1aba9",
"Stateful tests (msan)": "32a9a1aba9",
"Stateful tests (ubsan)": "32a9a1aba9",
"Stateful tests (debug)": "32a9a1aba9",
"Stateful tests (release)": "32a9a1aba9",
"Stateful tests (coverage)": "32a9a1aba9",
"Stateful tests (aarch64)": "32a9a1aba9",
"Stateful tests (release, ParallelReplicas)": "32a9a1aba9",
"Stateful tests (debug, ParallelReplicas)": "32a9a1aba9",
"Stateless tests (asan)": "deb6778b88",
"Stateless tests (tsan)": "deb6778b88",
"Stateless tests (msan)": "deb6778b88",
"Stateless tests (ubsan)": "deb6778b88",
"Stateless tests (debug)": "deb6778b88",
"Stateless tests (release)": "deb6778b88",
"Stateless tests (coverage)": "deb6778b88",
"Stateless tests (aarch64)": "deb6778b88",
"Stateless tests (release, old analyzer, s3, DatabaseReplicated)": "deb6778b88",
"Stateless tests (debug, s3 storage)": "deb6778b88",
"Stateless tests (tsan, s3 storage)": "deb6778b88",
"Stress test (debug)": "aa298abf10",
"Stress test (tsan)": "aa298abf10",
"Upgrade check (debug)": "5ce4d3ee02",
"Integration tests (asan, old analyzer)": "42e58be3aa",
"Integration tests (tsan)": "42e58be3aa",
"Integration tests (aarch64)": "42e58be3aa",
"Integration tests flaky check (asan)": "42e58be3aa",
"Compatibility check (release)": "ecb69d8c4b",
"Compatibility check (aarch64)": "ecb69d8c4b",
"Unit tests (release)": "09d00b702e",
"Unit tests (asan)": "09d00b702e",
"Unit tests (msan)": "09d00b702e",
"Unit tests (tsan)": "09d00b702e",
"Unit tests (ubsan)": "09d00b702e",
"AST fuzzer (debug)": "c38ebf947f",
"AST fuzzer (asan)": "c38ebf947f",
"AST fuzzer (msan)": "c38ebf947f",
"AST fuzzer (tsan)": "c38ebf947f",
"AST fuzzer (ubsan)": "c38ebf947f",
"Stateless tests flaky check (asan)": "deb6778b88",
"Performance Comparison (release)": "a8a7179258",
"ClickBench (release)": "45c07c4aa6",
"ClickBench (aarch64)": "45c07c4aa6",
"Docker server image": "6a24d5b187",
"Docker keeper image": "6a24d5b187",
"Docs check": "4764154c62",
"Fast test": "cb269133f2",
"Style check": "ffffffffff",
"Stateful tests (ubsan, ParallelReplicas)": "32a9a1aba9",
"Stress test (msan)": "aa298abf10",
"Upgrade check (asan)": "5ce4d3ee02",
}
ci_cache = CiCache(job_digests=job_digest, cache_enabled=True, s3=S3Helper())
ci_cache.update()
View File
@ -32,6 +32,9 @@ class CI:
from ci_definitions import MQ_JOBS as MQ_JOBS from ci_definitions import MQ_JOBS as MQ_JOBS
from ci_definitions import WorkflowStages as WorkflowStages from ci_definitions import WorkflowStages as WorkflowStages
from ci_definitions import Runners as Runners from ci_definitions import Runners as Runners
from ci_utils import Envs as Envs
from ci_utils import Utils as Utils
from ci_utils import GHActions as GHActions
from ci_definitions import Labels as Labels from ci_definitions import Labels as Labels
from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS
from ci_utils import CATEGORY_TO_LABEL as CATEGORY_TO_LABEL from ci_utils import CATEGORY_TO_LABEL as CATEGORY_TO_LABEL
View File
@ -351,6 +351,8 @@ class JobConfig:
run_by_label: str = "" run_by_label: str = ""
# to run always regardless of the job digest or/and label # to run always regardless of the job digest or/and label
run_always: bool = False run_always: bool = False
# disables CI await for a given job
disable_await: bool = False
# if the job needs to be run on the release branch, including master (building packages, docker server). # if the job needs to be run on the release branch, including master (building packages, docker server).
# NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able. # NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able.
required_on_release_branch: bool = False required_on_release_branch: bool = False
@ -395,6 +397,7 @@ class CommonJobConfigs:
], ],
), ),
runner_type=Runners.STYLE_CHECKER_ARM, runner_type=Runners.STYLE_CHECKER_ARM,
disable_await=True,
) )
COMPATIBILITY_TEST = JobConfig( COMPATIBILITY_TEST = JobConfig(
job_name_keyword="compatibility", job_name_keyword="compatibility",
View File
@ -1,9 +1,16 @@
import os import os
import re import re
import subprocess import subprocess
import time
from contextlib import contextmanager from contextlib import contextmanager
from pathlib import Path from pathlib import Path
from typing import Any, Iterator, List, Union, Optional, Tuple from typing import Any, Iterator, List, Union, Optional, Sequence, Tuple
import requests
class Envs:
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
LABEL_CATEGORIES = { LABEL_CATEGORIES = {
@ -80,6 +87,71 @@ class GHActions:
print(line) print(line)
print("::endgroup::") print("::endgroup::")
@staticmethod
def get_commit_status_by_name(
token: str, commit_sha: str, status_name: Union[str, Sequence]
) -> str:
assert len(token) == 40
assert len(commit_sha) == 40
assert is_hex(commit_sha)
assert not is_hex(token)
url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}"
headers = {
"Authorization": f"token {token}",
"Accept": "application/vnd.github.v3+json",
}
response = requests.get(url, headers=headers, timeout=5)
if isinstance(status_name, str):
status_name = (status_name,)
if response.status_code == 200:
assert "next" not in response.links, "Response truncated"
statuses = response.json()
for status in statuses:
if status["context"] in status_name:
return status["state"] # type: ignore
return ""
@staticmethod
def check_wf_completed(token: str, commit_sha: str) -> bool:
headers = {
"Authorization": f"token {token}",
"Accept": "application/vnd.github.v3+json",
}
url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/check-runs?per_page={100}"
for i in range(3):
try:
response = requests.get(url, headers=headers, timeout=5)
response.raise_for_status()
# assert "next" not in response.links, "Response truncated"
data = response.json()
assert data["check_runs"], "?"
for check in data["check_runs"]:
if check["status"] != "completed":
print(
f" Check workflow status: Check not completed [{check['name']}]"
)
return False
return True
except Exception as e:
print(f"ERROR: exception after attempt [{i}]: {e}")
time.sleep(1)
return False
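# Hedged usage sketch: a hypothetical module-level helper (not part of GHActions or of this diff)
# showing how auto_release's _prepare() consumes these two calls: wait for the workflow to finish,
# then read the aggregated build-check status for the commit.
def get_build_check_state(token: str, sha: str) -> str:
    if not GHActions.check_wf_completed(token=token, commit_sha=sha):
        return ""  # workflow still running
    return GHActions.get_commit_status_by_name(
        token=token, commit_sha=sha, status_name=("ClickHouse build check",)
    )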
@staticmethod
def get_pr_url_by_branch(repo, branch):
get_url_cmd = (
f"gh pr list --repo {repo} --head {branch} --json url --jq '.[0].url'"
)
url = Shell.run(get_url_cmd)
if not url:
print(f"ERROR: PR nor found, branch [{branch}]")
return url
class Shell: class Shell:
@classmethod @classmethod
@ -95,7 +167,11 @@ class Shell:
return res.stdout.strip() return res.stdout.strip()
@classmethod @classmethod
def run(cls, command): def run(cls, command, check=False, dry_run=False):
if dry_run:
print(f"Dry-ryn. Would run command [{command}]")
return ""
print(f"Run command [{command}]")
res = "" res = ""
result = subprocess.run( result = subprocess.run(
command, command,
@ -106,13 +182,26 @@ class Shell:
check=False, check=False,
) )
if result.returncode == 0: if result.returncode == 0:
print(f"stdout: {result.stdout.strip()}")
res = result.stdout res = result.stdout
else:
print(
f"ERROR: stdout: {result.stdout.strip()}, stderr: {result.stderr.strip()}"
)
if check:
assert result.returncode == 0
return res.strip() return res.strip()
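# Hedged illustration: a hypothetical helper (not part of Shell or of this diff) showing the new
# keyword arguments; run() returns stripped stdout or "" on failure, check=True turns a non-zero
# exit code into an assertion, and dry_run=True only prints the command.
def _example_shell_usage() -> None:
    head_sha = Shell.run("git rev-parse HEAD", check=True)
    print(f"HEAD is [{head_sha}]")
    Shell.run("rm -rf /tmp/packages", dry_run=True)  # printed, not executed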
@classmethod
def run_as_daemon(cls, command):
print(f"Run daemon command [{command}]")
subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with
return 0, ""
@classmethod @classmethod
def check(cls, command): def check(cls, command):
result = subprocess.run( result = subprocess.run(
command + " 2>&1", command,
shell=True, shell=True,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, stderr=subprocess.PIPE,
View File
@ -2,7 +2,6 @@ import argparse
import dataclasses import dataclasses
import json import json
import os import os
import subprocess
from contextlib import contextmanager from contextlib import contextmanager
from copy import copy from copy import copy
@ -13,7 +12,8 @@ from git_helper import Git, GIT_PREFIX
from ssh import SSHAgent from ssh import SSHAgent
from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET
from s3_helper import S3Helper from s3_helper import S3Helper
from ci_utils import Shell from ci_utils import Shell, GHActions
from ci_buddy import CIBuddy
from version_helper import ( from version_helper import (
FILE_WITH_VERSION_PATH, FILE_WITH_VERSION_PATH,
GENERATED_CONTRIBUTORS, GENERATED_CONTRIBUTORS,
@ -27,34 +27,66 @@ from ci_config import CI
CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH) CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH)
CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS) CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS)
RELEASE_INFO_FILE = "/tmp/release_info.json"
class ShellRunner: class ReleaseProgress:
STARTED = "started"
DOWNLOAD_PACKAGES = "download packages"
PUSH_RELEASE_TAG = "push release tag"
PUSH_NEW_RELEASE_BRANCH = "push new release branch"
BUMP_VERSION = "bump version"
CREATE_GH_RELEASE = "create GH release"
EXPORT_TGZ = "export TGZ packages"
EXPORT_RPM = "export RPM packages"
EXPORT_DEB = "export DEB packages"
TEST_TGZ = "test TGZ packages"
TEST_RPM = "test RPM packages"
TEST_DEB = "test DEB packages"
COMPLETED = "completed"
@classmethod
def run( class ReleaseProgressDescription:
cls, command, check_retcode=True, print_output=True, async_=False, dry_run=False OK = "OK"
): FAILED = "FAILED"
if dry_run:
print(f"Dry-run: Would run shell command: [{command}]")
return 0, "" class ReleaseContextManager:
print(f"Running shell command: [{command}]") def __init__(self, release_progress):
if async_: self.release_progress = release_progress
subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with self.release_info = None
return 0, ""
result = subprocess.run( def __enter__(self):
command + " 2>&1", if self.release_progress == ReleaseProgress.STARTED:
shell=True, # create initial release info
stdout=subprocess.PIPE, self.release_info = ReleaseInfo(
stderr=subprocess.PIPE, release_branch="NA",
text=True, commit_sha=args.ref,
check=True, release_tag="NA",
) version="NA",
if print_output: codename="NA",
print(result.stdout) previous_release_tag="NA",
if check_retcode: previous_release_sha="NA",
assert result.returncode == 0, f"Return code [{result.returncode}]" release_progress=ReleaseProgress.STARTED,
return result.returncode, result.stdout ).dump()
else:
# fetch release info from fs and update
self.release_info = ReleaseInfo.from_file()
assert self.release_info
assert (
self.release_info.progress_description == ReleaseProgressDescription.OK
), "Must be OK on the start of new context"
self.release_info.release_progress = self.release_progress
self.release_info.dump()
return self.release_info
def __exit__(self, exc_type, exc_value, traceback):
assert self.release_info
if exc_type is not None:
self.release_info.progress_description = ReleaseProgressDescription.FAILED
else:
self.release_info.progress_description = ReleaseProgressDescription.OK
self.release_info.dump()
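# Hedged usage sketch (hypothetical function, not part of this diff): each CLI step wraps its work
# in the context manager so the current progress and its OK/FAILED outcome are persisted to
# /tmp/release_info.json between invocations.
def _example_progress_step(dry_run: bool = True) -> None:
    with ReleaseContextManager(release_progress=ReleaseProgress.PUSH_RELEASE_TAG) as info:
        info.push_release_tag(dry_run=dry_run)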
@dataclasses.dataclass @dataclasses.dataclass
@ -67,31 +99,50 @@ class ReleaseInfo:
codename: str codename: str
previous_release_tag: str previous_release_tag: str
previous_release_sha: str previous_release_sha: str
changelog_pr: str = ""
version_bump_pr: str = ""
release_url: str = ""
debian_command: str = ""
rpm_command: str = ""
tgz_command: str = ""
docker_command: str = ""
release_progress: str = ""
progress_description: str = ""
def is_patch(self):
return self.release_branch != "master"
def is_new_release_branch(self):
return self.release_branch == "master"
@staticmethod @staticmethod
def from_file(file_path: str) -> "ReleaseInfo": def from_file() -> "ReleaseInfo":
with open(file_path, "r", encoding="utf-8") as json_file: with open(RELEASE_INFO_FILE, "r", encoding="utf-8") as json_file:
res = json.load(json_file) res = json.load(json_file)
return ReleaseInfo(**res) return ReleaseInfo(**res)
@staticmethod def dump(self):
def prepare(commit_ref: str, release_type: str, outfile: str) -> None: print(f"Dump release info into [{RELEASE_INFO_FILE}]")
Path(outfile).parent.mkdir(parents=True, exist_ok=True) with open(RELEASE_INFO_FILE, "w", encoding="utf-8") as f:
Path(outfile).unlink(missing_ok=True) print(json.dumps(dataclasses.asdict(self), indent=2), file=f)
return self
def prepare(self, commit_ref: str, release_type: str) -> "ReleaseInfo":
version = None version = None
release_branch = None release_branch = None
release_tag = None release_tag = None
previous_release_tag = None previous_release_tag = None
previous_release_sha = None previous_release_sha = None
codename = None codename = ""
assert release_type in ("patch", "new") assert release_type in ("patch", "new")
if release_type == "new": if release_type == "new":
# check commit_ref is right and on a right branch # check commit_ref is right and on a right branch
ShellRunner.run( Shell.run(
f"git merge-base --is-ancestor origin/{commit_ref} origin/master" f"git merge-base --is-ancestor {commit_ref} origin/master",
check=True,
) )
with checkout(commit_ref): with checkout(commit_ref):
_, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") commit_sha = Shell.run(f"git rev-parse {commit_ref}", check=True)
# Git() must be inside "with checkout" contextmanager # Git() must be inside "with checkout" contextmanager
git = Git() git = Git()
version = get_version_from_repo(git=git) version = get_version_from_repo(git=git)
@ -102,9 +153,6 @@ class ReleaseInfo:
git.latest_tag == expected_prev_tag git.latest_tag == expected_prev_tag
), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]" ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]"
release_tag = version.describe release_tag = version.describe
codename = (
VersionType.STABLE
) # dummy value (artifactory won't be updated for new release)
previous_release_tag = expected_prev_tag previous_release_tag = expected_prev_tag
previous_release_sha = Shell.run_strict( previous_release_sha = Shell.run_strict(
f"git rev-parse {previous_release_tag}" f"git rev-parse {previous_release_tag}"
@ -112,7 +160,7 @@ class ReleaseInfo:
assert previous_release_sha assert previous_release_sha
if release_type == "patch": if release_type == "patch":
with checkout(commit_ref): with checkout(commit_ref):
_, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") commit_sha = Shell.run(f"git rev-parse {commit_ref}", check=True)
# Git() must be inside "with checkout" contextmanager # Git() must be inside "with checkout" contextmanager
git = Git() git = Git()
version = get_version_from_repo(git=git) version = get_version_from_repo(git=git)
@ -120,10 +168,11 @@ class ReleaseInfo:
version.with_description(codename) version.with_description(codename)
release_branch = f"{version.major}.{version.minor}" release_branch = f"{version.major}.{version.minor}"
release_tag = version.describe release_tag = version.describe
ShellRunner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags") Shell.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags", check=True)
# check commit is right and on a right branch # check commit is right and on a right branch
ShellRunner.run( Shell.run(
f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}" f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}",
check=True,
) )
if version.patch == 1: if version.patch == 1:
expected_version = copy(version) expected_version = copy(version)
@ -160,24 +209,24 @@ class ReleaseInfo:
and commit_sha and commit_sha
and release_tag and release_tag
and version and version
and codename in ("lts", "stable") and (codename in ("lts", "stable") or release_type == "new")
) )
res = ReleaseInfo(
release_branch=release_branch, self.release_branch = release_branch
commit_sha=commit_sha, self.commit_sha = commit_sha
release_tag=release_tag, self.release_tag = release_tag
version=version.string, self.version = version.string
codename=codename, self.codename = codename
previous_release_tag=previous_release_tag, self.previous_release_tag = previous_release_tag
previous_release_sha=previous_release_sha, self.previous_release_sha = previous_release_sha
) self.release_progress = ReleaseProgress.STARTED
with open(outfile, "w", encoding="utf-8") as f: self.progress_description = ReleaseProgressDescription.OK
print(json.dumps(dataclasses.asdict(res), indent=2), file=f) return self
def push_release_tag(self, dry_run: bool) -> None: def push_release_tag(self, dry_run: bool) -> None:
if dry_run: if dry_run:
# remove locally created tag from prev run # remove locally created tag from prev run
ShellRunner.run( Shell.run(
f"{GIT_PREFIX} tag -l | grep -q {self.release_tag} && git tag -d {self.release_tag} ||:" f"{GIT_PREFIX} tag -l | grep -q {self.release_tag} && git tag -d {self.release_tag} ||:"
) )
# Create release tag # Create release tag
@ -185,16 +234,17 @@ class ReleaseInfo:
f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]" f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]"
) )
tag_message = f"Release {self.release_tag}" tag_message = f"Release {self.release_tag}"
ShellRunner.run( Shell.run(
f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}" f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}",
check=True,
) )
cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}" cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}"
ShellRunner.run(cmd_push_tag, dry_run=dry_run) Shell.run(cmd_push_tag, dry_run=dry_run, check=True)
@staticmethod @staticmethod
def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None: def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None:
cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}"
ShellRunner.run(cmd, dry_run=dry_run) Shell.run(cmd, dry_run=dry_run, check=True)
def push_new_release_branch(self, dry_run: bool) -> None: def push_new_release_branch(self, dry_run: bool) -> None:
assert ( assert (
@ -211,8 +261,8 @@ class ReleaseInfo:
), f"Unexpected current version in git, must precede [{self.version}] by one step, actual [{version.string}]" ), f"Unexpected current version in git, must precede [{self.version}] by one step, actual [{version.string}]"
if dry_run: if dry_run:
# remove locally created branch from prev run # remove locally created branch from prev run
ShellRunner.run( Shell.run(
f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch} ||:" f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch}"
) )
print( print(
f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]" f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]"
@ -225,7 +275,7 @@ class ReleaseInfo:
cmd_push_branch = ( cmd_push_branch = (
f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}" f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}"
) )
ShellRunner.run(cmd_push_branch, dry_run=dry_run) Shell.run(cmd_push_branch, dry_run=dry_run, check=True)
print("Create and push backport tags for new release branch") print("Create and push backport tags for new release branch")
ReleaseInfo._create_gh_label( ReleaseInfo._create_gh_label(
@ -234,12 +284,13 @@ class ReleaseInfo:
ReleaseInfo._create_gh_label( ReleaseInfo._create_gh_label(
f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run
) )
ShellRunner.run( Shell.run(
f"""gh pr create --repo {GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}' f"""gh pr create --repo {GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}'
--head {new_release_branch} {pr_labels} --head {new_release_branch} {pr_labels}
--body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.' --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.'
""", """,
dry_run=dry_run, dry_run=dry_run,
check=True,
) )
def update_version_and_contributors_list(self, dry_run: bool) -> None: def update_version_and_contributors_list(self, dry_run: bool) -> None:
@ -265,32 +316,55 @@ class ReleaseInfo:
body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md")
actor = os.getenv("GITHUB_ACTOR", "") or "me" actor = os.getenv("GITHUB_ACTOR", "") or "me"
cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}" cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}"
ShellRunner.run(cmd_commit_version_upd, dry_run=dry_run) Shell.run(cmd_commit_version_upd, check=True, dry_run=dry_run)
ShellRunner.run(cmd_push_branch, dry_run=dry_run) Shell.run(cmd_push_branch, check=True, dry_run=dry_run)
ShellRunner.run(cmd_create_pr, dry_run=dry_run) Shell.run(cmd_create_pr, check=True, dry_run=dry_run)
if dry_run: if dry_run:
ShellRunner.run( Shell.run(f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'")
f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" Shell.run(
)
ShellRunner.run(
f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'"
) )
self.version_bump_pr = "dry-run"
else:
self.version_bump_pr = GHActions.get_pr_url_by_branch(
repo=GITHUB_REPOSITORY, branch=branch_upd_version_contributors
)
def update_release_info(self, dry_run: bool) -> "ReleaseInfo":
if self.release_branch != "master":
branch = f"auto/{release_info.release_tag}"
if not dry_run:
url = GHActions.get_pr_url_by_branch(
repo=GITHUB_REPOSITORY, branch=branch
)
else:
url = "dry-run"
print(f"ChangeLog PR url [{url}]")
self.changelog_pr = url
print(f"Release url [{url}]")
self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}"
if self.release_progress == ReleaseProgress.COMPLETED:
self.docker_command = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version"
self.dump()
return self
def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None: def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None:
repo = os.getenv("GITHUB_REPOSITORY") repo = os.getenv("GITHUB_REPOSITORY")
assert repo assert repo
cmds = [] cmds = [
cmds.append(
f"gh release create --repo {repo} --title 'Release {self.release_tag}' {self.release_tag}" f"gh release create --repo {repo} --title 'Release {self.release_tag}' {self.release_tag}"
) ]
for file in packages_files: for file in packages_files:
cmds.append(f"gh release upload {self.release_tag} {file}") cmds.append(f"gh release upload {self.release_tag} {file}")
if not dry_run: if not dry_run:
for cmd in cmds: for cmd in cmds:
ShellRunner.run(cmd) Shell.run(cmd, check=True)
self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}"
else: else:
print("Dry-run, would run commands:") print("Dry-run, would run commands:")
print("\n * ".join(cmds)) print("\n * ".join(cmds))
self.release_url = f"dry-run"
self.dump()
class RepoTypes: class RepoTypes:
@ -350,7 +424,7 @@ class PackageDownloader:
self.macos_package_files = ["clickhouse-macos", "clickhouse-macos-aarch64"] self.macos_package_files = ["clickhouse-macos", "clickhouse-macos-aarch64"]
self.file_to_type = {} self.file_to_type = {}
ShellRunner.run(f"mkdir -p {self.LOCAL_DIR}") Shell.run(f"mkdir -p {self.LOCAL_DIR}")
for package_type in self.PACKAGE_TYPES: for package_type in self.PACKAGE_TYPES:
for package in self.package_names: for package in self.package_names:
@ -400,7 +474,7 @@ class PackageDownloader:
return res return res
def run(self): def run(self):
ShellRunner.run(f"rm -rf {self.LOCAL_DIR}/*") Shell.run(f"rm -rf {self.LOCAL_DIR}/*")
for package_file in ( for package_file in (
self.deb_package_files + self.rpm_package_files + self.tgz_package_files self.deb_package_files + self.rpm_package_files + self.tgz_package_files
): ):
@ -473,6 +547,37 @@ class PackageDownloader:
return True return True
@contextmanager
def checkout(ref: str) -> Iterator[None]:
orig_ref = Shell.run(f"{GIT_PREFIX} symbolic-ref --short HEAD", check=True)
rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
assert orig_ref
if ref not in (orig_ref,):
Shell.run(f"{GIT_PREFIX} checkout {ref}")
try:
yield
except (Exception, KeyboardInterrupt) as e:
print(f"ERROR: Exception [{e}]")
Shell.run(rollback_cmd)
raise
Shell.run(rollback_cmd)
@contextmanager
def checkout_new(ref: str) -> Iterator[None]:
orig_ref = Shell.run(f"{GIT_PREFIX} symbolic-ref --short HEAD", check=True)
rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
assert orig_ref
Shell.run(f"{GIT_PREFIX} checkout -b {ref}", check=True)
try:
yield
except (Exception, KeyboardInterrupt) as e:
print(f"ERROR: Exception [{e}]")
Shell.run(rollback_cmd)
raise
Shell.run(rollback_cmd)
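# Hedged usage sketch (hypothetical, not part of this diff): both context managers always roll
# back to the original branch, even if the body raises.
def _example_checkout() -> None:
    with checkout("origin/master"):
        print(Shell.run("git rev-parse HEAD", check=True))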
def parse_args() -> argparse.Namespace: def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter, formatter_class=argparse.ArgumentDefaultsHelpFormatter,
@ -508,6 +613,11 @@ def parse_args() -> argparse.Namespace:
action="store_true", action="store_true",
help="Create GH Release object and attach all packages", help="Create GH Release object and attach all packages",
) )
parser.add_argument(
"--post-status",
action="store_true",
help="Post release status into Slack",
)
parser.add_argument( parser.add_argument(
"--ref", "--ref",
type=str, type=str,
@ -526,55 +636,25 @@ def parse_args() -> argparse.Namespace:
help="do not make any actual changes in the repo, just show what will be done", help="do not make any actual changes in the repo, just show what will be done",
) )
parser.add_argument( parser.add_argument(
"--outfile", "--set-progress-started",
default="", action="store_true",
type=str, help="Set new progress step, --progress <PROGRESS STEP> must be set",
help="output file to write json result to, if not set - stdout",
) )
parser.add_argument( parser.add_argument(
"--infile", "--progress",
default="",
type=str, type=str,
help="input file with release info", help="Progress step name, see @ReleaseProgress",
)
parser.add_argument(
"--set-progress-completed",
action="store_true",
help="Set current progress step to OK (completed)",
) )
return parser.parse_args() return parser.parse_args()
@contextmanager
def checkout(ref: str) -> Iterator[None]:
_, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD")
rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
assert orig_ref
if ref not in (orig_ref,):
ShellRunner.run(f"{GIT_PREFIX} checkout {ref}")
try:
yield
except (Exception, KeyboardInterrupt) as e:
print(f"ERROR: Exception [{e}]")
ShellRunner.run(rollback_cmd)
raise
ShellRunner.run(rollback_cmd)
@contextmanager
def checkout_new(ref: str) -> Iterator[None]:
_, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD")
rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
assert orig_ref
ShellRunner.run(f"{GIT_PREFIX} checkout -b {ref}")
try:
yield
except (Exception, KeyboardInterrupt) as e:
print(f"ERROR: Exception [{e}]")
ShellRunner.run(rollback_cmd)
raise
ShellRunner.run(rollback_cmd)
if __name__ == "__main__": if __name__ == "__main__":
args = parse_args() args = parse_args()
assert args.dry_run
# prepare ssh for git if needed # prepare ssh for git if needed
_ssh_agent = None _ssh_agent = None
@ -586,43 +666,91 @@ if __name__ == "__main__":
_ssh_agent.print_keys() _ssh_agent.print_keys()
if args.prepare_release_info: if args.prepare_release_info:
assert ( with ReleaseContextManager(
args.ref and args.release_type and args.outfile release_progress=ReleaseProgress.STARTED
), "--ref, --release-type and --outfile must be provided with --prepare-release-info" ) as release_info:
ReleaseInfo.prepare( assert (
commit_ref=args.ref, release_type=args.release_type, outfile=args.outfile args.ref and args.release_type
) ), "--ref and --release-type must be provided with --prepare-release-info"
if args.push_release_tag: release_info.prepare(commit_ref=args.ref, release_type=args.release_type)
assert args.infile, "--infile <release info file path> must be provided"
release_info = ReleaseInfo.from_file(args.infile)
release_info.push_release_tag(dry_run=args.dry_run)
if args.push_new_release_branch:
assert args.infile, "--infile <release info file path> must be provided"
release_info = ReleaseInfo.from_file(args.infile)
release_info.push_new_release_branch(dry_run=args.dry_run)
if args.create_bump_version_pr:
# TODO: store link to PR in release info
assert args.infile, "--infile <release info file path> must be provided"
release_info = ReleaseInfo.from_file(args.infile)
release_info.update_version_and_contributors_list(dry_run=args.dry_run)
if args.download_packages: if args.download_packages:
assert args.infile, "--infile <release info file path> must be provided" with ReleaseContextManager(
release_info = ReleaseInfo.from_file(args.infile) release_progress=ReleaseProgress.DOWNLOAD_PACKAGES
p = PackageDownloader( ) as release_info:
release=release_info.release_branch, p = PackageDownloader(
commit_sha=release_info.commit_sha, release=release_info.release_branch,
version=release_info.version, commit_sha=release_info.commit_sha,
) version=release_info.version,
p.run() )
p.run()
if args.push_release_tag:
with ReleaseContextManager(
release_progress=ReleaseProgress.PUSH_RELEASE_TAG
) as release_info:
release_info.push_release_tag(dry_run=args.dry_run)
if args.push_new_release_branch:
with ReleaseContextManager(
release_progress=ReleaseProgress.PUSH_NEW_RELEASE_BRANCH
) as release_info:
release_info.push_new_release_branch(dry_run=args.dry_run)
if args.create_bump_version_pr:
with ReleaseContextManager(
release_progress=ReleaseProgress.BUMP_VERSION
) as release_info:
release_info.update_version_and_contributors_list(dry_run=args.dry_run)
if args.create_gh_release: if args.create_gh_release:
assert args.infile, "--infile <release info file path> must be provided" with ReleaseContextManager(
release_info = ReleaseInfo.from_file(args.infile) release_progress=ReleaseProgress.CREATE_GH_RELEASE
p = PackageDownloader( ) as release_info:
release=release_info.release_branch, p = PackageDownloader(
commit_sha=release_info.commit_sha, release=release_info.release_branch,
version=release_info.version, commit_sha=release_info.commit_sha,
) version=release_info.version,
release_info.create_gh_release(p.get_all_packages_files(), args.dry_run) )
release_info.create_gh_release(
packages_files=p.get_all_packages_files(), dry_run=args.dry_run
)
if args.post_status:
release_info = ReleaseInfo.from_file()
release_info.update_release_info(dry_run=args.dry_run)
if release_info.is_new_release_branch():
title = "New release branch"
else:
title = "New release"
if (
release_info.progress_description == ReleaseProgressDescription.OK
and release_info.release_progress == ReleaseProgress.COMPLETED
):
title = "Completed: " + title
CIBuddy(dry_run=args.dry_run).post_done(
title, dataclasses.asdict(release_info)
)
else:
title = "Failed: " + title
CIBuddy(dry_run=args.dry_run).post_critical(
title, dataclasses.asdict(release_info)
)
if args.set_progress_started:
assert args.progress, "Progress step name must be provided"
ri = ReleaseInfo.from_file()
ri.release_progress = args.progress
ri.progress_description = ReleaseProgressDescription.FAILED
ri.dump()
if args.set_progress_completed:
ri = ReleaseInfo.from_file()
assert (
ri.progress_description == ReleaseProgressDescription.FAILED
), "Must be FAILED before set to OK"
ri.progress_description = ReleaseProgressDescription.OK
ri.dump()
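# Hedged example of the intended flow for a single step (the script invocation is illustrative and
# the step name is one of the ReleaseProgress constants above):
#   python3 <this script> --set-progress-started --progress "export TGZ packages"
#   ... perform the step ...
#   python3 <this script> --set-progress-completed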
# tear down ssh # tear down ssh
if _ssh_agent and _key_pub: if _ssh_agent and _key_pub:
View File
@ -254,11 +254,14 @@ def main():
     statuses = get_commit_filtered_statuses(commit)
     has_failed_statuses = False
+    has_native_failed_status = False
     for status in statuses:
         print(f"Check status [{status.context}], [{status.state}]")
         if CI.is_required(status.context) and status.state != SUCCESS:
             print(f"WARNING: Failed status [{status.context}], [{status.state}]")
             has_failed_statuses = True
+            if status.context != CI.StatusNames.SYNC:
+                has_native_failed_status = True
 
     if args.wf_status == SUCCESS or has_failed_statuses:
         # set Mergeable check if workflow is successful (green)
@ -280,7 +283,7 @@ def main():
         print(
             "Workflow failed but no failed statuses found (died runner?) - cannot set Mergeable Check status"
         )
-    if args.wf_status == SUCCESS and not has_failed_statuses:
+    if args.wf_status == SUCCESS and not has_native_failed_status:
         sys.exit(0)
     else:
         sys.exit(1)
@ -296,13 +296,16 @@ class PRInfo:
        else:
            if "schedule" in github_event:
                self.event_type = EventType.SCHEDULE
-            else:
+            elif "inputs" in github_event:
                # assume this is a dispatch
                self.event_type = EventType.DISPATCH
-                logging.warning(
-                    "event.json does not match pull_request or push:\n%s",
-                    json.dumps(github_event, sort_keys=True, indent=4),
-                )
+                print("PR Info:")
+                print(self)
+            else:
+                logging.warning(
+                    "event.json does not match pull_request or push:\n%s",
+                    json.dumps(github_event, sort_keys=True, indent=4),
+                )
            self.sha = os.getenv(
                "GITHUB_SHA", "0000000000000000000000000000000000000000"
            )
@ -587,11 +587,11 @@ class TestCIConfig(unittest.TestCase):
         for job, job_config in ci_cache.jobs_to_do.items():
             if job in MOCK_AFFECTED_JOBS:
                 MOCK_REQUIRED_BUILDS += job_config.required_builds
-            elif job not in MOCK_AFFECTED_JOBS:
+            elif job not in MOCK_AFFECTED_JOBS and not job_config.disable_await:
                 ci_cache.jobs_to_wait[job] = job_config
         for job, job_config in ci_cache.jobs_to_do.items():
-            if job_config.reference_job_name:
+            if job_config.reference_job_name or job_config.disable_await:
                 # jobs with reference_job_name in config are not supposed to have records in the cache - continue
                 continue
             if job in MOCK_AFFECTED_JOBS:
@ -624,11 +624,76 @@ class TestCIConfig(unittest.TestCase):
             + MOCK_AFFECTED_JOBS
             + MOCK_REQUIRED_BUILDS
         )
+        self.assertTrue(
+            CI.JobNames.BUILD_CHECK not in ci_cache.jobs_to_wait,
+            "We must never await on Builds Report",
+        )
         self.assertCountEqual(
             list(ci_cache.jobs_to_wait),
-            [
-                CI.JobNames.BUILD_CHECK,
-            ]
-            + MOCK_REQUIRED_BUILDS,
+            MOCK_REQUIRED_BUILDS,
+        )
+        self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do)
+
+    def test_ci_py_filters_not_affected_jobs_in_prs_no_builds(self):
+        """
+        checks ci.py filters not affected jobs in PRs, no builds required
+        """
+        settings = CiSettings()
+        settings.no_ci_cache = True
+        pr_info = PRInfo(github_event=_TEST_EVENT_JSON)
+        pr_info.event_type = EventType.PULL_REQUEST
+        pr_info.number = 123
+        assert pr_info.is_pr
+        ci_cache = CIPY._configure_jobs(
+            S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True
+        )
+        self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list")
+        assert not ci_cache.jobs_to_wait
+        assert not ci_cache.jobs_to_skip
+        MOCK_AFFECTED_JOBS = [
+            CI.JobNames.FAST_TEST,
+        ]
+        MOCK_REQUIRED_BUILDS = []
+        # pretend there are pending jobs that we need to wait
+        for job, job_config in ci_cache.jobs_to_do.items():
+            if job in MOCK_AFFECTED_JOBS:
+                if job_config.required_builds:
+                    MOCK_REQUIRED_BUILDS += job_config.required_builds
+            elif job not in MOCK_AFFECTED_JOBS and not job_config.disable_await:
+                ci_cache.jobs_to_wait[job] = job_config
+        for job, job_config in ci_cache.jobs_to_do.items():
+            if job_config.reference_job_name or job_config.disable_await:
+                # jobs with reference_job_name in config are not supposed to have records in the cache - continue
+                continue
+            if job in MOCK_AFFECTED_JOBS:
+                continue
+            for batch in range(job_config.num_batches):
+                # add any record into cache
+                record = CiCache.Record(
+                    record_type=random.choice(
+                        [
+                            CiCache.RecordType.FAILED,
+                            CiCache.RecordType.PENDING,
+                            CiCache.RecordType.SUCCESSFUL,
+                        ]
+                    ),
+                    job_name=job,
+                    job_digest=ci_cache.job_digests[job],
+                    batch=batch,
+                    num_batches=job_config.num_batches,
+                    release_branch=True,
+                )
+                for record_t_, records_ in ci_cache.records.items():
+                    if record_t_.value == CiCache.RecordType.FAILED.value:
+                        records_[record.to_str_key()] = record
+        ci_cache.filter_out_not_affected_jobs()
+        expected_to_do = MOCK_AFFECTED_JOBS + MOCK_REQUIRED_BUILDS
+        self.assertCountEqual(
+            list(ci_cache.jobs_to_wait),
+            MOCK_REQUIRED_BUILDS,
         )
         self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do)
@ -50,7 +50,7 @@ set -uo pipefail
 # set accordingly to a runner role #
 ####################################
 
-echo "Running init v1"
+echo "Running init v1.1"
 
 export DEBIAN_FRONTEND=noninteractive
 export RUNNER_HOME=/home/ubuntu/actions-runner
@ -66,6 +66,14 @@ bash /usr/local/share/scripts/init-network.sh
 RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text)
 LABELS="self-hosted,Linux,$(uname -m),$RUNNER_TYPE"
 export LABELS
+echo "Instance Labels: $LABELS"
+
+LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle)
+export LIFE_CYCLE
+echo "Instance lifecycle: $LIFE_CYCLE"
+
+INSTANCE_TYPE=$(ec2metadata --instance-type)
+echo "Instance type: $INSTANCE_TYPE"
 
 # Refresh CloudWatch agent config
 aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json
@ -124,10 +132,6 @@ terminate_decrease_and_exit() {
 declare -f terminate_and_exit >> /tmp/actions-hooks/common.sh
 
 check_spot_instance_is_old() {
-  # This function should be executed ONLY BETWEEN runnings.
-  # It's unsafe to execute while the runner is working!
-  local LIFE_CYCLE
-  LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle)
   if [ "$LIFE_CYCLE" == "spot" ]; then
     local UPTIME
     UPTIME=$(< /proc/uptime)
@ -208,13 +208,21 @@ def test_merge_tree_custom_disk_setting(start_cluster):
             secret_access_key='minio123');
         """
     )
-    count = len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
+
+    list1 = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))
+    count1 = len(list1)
+
     node1.query(f"INSERT INTO {TABLE_NAME}_3 SELECT number FROM numbers(100)")
     assert int(node1.query(f"SELECT count() FROM {TABLE_NAME}_3")) == 100
-    assert (
-        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
-        == count
-    )
+
+    list2 = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))
+    count2 = len(list2)
+
+    if count1 != count2:
+        print("list1: ", list1)
+        print("list2: ", list2)
+    assert count1 == count2
+
     assert (
         len(list(minio.list_objects(cluster.minio_bucket, "data2/", recursive=True)))
         > 0
@ -43,15 +43,10 @@ def started_cluster():
    config = """<clickhouse>
        <openSSL>
            <client>
-                <verificationMode>none</verificationMode>
+                <verificationMode>strict</verificationMode>
                <certificateFile>{certificateFile}</certificateFile>
                <privateKeyFile>{privateKeyFile}</privateKeyFile>
                <caConfig>{caConfig}</caConfig>
-                <invalidCertificateHandler>
-                    <name>AcceptCertificateHandler</name>
-                </invalidCertificateHandler>
            </client>
        </openSSL>
    </clickhouse>"""
@ -2220,13 +2220,11 @@ def test_rabbitmq_commit_on_block_write(rabbitmq_cluster):
 def test_rabbitmq_no_connection_at_startup_1(rabbitmq_cluster):
-    # no connection when table is initialized
-    rabbitmq_cluster.pause_container("rabbitmq1")
-    instance.query_and_get_error(
+    error = instance.query_and_get_error(
         """
         CREATE TABLE test.cs (key UInt64, value UInt64)
             ENGINE = RabbitMQ
-            SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
+            SETTINGS rabbitmq_host_port = 'no_connection_at_startup:5672',
                      rabbitmq_exchange_name = 'cs',
                      rabbitmq_format = 'JSONEachRow',
                      rabbitmq_flush_interval_ms=1000,
@ -2234,7 +2232,7 @@ def test_rabbitmq_no_connection_at_startup_1(rabbitmq_cluster):
                      rabbitmq_row_delimiter = '\\n';
         """
     )
-    rabbitmq_cluster.unpause_container("rabbitmq1")
+    assert "CANNOT_CONNECT_RABBITMQ" in error
 
 
 def test_rabbitmq_no_connection_at_startup_2(rabbitmq_cluster):
@ -10,8 +10,8 @@
         PARTITION BY toYYYYMM(d) ORDER BY key
     </create_query>
 
-    <fill_query>INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(5000000)</fill_query>
-    <fill_query>INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(5000000)</fill_query>
+    <fill_query>INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(2500000)</fill_query>
+    <fill_query>INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(2500000)</fill_query>
 
     <query>SELECT * FROM optimized_select_final FINAL FORMAT Null SETTINGS max_threads = 8</query>
     <query>SELECT * FROM optimized_select_final FINAL WHERE key % 10 = 0 FORMAT Null</query>
@ -1,5 +1,5 @@
 <test>
     <query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByChar(' ', materialize(s)) as w from numbers(1000000)</query>
-    <query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(1000000)</query>
-    <query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(100000)</query>
+    <query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(200000)</query>
+    <query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(20000)</query>
 </test>
@ -24,10 +24,10 @@
         <min_insert_block_size_rows>1</min_insert_block_size_rows>
     </settings>
 
-    <!-- 100 parts -->
-    <query>INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100)</query>
-    <query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(1000)</query>
-    <query>INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100)</query>
+    <!-- 50 parts -->
+    <query>INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(50)</query>
+    <query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(500)</query>
+    <query>INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50)</query>
 
     <drop_query>DROP TABLE IF EXISTS hits_wide</drop_query>
     <drop_query>DROP TABLE IF EXISTS hits_compact</drop_query>
@ -555,7 +555,7 @@ if args.report == "main":
         "Total client time for measured query runs,&nbsp;s",  # 2
         "Queries",  # 3
         "Longest query, total for measured runs,&nbsp;s",  # 4
-        "Wall clock time per query,&nbsp;s",  # 5
+        "Average query wall clock time,&nbsp;s",  # 5
         "Shortest query, total for measured runs,&nbsp;s",  # 6
         "",  # Runs #7
     ]
@ -8,13 +8,13 @@
 40
 41
-0
+41
 2 42
 2 42
 43
-0
+43
 11
 11
@ -1,13 +1,36 @@
-- { echoOn }
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1
1 104 4 4 104 1
1 105 5 4 104 1
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1
1 104 4 4 104 1
1 105 5 4 104 1
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1
1 104 4 4 104 1
1 105 5 4 104 1
SET join_algorithm = 'full_sorting_merge';
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1
1 104 4 4 104 1
1 105 5 4 104 1
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1
1 104 4 4 104 1
1 105 5 4 104 1
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1
@ -4,20 +4,29 @@ DROP TABLE IF EXISTS B;
 CREATE TABLE A(k UInt32, t UInt32, a UInt64) ENGINE = MergeTree() ORDER BY (k, t);
 INSERT INTO A(k,t,a) VALUES (1,101,1),(1,102,2),(1,103,3),(1,104,4),(1,105,5);
-CREATE TABLE B(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
-INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
-SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
-DROP TABLE B;
-CREATE TABLE B(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
-INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
-SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
-DROP TABLE B;
-CREATE TABLE B(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t);
-INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
-SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
-DROP TABLE B;
+CREATE TABLE B1(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
+INSERT INTO B1(k,t,b) VALUES (1,102,2), (1,104,4);
+CREATE TABLE B2(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
+INSERT INTO B2(k,t,b) VALUES (1,102,2), (1,104,4);
+CREATE TABLE B3(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t);
+INSERT INTO B3(k,t,b) VALUES (1,102,2), (1,104,4);
+-- { echoOn }
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
+SET join_algorithm = 'full_sorting_merge';
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
+-- { echoOff }
+DROP TABLE B1;
+DROP TABLE B2;
+DROP TABLE B3;
 DROP TABLE A;
@ -1 +1,2 @@
 3000000
+3000000
@ -2,15 +2,28 @@
 DROP TABLE IF EXISTS tvs;
 
+-- to use different algorithms for in subquery
+SET allow_experimental_analyzer = 1;
+
 CREATE TABLE tvs(k UInt32, t UInt32, tv UInt64) ENGINE = Memory;
 INSERT INTO tvs(k,t,tv) SELECT k, t, t
 FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
-CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times;
+CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times
+SETTINGS join_algorithm = 'hash';
 
 SELECT SUM(trades.price - tvs.tv) FROM
 (SELECT k, t, t as price
 FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
-CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times) trades
+CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times
+SETTINGS join_algorithm = 'hash') trades
 ASOF LEFT JOIN tvs USING(k,t);
 
+SELECT SUM(trades.price - tvs.tv) FROM
+(SELECT k, t, t as price
+FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
+CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times
+SETTINGS join_algorithm = 'hash') trades
+ASOF LEFT JOIN tvs USING(k,t)
+SETTINGS join_algorithm = 'full_sorting_merge';
+
 DROP TABLE tvs;
@ -27,3 +27,32 @@
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
1 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
2 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
2 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
3 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
3 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0
3 1970-01-01 00:00:03 3 0 1970-01-01 00:00:00 0
3 1970-01-01 00:00:04 4 0 1970-01-01 00:00:00 0
3 1970-01-01 00:00:05 5 0 1970-01-01 00:00:00 0
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
@ -11,9 +11,12 @@ INSERT INTO B(k,t,b) VALUES (1,2,2),(1,4,4);
 INSERT INTO B(k,t,b) VALUES (2,3,3);
 
 SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
 SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t);
+SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t);
+SET join_algorithm = 'full_sorting_merge';
+SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
+SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t);
 SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t);
 
 DROP TABLE A;
@ -1,27 +1,72 @@
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1970-01-01 02:00:01 1 0
2 1970-01-01 02:00:03 3 3
2 1970-01-01 02:00:05 5 3
-
2 1970-01-01 02:00:01 1 0
2 1970-01-01 02:00:03 3 3
2 1970-01-01 02:00:05 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1970-01-01 02:00:00.001 1 0
2 1970-01-01 02:00:00.003 3 3
2 1970-01-01 02:00:00.005 5 3
-
2 1970-01-01 02:00:00.001 1 0
2 1970-01-01 02:00:00.003 3 3
2 1970-01-01 02:00:00.005 5 3
@ -1,27 +0,0 @@
#!/usr/bin/env bash
set -e
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Asia/Istanbul')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Asia/Istanbul')"
do
$CLICKHOUSE_CLIENT -mn <<EOF
DROP TABLE IF EXISTS A;
DROP TABLE IF EXISTS B;
CREATE TABLE A(k UInt32, t ${typename}, a Float64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO A(k,t,a) VALUES (2,1,1),(2,3,3),(2,5,5);
CREATE TABLE B(k UInt32, t ${typename}, b Float64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO B(k,t,b) VALUES (2,3,3);
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t);
DROP TABLE A;
DROP TABLE B;
EOF
done
@ -0,0 +1,27 @@
{% for typename in ["UInt32", "UInt64", "Float64", "Float32", "DateTime('Asia/Istanbul')", "Decimal32(5)", "Decimal64(5)", "Decimal128(5)", "DateTime64(3, 'Asia/Istanbul')"] -%}
DROP TABLE IF EXISTS A;
DROP TABLE IF EXISTS B;
CREATE TABLE A(k UInt32, t {{ typename }}, a Float64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO A(k,t,a) VALUES (2,1,1),(2,3,3),(2,5,5);
CREATE TABLE B(k UInt32, t {{ typename }}, b Float64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO B(k,t,b) VALUES (2,3,3);
SELECT '-';
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t)
SETTINGS join_algorithm = 'full_sorting_merge';
SELECT '-';
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t)
SETTINGS join_algorithm = 'hash';
DROP TABLE A;
DROP TABLE B;
{% endfor %}
@ -12,3 +12,18 @@
2 1970-01-01 00:00:15 5 6.5 6
2 1970-01-01 00:00:16 5 5.6 6
2 1970-01-01 00:00:20 17 8.5 18
-
1 1970-01-01 00:00:05 1 1.5 2
1 1970-01-01 00:00:06 1 1.51 2
1 1970-01-01 00:00:10 11 11.5 12
1 1970-01-01 00:00:11 11 11.51 12
1 1970-01-01 00:00:15 5 5.5 6
1 1970-01-01 00:00:16 5 5.6 6
1 1970-01-01 00:00:20 7 7.5 8
2 1970-01-01 00:00:05 11 2.5 12
2 1970-01-01 00:00:06 11 2.51 12
2 1970-01-01 00:00:10 21 12.5 22
2 1970-01-01 00:00:11 21 12.51 22
2 1970-01-01 00:00:15 5 6.5 6
2 1970-01-01 00:00:16 5 5.6 6
2 1970-01-01 00:00:20 17 8.5 18
@ -9,7 +9,13 @@ CREATE TABLE tv(key UInt32, t DateTime, tv Float64) ENGINE = MergeTree() ORDER B
 INSERT INTO tv(key,t,tv) VALUES (1,5,1.5),(1,6,1.51),(1,10,11.5),(1,11,11.51),(1,15,5.5),(1,16,5.6),(1,20,7.5);
 INSERT INTO tv(key,t,tv) VALUES (2,5,2.5),(2,6,2.51),(2,10,12.5),(2,11,12.51),(2,15,6.5),(2,16,5.6),(2,20,8.5);
 
-SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t);
+SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t)
+;
+
+SELECT '-';
+SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t)
+SETTINGS join_algorithm = 'full_sorting_merge';
 
 DROP TABLE md;
 DROP TABLE tv;
@ -1,3 +1,4 @@
- default / join_use_nulls = 0 -
1 1 0 0
1 2 1 2
1 3 1 2
@ -34,3 +35,114 @@
2 1 2 3
2 2 2 3
1 2 1 2
- full_sorting_merge / join_use_nulls = 0 -
1 1 0 0
1 2 1 2
1 3 1 2
2 1 0 0
2 2 0 0
2 3 2 3
3 1 0 0
3 2 0 0
3 3 0 0
9
1 2 1 2
1 3 1 2
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 3 1 2
-
1 1 1 2
1 2 1 4
1 3 1 4
2 1 2 3
2 2 2 3
1 2 1 2
- default / join_use_nulls = 1 -
1 1 \N \N
1 2 1 2
1 3 1 2
2 1 \N \N
2 2 \N \N
2 3 2 3
3 1 \N \N
3 2 \N \N
3 3 \N \N
9
1 2 1 2
1 3 1 2
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 3 1 2
-
1 1 1 2
1 2 1 4
1 3 1 4
2 1 2 3
2 2 2 3
1 2 1 2
- full_sorting_merge / join_use_nulls = 1 -
1 1 \N \N
1 2 1 2
1 3 1 2
2 1 \N \N
2 2 \N \N
2 3 2 3
3 1 \N \N
3 2 \N \N
3 3 \N \N
9
1 2 1 2
1 3 1 2
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 3 1 2
-
1 1 1 2
1 2 1 4
1 3 1 4
2 1 2 3
2 2 2 3
1 2 1 2
@ -7,6 +7,14 @@ CREATE TABLE B(b UInt32, t UInt32) ENGINE = Memory;
 INSERT INTO A (a,t) VALUES (1,1),(1,2),(1,3), (2,1),(2,2),(2,3), (3,1),(3,2),(3,3);
 INSERT INTO B (b,t) VALUES (1,2),(1,4),(2,3);
 
+{% for join_use_nulls in [0, 1] -%}
+{% for join_algorithm in ['default', 'full_sorting_merge'] -%}
+
+SET join_algorithm = '{{ join_algorithm }}';
+
+SELECT '- {{ join_algorithm }} / join_use_nulls = {{ join_use_nulls }} -';
+set join_use_nulls = {{ join_use_nulls }};
+
 SELECT A.a, A.t, B.b, B.t FROM A ASOF LEFT JOIN B ON A.a == B.b AND A.t >= B.t ORDER BY (A.a, A.t);
 SELECT count() FROM A ASOF LEFT JOIN B ON A.a == B.b AND B.t <= A.t;
 SELECT A.a, A.t, B.b, B.t FROM A ASOF INNER JOIN B ON B.t <= A.t AND A.a == B.b ORDER BY (A.a, A.t);
@ -28,5 +36,8 @@ ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A
 WHERE B.t != 3 ORDER BY (A.a, A.t)
 ;
 
+{% endfor -%}
+{% endfor -%}
+
 DROP TABLE A;
 DROP TABLE B;
@ -12,10 +12,10 @@ ORDER BY (primary_key);
 INSERT INTO set_array
 select
-  toString(intDiv(number, 1000000)) as primary_key,
+  toString(intDiv(number, 100000)) as primary_key,
   array(number) as index_array
 from system.numbers
-limit 10000000;
+limit 1000000;
 
 OPTIMIZE TABLE set_array FINAL;
@ -1,3 +1,6 @@
v1 o1 ['s2','s1']
v1 o2 ['s4']
v2 o3 ['s5','s3']
v1 o1 ['s2','s1']
v1 o2 ['s4']
v2 o3 ['s5','s3']