Merge remote-tracking branch 'origin/master' into pr-local-plan

Igor Nikonov 2024-07-22 13:45:31 +00:00
commit 9900abade6
132 changed files with 4201 additions and 1003 deletions

.github/actions/release/action.yml (new file)

@ -0,0 +1,168 @@
name: Release
description: Makes patch releases and creates new release branch
inputs:
ref:
description: 'Git reference (branch or commit sha) from which to create the release'
required: true
type: string
type:
description: 'The type of release: "new" for a new release or "patch" for a patch release'
required: true
type: choice
options:
- patch
- new
dry-run:
description: 'Dry run'
required: false
default: true
type: boolean
token:
required: true
type: string
runs:
using: "composite"
steps:
- name: Prepare Release Info
shell: bash
run: |
python3 ./tests/ci/create_release.py --prepare-release-info \
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
${{ inputs.dry-run && '--dry-run' || '' }}
echo "::group::Release Info"
python3 -m json.tool /tmp/release_info.json
echo "::endgroup::"
release_tag=$(jq -r '.release_tag' /tmp/release_info.json)
commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json)
echo "Release Tag: $release_tag"
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
- name: Download All Release Artifacts
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push Git Tag for the Release
shell: bash
run: |
python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push New Release Branch
if: ${{ inputs.type == 'new' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump CH Version and Update Contributors' List
shell: bash
run: |
python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump Docker versions, Changelog, Security
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
git checkout master
python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security"
echo "List versions"
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
echo "Update docker version"
./utils/list-versions/update-docker-version.sh
echo "Generate ChangeLog"
export CI=1
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
--volume=".:/ClickHouse" clickhouse/style-test \
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token=${{ inputs.token }} --jobs=5 \
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
echo "Generate Security"
python3 ./utils/security-generator/generate_security.py > SECURITY.md
git diff HEAD
- name: Create ChangeLog PR
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
uses: peter-evans/create-pull-request@v6
with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
token: ${{ inputs.token }}
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
branch: auto/${{ env.RELEASE_TAG }}
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
delete-branch: true
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
labels: do not test
body: |
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)
- name: Complete previous steps and Restore git state
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --set-progress-completed
git reset --hard HEAD
git checkout "$GITHUB_REF_NAME"
- name: Create GH Release
shell: bash
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export TGZ Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test TGZ Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export RPM Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test RPM Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export Debian Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test Debian Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Docker clickhouse/clickhouse-server building
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
cd "./tests/ci"
python3 ./create_release.py --set-progress-started --progress "docker server release"
export CHECK_NAME="Docker server image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
python3 ./create_release.py --set-progress-completed
- name: Docker clickhouse/clickhouse-keeper building
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
cd "./tests/ci"
python3 ./create_release.py --set-progress-started --progress "docker keeper release"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
python3 ./create_release.py --set-progress-completed
- name: Set current Release progress to Completed with OK
shell: bash
run: |
python3 ./tests/ci/create_release.py --set-progress-started --progress "completed"
python3 ./tests/ci/create_release.py --set-progress-completed
- name: Post Slack Message
if: ${{ !cancelled() }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run && '--dry-run' || '' }}

View File

@ -1,44 +1,110 @@
name: AutoRelease
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
DRY_RUN: true
concurrency:
group: auto-release
group: release
on: # yamllint disable-line rule:truthy
# schedule:
# - cron: '0 10-16 * * 1-5'
# Workflow uses a test bucket for packages and dry run mode (no real releases)
schedule:
- cron: '0 9 * * *'
- cron: '0 15 * * *'
workflow_dispatch:
inputs:
dry-run:
description: 'Dry run'
required: false
default: true
type: boolean
jobs:
CherryPick:
runs-on: [self-hosted, style-checker-aarch64]
AutoRelease:
runs-on: [self-hosted, release-maker]
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/cherry_pick
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
REPO_OWNER=ClickHouse
REPO_NAME=ClickHouse
REPO_TEAM=core
EOF
- name: Set DRY_RUN for schedule
if: ${{ github.event_name == 'schedule' }}
run: echo "DRY_RUN=true" >> "$GITHUB_ENV"
- name: Set DRY_RUN for dispatch
if: ${{ github.event_name == 'workflow_dispatch' }}
run: echo "DRY_RUN=${{ github.event.inputs.dry-run }}" >> "$GITHUB_ENV"
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Auto-release
- name: Auto Release Prepare
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --release-after-days=3
- name: Cleanup
if: always()
python3 auto_release.py --prepare
echo "::group::Auto Release Info"
python3 -m json.tool /tmp/autorelease_info.json
echo "::endgroup::"
{
echo 'AUTO_RELEASE_PARAMS<<EOF'
cat /tmp/autorelease_info.json
echo 'EOF'
} >> "$GITHUB_ENV"
- name: Post Release Branch statuses
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-status
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[0].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[1].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2] && fromJson(env.AUTO_RELEASE_PARAMS).releases[2].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3] && fromJson(env.AUTO_RELEASE_PARAMS).releases[3].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4] && fromJson(env.AUTO_RELEASE_PARAMS).releases[4].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Post Slack Message
if: ${{ !cancelled() }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }}
- name: Clean up
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:

View File

@ -2,7 +2,6 @@ name: CreateRelease
concurrency:
group: release
'on':
workflow_dispatch:
inputs:
@ -31,136 +30,15 @@ jobs:
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: |
cat >> "$GITHUB_ENV" << 'EOF'
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
RELEASE_INFO_FILE=${{ runner.temp }}/release_info.json
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Prepare Release Info
run: |
python3 ./tests/ci/create_release.py --prepare-release-info \
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
--outfile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
echo "::group::Release Info"
python3 -m json.tool "$RELEASE_INFO_FILE"
echo "::endgroup::"
release_tag=$(jq -r '.release_tag' "$RELEASE_INFO_FILE")
commit_sha=$(jq -r '.commit_sha' "$RELEASE_INFO_FILE")
echo "Release Tag: $release_tag"
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
- name: Download All Release Artifacts
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push Git Tag for the Release
run: |
python3 ./tests/ci/create_release.py --push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push New Release Branch
if: ${{ inputs.type == 'new' }}
run: |
python3 ./tests/ci/create_release.py --push-new-release-branch --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump CH Version and Update Contributors' List
run: |
python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Checkout master
run: |
git checkout master
- name: Bump Docker versions, Changelog, Security
if: ${{ inputs.type == 'patch' }}
run: |
[ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1
echo "List versions"
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
echo "Update docker version"
./utils/list-versions/update-docker-version.sh
echo "Generate ChangeLog"
export CI=1
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
--volume=".:/ClickHouse" clickhouse/style-test \
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token="$GH_TOKEN" --jobs=5 \
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
echo "Generate Security"
python3 ./utils/security-generator/generate_security.py > SECURITY.md
git diff HEAD
- name: Create ChangeLog PR
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
uses: peter-evans/create-pull-request@v6
- name: Call Release Action
uses: ./.github/actions/release
with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
branch: auto/${{ env.RELEASE_TAG }}
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
delete-branch: true
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
labels: do not test
body: |
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)
- name: Reset changes if Dry-run
if: ${{ inputs.dry-run }}
run: |
git reset --hard HEAD
- name: Checkout back to GITHUB_REF
run: |
git checkout "$GITHUB_REF_NAME"
- name: Create GH Release
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --create-gh-release \
--infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export TGZ Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test TGZ Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export RPM Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test RPM Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export Debian Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test Debian Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Docker clickhouse/clickhouse-server building
if: ${{ inputs.type == 'patch' }}
run: |
cd "./tests/ci"
export CHECK_NAME="Docker server image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
- name: Docker clickhouse/clickhouse-keeper building
if: ${{ inputs.type == 'patch' }}
run: |
cd "./tests/ci"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
- name: Post Slack Message
if: always()
run: |
echo Slack Message
ref: ${{ inputs.ref }}
type: ${{ inputs.type }}
dry-run: ${{ inputs.dry-run }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}

View File

@ -297,7 +297,7 @@ Algorithm requires the special column in tables. This column:
- Must contain an ordered sequence.
- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md).
- Can't be the only column in the `JOIN` clause.
- For the `hash` join algorithm it can't be the only column in the `JOIN` clause.
Syntax `ASOF JOIN ... ON`:
@ -337,7 +337,8 @@ For example, consider the following tables:
`ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can't be joined.
:::note
`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
`ASOF JOIN` is supported only by `hash` and `full_sorting_merge` join algorithms.
It's **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
:::
## PASTE JOIN Usage

View File

@ -6,38 +6,38 @@ sidebar_label: Playground
# ClickHouse Playground {#clickhouse-playground}
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
Several example datasets are available in Playground.
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) позволяет пользователям экспериментировать с ClickHouse, выполняя запросы мгновенно, без необходимости настройки сервера или кластера.
В Playground доступны несколько примеров наборов данных.
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
Вы можете выполнять запросы к Playground, используя любой HTTP-клиент, например [curl](https://curl.haxx.se) или [wget](https://www.gnu.org/software/wget/), или настроить соединение, используя драйверы [JDBC](../interfaces/jdbc.md) или [ODBC](../interfaces/odbc.md). Дополнительную информацию о программных продуктах, поддерживающих ClickHouse, можно найти [здесь](../interfaces/index.md).
## Credentials {#credentials}
## Учетные данные {#credentials}
| Parameter | Value |
| Параметр | Значение |
|:--------------------|:-----------------------------------|
| HTTPS endpoint | `https://play.clickhouse.com:443/` |
| Native TCP endpoint | `play.clickhouse.com:9440` |
| User | `explorer` or `play` |
| Password | (empty) |
| HTTPS-адрес | `https://play.clickhouse.com:443/` |
| TCP-адрес | `play.clickhouse.com:9440` |
| Пользователь | `explorer` или `play` |
| Пароль | (пусто) |
## Limitations {#limitations}
## Ограничения {#limitations}
The queries are executed as a read-only user. It implies some limitations:
Запросы выполняются от имени пользователя с правами только на чтение. Это предполагает некоторые ограничения:
- DDL queries are not allowed
- INSERT queries are not allowed
- DDL-запросы не разрешены
- INSERT-запросы не разрешены
The service also has quotas on its usage.
Сервис также имеет квоты на использование.
## Examples {#examples}
## Примеры {#examples}
HTTPS endpoint example with `curl`:
Пример использования HTTPS-адреса с `curl`:
``` bash
```bash
curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'"
```
TCP endpoint example with [CLI](../interfaces/cli.md):
Пример использования TCP-адреса с [CLI](../interfaces/cli.md):
``` bash
clickhouse client --secure --host play.clickhouse.com --user explorer

View File

@ -68,6 +68,41 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes)
return nullptr;
}
/// Checks if the node is a combination of isNull and notEquals functions of the same two arguments
bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs)
{
QueryTreeNodePtrWithHashSet all_arguments;
for (const auto & node : nodes)
{
const auto * func_node = node->as<FunctionNode>();
if (!func_node)
return false;
const auto & arguments = func_node->getArguments().getNodes();
if (func_node->getFunctionName() == "isNull" && arguments.size() == 1)
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2)
{
if (arguments[0]->isEqual(*arguments[1]))
return false;
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[1]));
}
else
return false;
if (all_arguments.size() > 2)
return false;
}
if (all_arguments.size() != 2)
return false;
lhs = all_arguments.begin()->node;
rhs = std::next(all_arguments.begin())->node;
return true;
}
bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value)
{
const auto * constant_node = node->as<ConstantNode>();
@ -213,11 +248,14 @@ private:
else if (func_name == "and")
{
const auto & and_arguments = argument_function->getArguments().getNodes();
bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull");
if (all_are_is_null)
QueryTreeNodePtr is_null_lhs_arg;
QueryTreeNodePtr is_null_rhs_arg;
if (matchIsNullOfTwoArgs(and_arguments, is_null_lhs_arg, is_null_rhs_arg))
{
is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1);
is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1);
is_null_argument_to_indices[is_null_lhs_arg].push_back(or_operands.size() - 1);
is_null_argument_to_indices[is_null_rhs_arg].push_back(or_operands.size() - 1);
continue;
}
/// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` can be replaced with `a = b`

View File

@ -4124,7 +4124,9 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo
auto * column_to_interpolate = interpolate_node_typed.getExpression()->as<IdentifierNode>();
if (!column_to_interpolate)
throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found",
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"INTERPOLATE can work only for identifiers, but {} is found",
interpolate_node_typed.getExpression()->formatASTForErrorMessage());
auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName();

View File

@ -366,13 +366,10 @@ void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) cons
hash.update(wbuf.str().c_str(), wbuf.str().size());
}
void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnAggregateFunction::getWeakHash32() const
{
auto s = data.size();
if (hash.getData().size() != data.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), hash.getData().size());
WeakHash32 hash(s);
auto & hash_data = hash.getData();
std::vector<UInt8> v;
@ -383,6 +380,8 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
wbuf.finalize();
hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]);
}
return hash;
}
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const

View File

@ -177,7 +177,7 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;

View File

@ -271,15 +271,12 @@ void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const
getData().updateHashWithValue(offset + i, hash);
}
void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnArray::getWeakHash32() const
{
auto s = offsets->size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", s, hash.getData().size());
WeakHash32 hash(s);
WeakHash32 internal_hash(data->size());
data->updateWeakHash32(internal_hash);
WeakHash32 internal_hash = data->getWeakHash32();
Offset prev_offset = 0;
const auto & offsets_data = getOffsets();
@ -300,6 +297,8 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
prev_offset = offsets_data[i];
}
return hash;
}
void ColumnArray::updateHashFast(SipHash & hash) const

View File

@ -82,7 +82,7 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;

View File

@ -3,6 +3,7 @@
#include <optional>
#include <Core/Field.h>
#include <Columns/IColumn.h>
#include <Common/WeakHash.h>
#include <IO/BufferWithOwnMemory.h>
@ -98,7 +99,7 @@ public:
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); }
WeakHash32 getWeakHash32() const override { throwMustBeDecompressed(); }
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
void expand(const Filter &, bool) override { throwMustBeDecompressed(); }

View File

@ -137,18 +137,10 @@ void ColumnConst::updatePermutation(PermutationSortDirection /*direction*/, Perm
{
}
void ColumnConst::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnConst::getWeakHash32() const
{
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 element_hash(1);
data->updateWeakHash32(element_hash);
size_t data_hash = element_hash.getData()[0];
for (auto & value : hash.getData())
value = static_cast<UInt32>(intHashCRC32(data_hash, value));
WeakHash32 element_hash = data->getWeakHash32();
return WeakHash32(s, element_hash.getData()[0]);
}
void ColumnConst::compareColumn(

View File

@ -204,7 +204,7 @@ public:
data->updateHashWithValue(0, hash);
}
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override
{

View File

@ -28,7 +28,6 @@ namespace ErrorCodes
extern const int PARAMETER_OUT_OF_BOUND;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
}
template <is_decimal T>
@ -76,13 +75,10 @@ void ColumnDecimal<T>::updateHashWithValue(size_t n, SipHash & hash) const
}
template <is_decimal T>
void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnDecimal<T>::getWeakHash32() const
{
auto s = data.size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 hash(s);
const T * begin = data.data();
const T * end = begin + s;
@ -94,6 +90,8 @@ void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
++begin;
++hash_data;
}
return hash;
}
template <is_decimal T>

View File

@ -102,7 +102,7 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;

View File

@ -4,6 +4,7 @@
#include <Columns/ColumnVector.h>
#include <Columns/ColumnVariant.h>
#include <DataTypes/IDataType.h>
#include <Common/WeakHash.h>
namespace DB
@ -174,9 +175,9 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override
WeakHash32 getWeakHash32() const override
{
variant_column->updateWeakHash32(hash);
return variant_column->getWeakHash32();
}
void updateHashFast(SipHash & hash) const override

View File

@ -137,14 +137,10 @@ void ColumnFixedString::updateHashWithValue(size_t index, SipHash & hash) const
hash.update(reinterpret_cast<const char *>(&chars[n * index]), n);
}
void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnFixedString::getWeakHash32() const
{
auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, "
"hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 hash(s);
const UInt8 * pos = chars.data();
UInt32 * hash_data = hash.getData().data();
@ -156,6 +152,8 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
pos += n;
++hash_data;
}
return hash;
}
void ColumnFixedString::updateHashFast(SipHash & hash) const

View File

@ -133,7 +133,7 @@ public:
void updateHashWithValue(size_t index, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;

View File

@ -4,6 +4,7 @@
#include <Core/NamesAndTypes.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB
@ -130,9 +131,9 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName());
}
void updateWeakHash32(WeakHash32 &) const override
WeakHash32 getWeakHash32() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateWeakHash32 is not implemented for {}", getName());
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getWeakHash32 is not implemented for {}", getName());
}
void updateHashFast(SipHash &) const override

View File

@ -7,8 +7,7 @@
#include <Common/HashTable/HashMap.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
#include "Storages/IndicesDescription.h"
#include "base/types.h"
#include <base/types.h>
#include <base/sort.h>
#include <base/scope_guard.h>
@ -320,19 +319,10 @@ const char * ColumnLowCardinality::skipSerializedInArena(const char * pos) const
return getDictionary().skipSerializedInArena(pos);
}
void ColumnLowCardinality::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnLowCardinality::getWeakHash32() const
{
auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const auto & dict = getDictionary().getNestedColumn();
WeakHash32 dict_hash(dict->size());
dict->updateWeakHash32(dict_hash);
idx.updateWeakHash(hash, dict_hash);
WeakHash32 dict_hash = getDictionary().getNestedColumn()->getWeakHash32();
return idx.getWeakHash(dict_hash);
}
void ColumnLowCardinality::updateHashFast(SipHash & hash) const
@ -832,10 +822,11 @@ bool ColumnLowCardinality::Index::containsDefault() const
return contains;
}
void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const
WeakHash32 ColumnLowCardinality::Index::getWeakHash(const WeakHash32 & dict_hash) const
{
WeakHash32 hash(positions->size());
auto & hash_data = hash.getData();
auto & dict_hash_data = dict_hash.getData();
const auto & dict_hash_data = dict_hash.getData();
auto update_weak_hash = [&](auto x)
{
@ -844,10 +835,11 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 &
auto size = data.size();
for (size_t i = 0; i < size; ++i)
hash_data[i] = static_cast<UInt32>(intHashCRC32(dict_hash_data[data[i]], hash_data[i]));
hash_data[i] = dict_hash_data[data[i]];
};
callForType(std::move(update_weak_hash), size_of_type);
return hash;
}
void ColumnLowCardinality::Index::collectSerializedValueSizes(

View File

@ -111,7 +111,7 @@ public:
getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash);
}
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash &) const override;
@ -325,7 +325,7 @@ public:
bool containsDefault() const;
void updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const;
WeakHash32 getWeakHash(const WeakHash32 & dict_hash) const;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const;

View File

@ -143,9 +143,9 @@ void ColumnMap::updateHashWithValue(size_t n, SipHash & hash) const
nested->updateHashWithValue(n, hash);
}
void ColumnMap::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnMap::getWeakHash32() const
{
nested->updateWeakHash32(hash);
return nested->getWeakHash32();
}
void ColumnMap::updateHashFast(SipHash & hash) const

View File

@ -64,7 +64,7 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)

View File

@ -56,25 +56,21 @@ void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
getNestedColumn().updateHashWithValue(n, hash);
}
void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnNullable::getWeakHash32() const
{
auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 old_hash = hash;
nested_column->updateWeakHash32(hash);
WeakHash32 hash = nested_column->getWeakHash32();
const auto & null_map_data = getNullMapData();
auto & hash_data = hash.getData();
auto & old_hash_data = old_hash.getData();
/// Use old data for nulls.
/// Use default for nulls.
for (size_t row = 0; row < s; ++row)
if (null_map_data[row])
hash_data[row] = old_hash_data[row];
hash_data[row] = WeakHash32::kDefaultInitialValue;
return hash;
}
void ColumnNullable::updateHashFast(SipHash & hash) const

View File

@ -133,7 +133,7 @@ public:
void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override;
// Special function for nullable minmax index

View File

@ -5,6 +5,7 @@
#include <Core/Names.h>
#include <DataTypes/Serializations/SubcolumnsTree.h>
#include <Common/PODArray.h>
#include <Common/WeakHash.h>
#include <DataTypes/IDataType.h>
@ -252,7 +253,7 @@ public:
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
WeakHash32 getWeakHash32() const override { throwMustBeConcrete(); }
void updateHashFast(SipHash & hash) const override;
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
bool hasEqualValues() const override { throwMustBeConcrete(); }

View File

@ -678,20 +678,22 @@ void ColumnSparse::updateHashWithValue(size_t n, SipHash & hash) const
values->updateHashWithValue(getValueIndex(n), hash);
}
void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnSparse::getWeakHash32() const
{
if (hash.getData().size() != _size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", _size, hash.getData().size());
WeakHash32 values_hash = values->getWeakHash32();
WeakHash32 hash(size());
auto & hash_data = hash.getData();
auto & values_hash_data = values_hash.getData();
auto offset_it = begin();
auto & hash_data = hash.getData();
for (size_t i = 0; i < _size; ++i, ++offset_it)
{
size_t value_index = offset_it.getValueIndex();
auto data_ref = values->getDataAt(value_index);
hash_data[i] = ::updateWeakHash32(reinterpret_cast<const UInt8 *>(data_ref.data), data_ref.size, hash_data[i]);
hash_data[i] = values_hash_data[value_index];
}
return hash;
}
void ColumnSparse::updateHashFast(SipHash & hash) const

View File

@ -139,7 +139,7 @@ public:
void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override;

View File

@ -108,13 +108,10 @@ MutableColumnPtr ColumnString::cloneResized(size_t to_size) const
return res;
}
void ColumnString::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnString::getWeakHash32() const
{
auto s = offsets.size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 hash(s);
const UInt8 * pos = chars.data();
UInt32 * hash_data = hash.getData().data();
@ -130,6 +127,8 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const
prev_offset = offset;
++hash_data;
}
return hash;
}

View File

@ -212,7 +212,7 @@ public:
hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size);
}
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override
{

View File

@ -310,16 +310,15 @@ void ColumnTuple::updateHashWithValue(size_t n, SipHash & hash) const
column->updateHashWithValue(n, hash);
}
void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnTuple::getWeakHash32() const
{
auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 hash(s);
for (const auto & column : columns)
column->updateWeakHash32(hash);
hash.update(column->getWeakHash32());
return hash;
}
void ColumnTuple::updateHashFast(SipHash & hash) const

View File

@ -81,7 +81,7 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;

View File

@ -789,36 +789,26 @@ void ColumnVariant::updateHashWithValue(size_t n, SipHash & hash) const
variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash);
}
void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnVariant::getWeakHash32() const
{
auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
/// If we have only NULLs, return a hash filled with the default value.
if (hasOnlyNulls())
return;
return WeakHash32(s);
/// Optimization for case when there is only 1 non-empty variant and no NULLs.
/// In this case we can just calculate weak hash for this variant.
if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
{
variants[*non_empty_local_discr]->updateWeakHash32(hash);
return;
}
return variants[*non_empty_local_discr]->getWeakHash32();
/// Calculate weak hash for all variants.
std::vector<WeakHash32> nested_hashes;
for (const auto & variant : variants)
{
WeakHash32 nested_hash(variant->size());
variant->updateWeakHash32(nested_hash);
nested_hashes.emplace_back(std::move(nested_hash));
}
nested_hashes.emplace_back(variant->getWeakHash32());
/// For each row, the hash is the hash of the corresponding row from the corresponding variant.
WeakHash32 hash(s);
auto & hash_data = hash.getData();
const auto & local_discriminators_data = getLocalDiscriminators();
const auto & offsets_data = getOffsets();
@ -827,11 +817,10 @@ void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
Discriminator discr = local_discriminators_data[i];
/// Update hash only for non-NULL values
if (discr != NULL_DISCRIMINATOR)
{
auto nested_hash = nested_hashes[local_discriminators_data[i]].getData()[offsets_data[i]];
hash_data[i] = static_cast<UInt32>(hashCRC32(nested_hash, hash_data[i]));
}
hash_data[i] = nested_hashes[discr].getData()[offsets_data[i]];
}
return hash;
}
void ColumnVariant::updateHashFast(SipHash & hash) const

View File

@ -213,7 +213,7 @@ public:
const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos);
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;

View File

@ -73,13 +73,10 @@ void ColumnVector<T>::updateHashWithValue(size_t n, SipHash & hash) const
}
template <typename T>
void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnVector<T>::getWeakHash32() const
{
auto s = data.size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 hash(s);
const T * begin = data.data();
const T * end = begin + s;
@ -91,6 +88,8 @@ void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
++begin;
++hash_data;
}
return hash;
}
template <typename T>

View File

@ -114,7 +114,7 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;

View File

@ -300,10 +300,10 @@ public:
/// passed bytes to hash must identify sequence of values unambiguously.
virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0;
/// Update hash function value. Hash is calculated for each element.
/// Get hash function value. Hash is calculated for each element.
/// It's a fast weak hash function. Mainly needed to scatter data between threads.
/// The returned WeakHash32 has the same size as the column.
virtual void updateWeakHash32(WeakHash32 & hash) const = 0;
virtual WeakHash32 getWeakHash32() const = 0;
/// Update state of hash with all column.
virtual void updateHashFast(SipHash & hash) const = 0;
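As an illustration (not part of this commit), a minimal sketch of how the value-returning API can serve its stated purpose of scattering rows between threads; the helper name and the modulo bucketing are assumptions:

```cpp
#include <vector>
#include <Columns/IColumn.h>
#include <Common/WeakHash.h>

/// Hypothetical helper: pick a bucket (e.g. a worker thread) for every row of a column.
std::vector<size_t> scatterRowsByWeakHash(const DB::IColumn & column, size_t num_buckets)
{
    DB::WeakHash32 hash = column.getWeakHash32();   /// one 32-bit value per row
    const auto & hash_data = hash.getData();        /// same size as the column

    std::vector<size_t> bucket_of_row(hash_data.size());
    for (size_t row = 0; row < hash_data.size(); ++row)
        bucket_of_row[row] = hash_data[row] % num_buckets;
    return bucket_of_row;
}
```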

View File

@ -1,6 +1,7 @@
#pragma once
#include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB
@ -63,8 +64,9 @@ public:
{
}
void updateWeakHash32(WeakHash32 & /*hash*/) const override
WeakHash32 getWeakHash32() const override
{
return WeakHash32(s);
}
void updateHashFast(SipHash & /*hash*/) const override

View File

@ -1,6 +1,7 @@
#pragma once
#include <optional>
#include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB
{
@ -166,9 +167,9 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique.");
}
void updateWeakHash32(WeakHash32 &) const override
WeakHash32 getWeakHash32() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method updateWeakHash32 is not supported for ColumnUnique.");
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getWeakHash32 is not supported for ColumnUnique.");
}
void updateHashFast(SipHash &) const override

View File

@ -60,8 +60,7 @@ TEST(WeakHash32, ColumnVectorU8)
data.push_back(i);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -77,8 +76,7 @@ TEST(WeakHash32, ColumnVectorI8)
data.push_back(i);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -94,8 +92,7 @@ TEST(WeakHash32, ColumnVectorU16)
data.push_back(i);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -111,8 +108,7 @@ TEST(WeakHash32, ColumnVectorI16)
data.push_back(i);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -128,8 +124,7 @@ TEST(WeakHash32, ColumnVectorU32)
data.push_back(i << 16u);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -145,8 +140,7 @@ TEST(WeakHash32, ColumnVectorI32)
data.push_back(i << 16);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -162,8 +156,7 @@ TEST(WeakHash32, ColumnVectorU64)
data.push_back(i << 32u);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -179,8 +172,7 @@ TEST(WeakHash32, ColumnVectorI64)
data.push_back(i << 32);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -204,8 +196,7 @@ TEST(WeakHash32, ColumnVectorU128)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), eq_data);
}
@ -221,8 +212,7 @@ TEST(WeakHash32, ColumnVectorI128)
data.push_back(i << 32);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -238,8 +228,7 @@ TEST(WeakHash32, ColumnDecimal32)
data.push_back(i << 16);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -255,8 +244,7 @@ TEST(WeakHash32, ColumnDecimal64)
data.push_back(i << 32);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -272,8 +260,7 @@ TEST(WeakHash32, ColumnDecimal128)
data.push_back(i << 32);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -294,8 +281,7 @@ TEST(WeakHash32, ColumnString1)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -331,8 +317,7 @@ TEST(WeakHash32, ColumnString2)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -369,8 +354,7 @@ TEST(WeakHash32, ColumnString3)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -397,8 +381,7 @@ TEST(WeakHash32, ColumnFixedString)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -444,8 +427,7 @@ TEST(WeakHash32, ColumnArray)
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
WeakHash32 hash(col_arr->size());
col_arr->updateWeakHash32(hash);
WeakHash32 hash = col_arr->getWeakHash32();
checkColumn(hash.getData(), eq_data);
}
@ -479,8 +461,7 @@ TEST(WeakHash32, ColumnArray2)
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
WeakHash32 hash(col_arr->size());
col_arr->updateWeakHash32(hash);
WeakHash32 hash = col_arr->getWeakHash32();
checkColumn(hash.getData(), eq_data);
}
@ -536,8 +517,7 @@ TEST(WeakHash32, ColumnArrayArray)
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2));
WeakHash32 hash(col_arr_arr->size());
col_arr_arr->updateWeakHash32(hash);
WeakHash32 hash = col_arr_arr->getWeakHash32();
checkColumn(hash.getData(), eq_data);
}
@ -555,8 +535,7 @@ TEST(WeakHash32, ColumnConst)
auto col_const = ColumnConst::create(std::move(inner_col), 256);
WeakHash32 hash(col_const->size());
col_const->updateWeakHash32(hash);
WeakHash32 hash = col_const->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -576,8 +555,7 @@ TEST(WeakHash32, ColumnLowcardinality)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -602,8 +580,7 @@ TEST(WeakHash32, ColumnNullable)
auto col_null = ColumnNullable::create(std::move(col), std::move(mask));
WeakHash32 hash(col_null->size());
col_null->updateWeakHash32(hash);
WeakHash32 hash = col_null->getWeakHash32();
checkColumn(hash.getData(), eq);
}
@ -633,8 +610,7 @@ TEST(WeakHash32, ColumnTupleUInt64UInt64)
columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size());
col_tuple->updateWeakHash32(hash);
WeakHash32 hash = col_tuple->getWeakHash32();
checkColumn(hash.getData(), eq);
}
@ -671,8 +647,7 @@ TEST(WeakHash32, ColumnTupleUInt64String)
columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size());
col_tuple->updateWeakHash32(hash);
WeakHash32 hash = col_tuple->getWeakHash32();
checkColumn(hash.getData(), eq);
}
@ -709,8 +684,7 @@ TEST(WeakHash32, ColumnTupleUInt64FixedString)
columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size());
col_tuple->updateWeakHash32(hash);
WeakHash32 hash = col_tuple->getWeakHash32();
checkColumn(hash.getData(), eq);
}
@ -756,8 +730,7 @@ TEST(WeakHash32, ColumnTupleUInt64Array)
columns.emplace_back(ColumnArray::create(std::move(val), std::move(off)));
auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size());
col_tuple->updateWeakHash32(hash);
WeakHash32 hash = col_tuple->getWeakHash32();
checkColumn(hash.getData(), eq_data);
}

View File

@ -206,7 +206,7 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
}
else
{
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(old_size);
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size);
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
}
}
@ -239,7 +239,7 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
{
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(old_size);
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size);
throw DB::ErrnoException(
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY,
"Allocator: Cannot realloc from {} to {}",

View File

@ -244,6 +244,15 @@ private:
const char * className() const noexcept override { return "DB::ErrnoException"; }
};
/// An exception to use in unit tests to test interfaces.
/// It is distinguished from others, so it does not have to be logged.
class TestException : public Exception
{
public:
using Exception::Exception;
};
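A hedged sketch of how such a test-only exception might be used with the googletest harness seen elsewhere in this commit; the function under test and the error-code plumbing are illustrative assumptions, not code from this change:

```cpp
#include <gtest/gtest.h>
#include <Common/Exception.h>

namespace DB::ErrorCodes { extern const int LOGICAL_ERROR; }

/// Illustrative interface under test: reports a failure by throwing.
static void failingOperation()
{
    throw DB::TestException(DB::ErrorCodes::LOGICAL_ERROR, "induced failure for the test");
}

TEST(TestExceptionUsage, ErrorPathIsExercised)
{
    /// Because the type is dedicated to tests, it does not have to be logged
    /// as if it were a real production error.
    EXPECT_THROW(failingOperation(), DB::TestException);
}
```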
using Exceptions = std::vector<std::exception_ptr>;
/** Try to write an exception to the log (and forget about it).

View File

@ -23,8 +23,20 @@ namespace DB
LazyPipeFDs TraceSender::pipe;
static thread_local bool inside_send = false;
void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Extras extras)
{
/** The method shouldn't be called recursively or throw exceptions.
* There are several reasons:
* - avoid infinite recursion when some of the subsequent functions invoke tracing;
* - avoid inconsistent writes if the method was interrupted by a signal handler in the middle of writing,
* and then another trace is sent (e.g., from the query profiler).
*/
if (unlikely(inside_send))
return;
inside_send = true;
DENY_ALLOCATIONS_IN_SCOPE;
constexpr size_t buf_size = sizeof(char) /// TraceCollector stop flag
+ sizeof(UInt8) /// String size
+ QUERY_ID_MAX_LEN /// Maximum query_id length
@ -80,6 +92,8 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext
writePODBinary(extras.increment, out);
out.next();
inside_send = false;
}
}
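For illustration only, the same guard pattern in isolation (names and the body are placeholders); it shows why a thread-local flag is enough to drop nested calls on the same thread without any locking:

```cpp
#include <cstdio>

static thread_local bool inside_send_example = false;   /// per-thread, so no synchronization is needed

void sendTraceEventExample()
{
    if (inside_send_example)
        return;                  /// a nested call (tracing triggered while tracing) is silently dropped
    inside_send_example = true;

    std::puts("write the trace event");   /// stand-in for the real, non-throwing write to the pipe

    inside_send_example = false;          /// re-arm the guard for the next top-level call
}
```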

View File

@ -1,2 +1,24 @@
#include <Common/WeakHash.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
void WeakHash32::update(const WeakHash32 & other)
{
size_t size = data.size();
if (size != other.data.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match: "
"left size is {}, right size is {}", size, other.data.size());
for (size_t i = 0; i < size; ++i)
data[i] = static_cast<UInt32>(intHashCRC32(other.data[i], data[i]));
}
}
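A small hedged example (function name assumed) of what `update` is for: folding the per-row hashes of several columns into one, as `ColumnTuple::getWeakHash32` does elsewhere in this commit:

```cpp
#include <Columns/IColumn.h>
#include <Common/WeakHash.h>

/// Illustrative helper: combine row-wise hashes of two columns of equal size.
DB::WeakHash32 combinedWeakHash(const DB::IColumn & left, const DB::IColumn & right)
{
    DB::WeakHash32 hash = left.getWeakHash32();   /// one value per row of `left`
    hash.update(right.getWeakHash32());           /// mixes in `right`'s values with intHashCRC32
    return hash;                                  /// throws LOGICAL_ERROR if the sizes differ
}
```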

View File

@ -11,9 +11,8 @@ namespace DB
/// The main reason this class is needed is to support data initialization. Initially, every bit is 1.
class WeakHash32
{
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
public:
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
using Container = PaddedPODArray<UInt32>;
@ -22,6 +21,8 @@ public:
void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); }
void update(const WeakHash32 & other);
const Container & getData() const { return data; }
Container & getData() { return data; }

View File

@ -6,12 +6,17 @@ namespace DB
{
String getRandomASCIIString(size_t length)
{
return getRandomASCIIString(length, thread_local_rng);
}
String getRandomASCIIString(size_t length, pcg64 & rng)
{
std::uniform_int_distribution<int> distribution('a', 'z');
String res;
res.resize(length);
for (auto & c : res)
c = distribution(thread_local_rng);
c = distribution(rng);
return res;
}
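The new overload that takes an explicit generator makes the produced string reproducible from a seed, which is what the merge-join unit test below relies on (it seeds a pcg64 from TEST_RANDOM_SEED). A hedged standard-library equivalent, with std::mt19937_64 standing in for pcg64 and an illustrative function name, could look like:
#include <random>
#include <string>
/// Sketch of getRandomASCIIString(length, rng): lowercase ASCII only,
/// fully determined by the state of the passed generator.
std::string randomASCIIStringSketch(size_t length, std::mt19937_64 & rng)
{
    std::uniform_int_distribution<int> distribution('a', 'z');
    std::string res(length, '\0');
    for (auto & c : res)
        c = static_cast<char>(distribution(rng));
    return res;
}
/// Usage: the same seed always yields the same string.
/// std::mt19937_64 rng(42); auto s = randomASCIIStringSketch(8, rng);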

View File

@ -2,11 +2,14 @@
#include <Core/Types.h>
#include <pcg_random.hpp>
namespace DB
{
/// Slow random string. Useful for random names and things like this. Not for generating data.
String getRandomASCIIString(size_t length);
String getRandomASCIIString(size_t length, pcg64 & rng);
}

View File

@ -54,16 +54,3 @@ TEST(ShellCommand, ExecuteWithInput)
EXPECT_EQ(res, "Hello, world!\n");
}
TEST(ShellCommand, AutoWait)
{
// <defunct> hunting:
for (int i = 0; i < 1000; ++i)
{
auto command = ShellCommand::execute("echo " + std::to_string(i));
//command->wait(); // now automatic
}
// std::cerr << "inspect me: ps auxwwf\n";
// std::this_thread::sleep_for(std::chrono::seconds(100));
}

View File

@ -47,54 +47,85 @@ bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args)
return true;
}
/// Replaces a single LowCardinality column in a function call by its dictionary.
/// This can only happen after the arguments have been adapted in IFunctionOverloadResolver::getReturnType,
/// as it's only possible if there is exactly one LowCardinality column and, optionally, const columns.
ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count)
{
size_t num_rows = input_rows_count;
/// We return the LC indexes so the LC can be reconstructed with the function result
ColumnPtr indexes;
/// Find first LowCardinality column and replace it to nested dictionary.
for (auto & column : args)
size_t number_low_cardinality_columns = 0;
size_t last_low_cardinality = 0;
size_t number_const_columns = 0;
size_t number_full_columns = 0;
for (size_t i = 0; i < args.size(); i++)
{
if (const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
auto const & arg = args[i];
if (checkAndGetColumn<ColumnLowCardinality>(arg.column.get()))
{
/// Single LowCardinality column is supported now.
if (indexes)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single dictionary argument for function.");
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible type for LowCardinality column: {}",
column.type->getName());
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values' default.
column.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
else
{
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
column.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
num_rows = column.column->size();
column.type = low_cardinality_type->getDictionaryType();
number_low_cardinality_columns++;
last_low_cardinality = i;
}
else if (checkAndGetColumn<ColumnConst>(arg.column.get()))
number_const_columns++;
else
number_full_columns++;
}
/// Change size of constants.
if (!number_low_cardinality_columns && !number_const_columns)
return nullptr;
if (number_full_columns > 0 || number_low_cardinality_columns > 1)
{
/// This should not be possible but currently there are multiple tests in CI failing because of it
/// TODO: Fix those cases, then enable this exception
#if 0
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected low cardinality types found. Low cardinality: {}. Full {}. Const {}",
number_low_cardinality_columns, number_full_columns, number_const_columns);
#else
return nullptr;
#endif
}
else if (number_low_cardinality_columns == 1)
{
auto & lc_arg = args[last_low_cardinality];
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(lc_arg.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", lc_arg.type->getName());
const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(lc_arg.column.get());
chassert(low_cardinality_column);
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values' default.
lc_arg.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
else
{
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
lc_arg.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
/// The new column will have a different number of rows, normally fewer but occasionally more (because of NULL)
input_rows_count = lc_arg.column->size();
lc_arg.type = low_cardinality_type->getDictionaryType();
}
/// Change size of constants
for (auto & column : args)
{
if (const auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get()))
{
column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), num_rows);
column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), input_rows_count);
column.type = recursiveRemoveLowCardinality(column.type);
}
}
@ -270,6 +301,8 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments();
const auto & dictionary_type = res_low_cardinality_type->getDictionaryType();
/// The arguments should have been adapted in IFunctionOverloadResolver::getReturnType
/// So there is only one low cardinality column (and optionally some const columns) and no full column
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);
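Conceptually, the code above replaces the LowCardinality argument by its dictionary, runs the function once per dictionary entry, and later re-expands the result through the saved indexes. A simplified standalone sketch of that saving (plain vectors instead of ColumnLowCardinality, a unary callable instead of an IFunction; all names here are illustrative) could be:
#include <cstdint>
#include <functional>
#include <vector>
/// A LowCardinality-like column: unique values plus one dictionary index per row.
struct LowCardinalitySketch
{
    std::vector<int64_t> dictionary;
    std::vector<uint32_t> indexes;
};
/// Execute `f` on the dictionary only, then gather by indexes to rebuild a full
/// result column: the function runs O(|dictionary|) times instead of O(rows).
std::vector<int64_t> executeOnDictionary(const LowCardinalitySketch & col,
                                         const std::function<int64_t(int64_t)> & f)
{
    std::vector<int64_t> dict_result;
    dict_result.reserve(col.dictionary.size());
    for (auto v : col.dictionary)
        dict_result.push_back(f(v));
    std::vector<int64_t> full_result;
    full_result.reserve(col.indexes.size());
    for (auto idx : col.indexes)
        full_result.push_back(dict_result[idx]);
    return full_result;
}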

View File

@ -310,7 +310,7 @@ IColumn::Selector ConcurrentHashJoin::selectDispatchBlock(const Strings & key_co
{
const auto & key_col = from_block.getByName(key_name).column->convertToFullColumnIfConst();
const auto & key_col_no_lc = recursiveRemoveLowCardinality(recursiveRemoveSparse(key_col));
key_col_no_lc->updateWeakHash32(hash);
hash.update(key_col_no_lc->getWeakHash32());
}
return hashToSelector(hash, num_shards);
}
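After the per-key hashes of all join keys are combined, every row is routed to one of num_shards buckets based on its 32-bit hash. A minimal sketch of such a mapping (a plain modulo is assumed here; the actual hashToSelector may use a different scheme) could be:
#include <cstdint>
#include <vector>
/// Build a per-row selector from 32-bit row hashes: each row goes to one of
/// `num_shards` buckets. Plain modulo is used purely for illustration.
std::vector<size_t> hashToSelectorSketch(const std::vector<uint32_t> & hashes, size_t num_shards)
{
    std::vector<size_t> selector(hashes.size());
    for (size_t i = 0; i < hashes.size(); ++i)
        selector[i] = hashes[i] % num_shards;
    return selector;
}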

View File

@ -1726,7 +1726,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
const auto & join_clause = table_join.getOnlyClause();
auto join_kind = table_join.kind();
bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind);
auto join_strictness = table_join.strictness();
bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));
auto has_non_const = [](const Block & block, const auto & keys)
{
@ -1745,7 +1748,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
bool has_non_const_keys = has_non_const(query_plan.getCurrentDataStream().header, join_clause.key_names_left)
&& has_non_const(joined_plan->getCurrentDataStream().header, join_clause.key_names_right);
if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys)
if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
{
auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left);
auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right);

View File

@ -554,7 +554,7 @@ static Blocks scatterBlockByHashImpl(const Strings & key_columns_names, const Bl
for (const auto & key_name : key_columns_names)
{
ColumnPtr key_col = materializeColumn(block, key_name);
key_col->updateWeakHash32(hash);
hash.update(key_col->getWeakHash32());
}
auto selector = hashToSelector(hash, sharder);

View File

@ -7,7 +7,6 @@
#include <Common/FieldVisitorToString.h>
#include <Common/KnownObjectNames.h>
#include <Common/SipHash.h>
#include <Common/typeid_cast.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
@ -19,9 +18,6 @@
#include <Parsers/queryToString.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/FunctionSecretArgumentsFinderAST.h>
#include <Core/QualifiedTableName.h>
#include <boost/algorithm/string.hpp>
using namespace std::literals;
@ -632,6 +628,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
}
settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : "");
@ -642,12 +639,14 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
{
settings.ostr << (settings.hilite ? hilite_operator : "") << ((frame.need_parens && !alias.empty()) ? "tuple" : "") << '('
<< (settings.hilite ? hilite_none : "");
for (size_t i = 0; i < arguments->children.size(); ++i)
{
if (i != 0)
settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
}
settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : "");
@ -663,6 +662,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
}
settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : "");

View File

@ -745,7 +745,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan,
{
auto & interpolate_node_typed = interpolate_node->as<InterpolateNode &>();
PlannerActionsVisitor planner_actions_visitor(planner_context);
PlannerActionsVisitor planner_actions_visitor(
planner_context,
/* use_column_identifier_as_action_node_name_, (default value)*/ true,
/// Prefer the INPUT to CONSTANT nodes (actions must be non constant)
/* always_use_const_column_for_constant_nodes */ false);
auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag,
interpolate_node_typed.getExpression());
if (expression_to_interpolate_expression_nodes.size() != 1)

View File

@ -487,16 +487,33 @@ public:
return node;
}
const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column)
[[nodiscard]] String addConstantIfNecessary(
const std::string & node_name, const ColumnWithTypeAndName & column, bool always_use_const_column_for_constant_nodes)
{
chassert(column.column != nullptr);
auto it = node_name_to_node.find(node_name);
if (it != node_name_to_node.end() && (!always_use_const_column_for_constant_nodes || it->second->column))
return {node_name};
if (it != node_name_to_node.end())
return it->second;
{
/// There is a node with this name, but it doesn't have a column.
/// This likely happens because we executed the query up to WithMergeableState with a const node in the
/// WHERE clause and, since the result headers are materialized, the column was removed.
/// Let's add the constant under a new name and keep the existing node.
String dupped_name{node_name + "_dupped"};
if (node_name_to_node.find(dupped_name) != node_name_to_node.end())
return dupped_name;
const auto * node = &actions_dag.addColumn(column);
node_name_to_node[dupped_name] = node;
return dupped_name;
}
const auto * node = &actions_dag.addColumn(column);
node_name_to_node[node->result_name] = node;
return node;
return {node_name};
}
template <typename FunctionOrOverloadResolver>
@ -525,7 +542,7 @@ public:
}
private:
std::unordered_map<std::string_view, const ActionsDAG::Node *> node_name_to_node;
std::unordered_map<String, const ActionsDAG::Node *> node_name_to_node;
ActionsDAG & actions_dag;
QueryTreeNodePtr scope_node;
};
@ -533,9 +550,11 @@ private:
class PlannerActionsVisitorImpl
{
public:
PlannerActionsVisitorImpl(ActionsDAG & actions_dag,
PlannerActionsVisitorImpl(
ActionsDAG & actions_dag,
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_);
bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_);
ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node);
@ -595,14 +614,18 @@ private:
const PlannerContextPtr planner_context;
ActionNodeNameHelper action_node_name_helper;
bool use_column_identifier_as_action_node_name;
bool always_use_const_column_for_constant_nodes;
};
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAG & actions_dag,
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(
ActionsDAG & actions_dag,
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_)
bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_)
: planner_context(planner_context_)
, action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
, always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_)
{
actions_stack.emplace_back(actions_dag, nullptr);
}
@ -725,17 +748,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
column.type = constant_type;
column.column = column.type->createColumnConst(1, constant_literal);
actions_stack[0].addConstantIfNecessary(constant_node_name, column);
String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column, always_use_const_column_for_constant_nodes);
size_t actions_stack_size = actions_stack.size();
for (size_t i = 1; i < actions_stack_size; ++i)
{
auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column);
actions_stack_node.addInputConstantColumnIfNecessary(final_name, column);
}
return {constant_node_name, Levels(0)};
return {final_name, Levels(0)};
}
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node)
@ -864,16 +886,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma
else
column.column = std::move(column_set);
actions_stack[0].addConstantIfNecessary(column.name, column);
String final_name = actions_stack[0].addConstantIfNecessary(column.name, column, always_use_const_column_for_constant_nodes);
size_t actions_stack_size = actions_stack.size();
for (size_t i = 1; i < actions_stack_size; ++i)
{
auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputConstantColumnIfNecessary(column.name, column);
actions_stack_node.addInputConstantColumnIfNecessary(final_name, column);
}
return {column.name, Levels(0)};
return {final_name, Levels(0)};
}
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node)
@ -1010,14 +1032,19 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
}
PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_)
PlannerActionsVisitor::PlannerActionsVisitor(
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_)
: planner_context(planner_context_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
, always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_)
{}
ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node)
{
PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name);
PlannerActionsVisitorImpl actions_visitor_impl(
actions_dag, planner_context, use_column_identifier_as_action_node_name, always_use_const_column_for_constant_nodes);
return actions_visitor_impl.visit(expression_node);
}
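The "_dupped" naming above is the crux of the change: when the DAG already contains a node with the constant's name but without a column, the constant is registered under a derived name instead of reusing or overriding that node. A simplified standalone sketch of the naming rule (a map of name to a "has column" flag standing in for the ActionsDAG; the helper name is hypothetical) could be:
#include <string>
#include <unordered_map>
/// Sketch of addConstantIfNecessary's naming rule. The mapped bool means
/// "the existing node already carries a const column".
std::string addConstantName(std::unordered_map<std::string, bool> & nodes,
                            const std::string & name,
                            bool always_use_const_column_for_constant_nodes)
{
    auto it = nodes.find(name);
    if (it != nodes.end() && (!always_use_const_column_for_constant_nodes || it->second))
        return name;                    /// reuse the existing node as-is
    if (it != nodes.end())
    {
        /// Name is taken by a column-less input: register the constant under a
        /// derived name so the existing node is left untouched.
        std::string dupped_name = name + "_dupped";
        nodes.emplace(dupped_name, true);
        return dupped_name;
    }
    nodes.emplace(name, true);          /// brand-new constant node
    return name;
}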

View File

@ -27,11 +27,17 @@ using PlannerContextPtr = std::shared_ptr<PlannerContext>;
* During actions build, there is special handling for following functions:
* 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added.
* 2. For function `in` and its variants, already collected sets from planner context are used.
* 3. When building actions that use CONSTANT nodes, by default we ignore pre-existing INPUTs if those don't have
* a column (a const column always has a column). This is for compatibility with previous headers. We disable this
* behaviour when we explicitly want to override CONSTANT nodes with the input (resolving InterpolateNode for example)
*/
class PlannerActionsVisitor
{
public:
explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_ = true);
explicit PlannerActionsVisitor(
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_ = true,
bool always_use_const_column_for_constant_nodes_ = true);
/** Add actions necessary to calculate expression node into expression dag.
* Necessary actions are not added in actions dag output.
@ -42,6 +48,7 @@ public:
private:
const PlannerContextPtr planner_context;
bool use_column_identifier_as_action_node_name = true;
bool always_use_const_column_for_constant_nodes = true;
};
/** Calculate query tree expression node action dag name and add them into node to name map.

View File

@ -77,7 +77,6 @@ namespace ErrorCodes
extern const int INVALID_JOIN_ON_EXPRESSION;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int SYNTAX_ERROR;
extern const int ACCESS_DENIED;
extern const int PARAMETER_OUT_OF_BOUND;
extern const int TOO_MANY_COLUMNS;
@ -1417,12 +1416,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
{
if (!join_clause.hasASOF())
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"JOIN {} no inequality in ASOF JOIN ON section.",
join_node.formatASTForErrorMessage());
if (table_join_clause.key_names_left.size() <= 1)
throw Exception(ErrorCodes::SYNTAX_ERROR,
"JOIN {} ASOF join needs at least one equi-join column",
"JOIN {} no inequality in ASOF JOIN ON section",
join_node.formatASTForErrorMessage());
}
@ -1544,7 +1538,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
{
const auto & join_clause = table_join->getOnlyClause();
bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind);
bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));
auto has_non_const = [](const Block & block, const auto & keys)
{
@ -1564,7 +1560,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left)
&& has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right);
if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys)
if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
{
auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left);
auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right);

View File

@ -34,13 +34,20 @@ namespace ErrorCodes
namespace
{
FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns)
FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, JoinStrictness strictness)
{
SortDescription desc;
desc.reserve(columns.size());
for (const auto & name : columns)
desc.emplace_back(name);
return std::make_unique<FullMergeJoinCursor>(block, desc);
return std::make_unique<FullMergeJoinCursor>(block, desc, strictness == JoinStrictness::Asof);
}
bool ALWAYS_INLINE isNullAt(const IColumn & column, size_t row)
{
if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(&column))
return nullable_column->isNullAt(row);
return false;
}
template <bool has_left_nulls, bool has_right_nulls>
@ -54,7 +61,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
if (left_nullable && right_nullable)
{
int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint);
if (res)
if (res != 0)
return res;
/// NULL != NULL case
@ -90,9 +97,10 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
const SortCursorImpl & rhs, size_t rpos,
size_t key_length,
int null_direction_hint)
{
for (size_t i = 0; i < lhs.sort_columns_size; ++i)
for (size_t i = 0; i < key_length; ++i)
{
/// TODO(@vdimir): use nullableCompareAt only if there's nullable columns
int cmp = nullableCompareAt<true, true>(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint);
@ -104,13 +112,18 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint)
{
return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), null_direction_hint);
return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
}
int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs, int null_direction_hint)
{
return nullableCompareAt<true, true>(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow(), null_direction_hint);
}
bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint)
{
/// The last row of left cursor is less than the current row of the right cursor.
int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), null_direction_hint);
int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
return cmp < 0;
}
@ -222,25 +235,136 @@ Chunk getRowFromChunk(const Chunk & chunk, size_t pos)
return result;
}
void inline addRange(PaddedPODArray<UInt64> & left_map, size_t start, size_t end)
void inline addRange(PaddedPODArray<UInt64> & values, UInt64 start, UInt64 end)
{
assert(end > start);
for (size_t i = start; i < end; ++i)
left_map.push_back(i);
for (UInt64 i = start; i < end; ++i)
values.push_back(i);
}
void inline addMany(PaddedPODArray<UInt64> & left_or_right_map, size_t idx, size_t num)
void inline addMany(PaddedPODArray<UInt64> & values, UInt64 value, size_t num)
{
for (size_t i = 0; i < num; ++i)
left_or_right_map.push_back(idx);
values.resize_fill(values.size() + num, value);
}
}
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_)
: sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_)
JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos)
{
row.reserve(cursor->sort_columns.size());
for (const auto & col : cursor->sort_columns)
{
auto new_col = col->cloneEmpty();
new_col->insertFrom(*col, pos);
row.push_back(std::move(new_col));
}
if (const IColumn * asof_column = cursor.getAsofColumn())
{
if (const auto * nullable_asof_column = checkAndGetColumn<ColumnNullable>(asof_column))
{
/// We save the matched column, and since NULL does not match anything, we can't use it as a key
chassert(!nullable_asof_column->isNullAt(pos));
asof_column = nullable_asof_column->getNestedColumnPtr().get();
}
auto new_col = asof_column->cloneEmpty();
new_col->insertFrom(*asof_column, pos);
row.push_back(std::move(new_col));
}
}
void JoinKeyRow::reset()
{
row.clear();
}
bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const
{
if (row.empty())
return false;
for (size_t i = 0; i < cursor->sort_columns_size; ++i)
{
// int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction);
int cmp = nullableCompareAt<true, true>(*this->row[i], *cursor->sort_columns[i], 0, cursor->getRow(), cursor->desc[i].nulls_direction);
if (cmp != 0)
return false;
}
return true;
}
bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
{
chassert(this->row.size() == cursor->sort_columns_size + 1);
if (!equals(cursor))
return false;
const auto & asof_row = row.back();
if (isNullAt(*asof_row, 0) || isNullAt(*cursor.getAsofColumn(), cursor->getRow()))
return false;
int cmp = 0;
if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(cursor.getAsofColumn()))
cmp = nullable_column->getNestedColumn().compareAt(cursor->getRow(), 0, *asof_row, 1);
else
cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1);
return (asof_inequality == ASOFJoinInequality::Less && cmp < 0)
|| (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0)
|| (asof_inequality == ASOFJoinInequality::Greater && cmp > 0)
|| (asof_inequality == ASOFJoinInequality::GreaterOrEquals && cmp >= 0);
}
void AnyJoinState::set(size_t source_num, const FullMergeJoinCursor & cursor)
{
assert(cursor->rows);
keys[source_num] = JoinKeyRow(cursor, cursor->rows - 1);
}
void AnyJoinState::reset(size_t source_num)
{
keys[source_num].reset();
value.clear();
}
void AnyJoinState::setValue(Chunk value_)
{
value = std::move(value_);
}
bool AnyJoinState::empty() const { return keys[0].row.empty() && keys[1].row.empty(); }
void AsofJoinState::set(const FullMergeJoinCursor & rcursor, size_t rpos)
{
key = JoinKeyRow(rcursor, rpos);
value = rcursor.getCurrent().clone();
value_row = rpos;
}
void AsofJoinState::reset()
{
key.reset();
value.clear();
}
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof)
: sample_block(materializeBlock(sample_block_).cloneEmpty())
, desc(description_)
{
if (desc.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor");
if (is_asof)
{
/// For ASOF join, the prefix of the sort description is used for the equality comparison,
/// and the last column is used for the inequality comparison and is handled separately
auto asof_column_description = desc.back();
desc.pop_back();
chassert(asof_column_description.direction == 1 && asof_column_description.nulls_direction == 1);
asof_column_position = sample_block.getPositionByName(asof_column_description.column_name);
}
}
const Chunk & FullMergeJoinCursor::getCurrent() const
{
@ -278,48 +402,103 @@ bool FullMergeJoinCursor::fullyCompleted() const
return !cursor.isValid() && recieved_all_blocks;
}
String FullMergeJoinCursor::dump() const
{
Strings row_dump;
if (cursor.isValid())
{
Field val;
for (size_t i = 0; i < cursor.sort_columns_size; ++i)
{
cursor.sort_columns[i]->get(cursor.getRow(), val);
row_dump.push_back(val.dump());
}
if (const auto * asof_column = getAsofColumn())
{
asof_column->get(cursor.getRow(), val);
row_dump.push_back(val.dump());
}
}
return fmt::format("<{}/{}{}>[{}]",
cursor.getRow(), cursor.rows,
recieved_all_blocks ? "(finished)" : "",
fmt::join(row_dump, ", "));
}
MergeJoinAlgorithm::MergeJoinAlgorithm(
JoinPtr table_join_,
JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
size_t max_block_size_)
: table_join(table_join_)
: kind(kind_)
, strictness(strictness_)
, max_block_size(max_block_size_)
, log(getLogger("MergeJoinAlgorithm"))
{
if (input_headers.size() != 2)
throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs");
auto strictness = table_join->getTableJoin().strictness();
if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All)
if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All && strictness != JoinStrictness::Asof)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness);
auto kind = table_join->getTableJoin().kind();
if (strictness == JoinStrictness::Asof)
{
if (kind != JoinKind::Left && kind != JoinKind::Inner)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not implement ASOF {} join", kind);
}
if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind);
const auto & join_on = table_join->getTableJoin().getOnlyClause();
if (join_on.on_filter_condition_left || join_on.on_filter_condition_right)
if (on_clause_.on_filter_condition_left || on_clause_.on_filter_condition_right)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions");
cursors = {
createCursor(input_headers[0], join_on.key_names_left),
createCursor(input_headers[1], join_on.key_names_right)
createCursor(input_headers[0], on_clause_.key_names_left, strictness),
createCursor(input_headers[1], on_clause_.key_names_right, strictness),
};
}
for (const auto & [left_key, right_key] : table_join->getTableJoin().leftToRightKeyRemap())
MergeJoinAlgorithm::MergeJoinAlgorithm(
JoinPtr join_ptr,
const Blocks & input_headers,
size_t max_block_size_)
: MergeJoinAlgorithm(
join_ptr->getTableJoin().kind(),
join_ptr->getTableJoin().strictness(),
join_ptr->getTableJoin().getOnlyClause(),
input_headers,
max_block_size_)
{
for (const auto & [left_key, right_key] : join_ptr->getTableJoin().leftToRightKeyRemap())
{
size_t left_idx = input_headers[0].getPositionByName(left_key);
size_t right_idx = input_headers[1].getPositionByName(right_key);
left_to_right_key_remap[left_idx] = right_idx;
}
const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(table_join.get());
const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(join_ptr.get());
if (smjPtr)
{
null_direction_hint = smjPtr->getNullDirection();
}
if (strictness == JoinStrictness::Asof)
setAsofInequality(join_ptr->getTableJoin().getAsofInequality());
}
void MergeJoinAlgorithm::setAsofInequality(ASOFJoinInequality asof_inequality_)
{
if (strictness != JoinStrictness::Asof)
throw Exception(ErrorCodes::LOGICAL_ERROR, "setAsofInequality is only supported for ASOF joins");
if (asof_inequality_ == ASOFJoinInequality::None)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ASOF inequality cannot be None");
asof_inequality = asof_inequality_;
}
void MergeJoinAlgorithm::logElapsed(double seconds)
@ -407,7 +586,7 @@ struct AllJoinImpl
size_t lnum = nextDistinct(left_cursor.cursor);
size_t rnum = nextDistinct(right_cursor.cursor);
bool all_fit_in_block = std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size;
bool all_fit_in_block = !max_block_size || std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size;
bool have_all_ranges = left_cursor.cursor.isValid() && right_cursor.cursor.isValid();
if (all_fit_in_block && have_all_ranges)
{
@ -421,7 +600,7 @@ struct AllJoinImpl
else
{
assert(state == nullptr);
state = std::make_unique<AllJoinState>(left_cursor.cursor, lpos, right_cursor.cursor, rpos);
state = std::make_unique<AllJoinState>(left_cursor, lpos, right_cursor, rpos);
state->addRange(0, left_cursor.getCurrent().clone(), lpos, lnum);
state->addRange(1, right_cursor.getCurrent().clone(), rpos, rnum);
return;
@ -466,6 +645,17 @@ void dispatchKind(JoinKind kind, Args && ... args)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind);
}
MutableColumns MergeJoinAlgorithm::getEmptyResultColumns() const
{
MutableColumns result_cols;
for (size_t i = 0; i < 2; ++i)
{
for (const auto & col : cursors[i]->sampleColumns())
result_cols.push_back(col->cloneEmpty());
}
return result_cols;
}
std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState()
{
if (all_join_state && all_join_state->finished())
@ -479,7 +669,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
/// Accumulate blocks with same key in all_join_state
for (size_t i = 0; i < 2; ++i)
{
if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(cursors[i]->cursor))
if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(*cursors[i]))
{
size_t pos = cursors[i]->cursor.getRow();
size_t num = nextDistinct(cursors[i]->cursor);
@ -499,15 +689,10 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
stat.max_blocks_loaded = std::max(stat.max_blocks_loaded, all_join_state->blocksStored());
/// join all rows with current key
MutableColumns result_cols;
for (size_t i = 0; i < 2; ++i)
{
for (const auto & col : cursors[i]->sampleColumns())
result_cols.push_back(col->cloneEmpty());
}
MutableColumns result_cols = getEmptyResultColumns();
size_t total_rows = 0;
while (total_rows < max_block_size)
while (!max_block_size || total_rows < max_block_size)
{
const auto & left_range = all_join_state->getLeft();
const auto & right_range = all_join_state->getRight();
@ -532,7 +717,52 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
return {};
}
MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin(JoinKind kind)
std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAsofJoinState()
{
if (strictness != JoinStrictness::Asof)
return {};
if (!cursors[1]->fullyCompleted())
return {};
auto & left_cursor = *cursors[0];
const auto & left_columns = left_cursor.getCurrent().getColumns();
MutableColumns result_cols = getEmptyResultColumns();
while (left_cursor->isValid() && asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
left_cursor->next();
}
while (isLeft(kind) && left_cursor->isValid())
{
/// return row with default values on the right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
for (; i < result_cols.size(); ++i)
result_cols[i]->insertDefault();
chassert(i == result_cols.size());
left_cursor->next();
}
size_t result_rows = result_cols.empty() ? 0 : result_cols.front()->size();
if (result_rows)
return Status(Chunk(std::move(result_cols), result_rows));
return {};
}
MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin()
{
PaddedPODArray<UInt64> idx_map[2];
@ -595,7 +825,7 @@ struct AnyJoinImpl
FullMergeJoinCursor & right_cursor,
PaddedPODArray<UInt64> & left_map,
PaddedPODArray<UInt64> & right_map,
AnyJoinState & state,
AnyJoinState & any_join_state,
int null_direction_hint)
{
assert(enabled);
@ -656,21 +886,21 @@ struct AnyJoinImpl
}
}
/// Remember index of last joined row to propagate it to next block
/// Remember last joined row to propagate it to next block
state.setValue({});
any_join_state.setValue({});
if (!left_cursor->isValid())
{
state.set(0, left_cursor.cursor);
any_join_state.set(0, left_cursor);
if (cmp == 0 && isLeft(kind))
state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos));
any_join_state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos));
}
if (!right_cursor->isValid())
{
state.set(1, right_cursor.cursor);
any_join_state.set(1, right_cursor);
if (cmp == 0 && isRight(kind))
state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos));
any_join_state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos));
}
}
};
@ -680,40 +910,34 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
if (any_join_state.empty())
return {};
auto kind = table_join->getTableJoin().kind();
Chunk result;
for (size_t source_num = 0; source_num < 2; ++source_num)
{
auto & current = *cursors[source_num];
auto & state = any_join_state;
if (any_join_state.keys[source_num].equals(current.cursor))
if (any_join_state.keys[source_num].equals(current))
{
size_t start_pos = current->getRow();
size_t length = nextDistinct(current.cursor);
if (length && isLeft(kind) && source_num == 0)
{
if (state.value)
result = copyChunkResized(current.getCurrent(), state.value, start_pos, length);
if (any_join_state.value)
result = copyChunkResized(current.getCurrent(), any_join_state.value, start_pos, length);
else
result = createBlockWithDefaults(source_num, start_pos, length);
}
if (length && isRight(kind) && source_num == 1)
{
if (state.value)
result = copyChunkResized(state.value, current.getCurrent(), start_pos, length);
if (any_join_state.value)
result = copyChunkResized(any_join_state.value, current.getCurrent(), start_pos, length);
else
result = createBlockWithDefaults(source_num, start_pos, length);
}
/// We've found row with other key, no need to skip more rows with current key
if (current->isValid())
{
state.keys[source_num].reset();
}
any_join_state.keys[source_num].reset();
}
else
{
@ -726,7 +950,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
return {};
}
MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind)
MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin()
{
if (auto result = handleAnyJoinState())
return std::move(*result);
@ -771,10 +995,151 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind)
return Status(std::move(result));
}
MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin()
{
auto & left_cursor = *cursors[0];
if (!left_cursor->isValid())
return Status(0);
auto & right_cursor = *cursors[1];
if (!right_cursor->isValid())
return Status(1);
const auto & left_columns = left_cursor.getCurrent().getColumns();
const auto & right_columns = right_cursor.getCurrent().getColumns();
MutableColumns result_cols = getEmptyResultColumns();
while (left_cursor->isValid() && right_cursor->isValid())
{
auto lpos = left_cursor->getRow();
auto rpos = right_cursor->getRow();
auto cmp = compareCursors(*left_cursor, *right_cursor, null_direction_hint);
if (cmp == 0)
{
if (isNullAt(*left_cursor.getAsofColumn(), lpos))
cmp = -1;
if (isNullAt(*right_cursor.getAsofColumn(), rpos))
cmp = 1;
}
if (cmp == 0)
{
auto asof_cmp = compareAsofCursors(left_cursor, right_cursor, null_direction_hint);
if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1)
|| (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0))
{
/// The first row in the right table that is greater than (or equal to) the current row in the left table
/// matches the ASOF join condition best
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : right_columns)
result_cols[i++]->insertFrom(*col, rpos);
chassert(i == result_cols.size());
left_cursor->next();
continue;
}
if (asof_inequality == ASOFJoinInequality::Less || asof_inequality == ASOFJoinInequality::LessOrEquals)
{
/// Asof condition is not (yet) satisfied, skip row in right table
right_cursor->next();
continue;
}
if ((asof_inequality == ASOFJoinInequality::Greater && asof_cmp >= 1)
|| (asof_inequality == ASOFJoinInequality::GreaterOrEquals && asof_cmp >= 0))
{
/// Condition is satisfied; remember this row and move on to try to find a better match
asof_join_state.set(right_cursor, rpos);
right_cursor->next();
continue;
}
if (asof_inequality == ASOFJoinInequality::Greater || asof_inequality == ASOFJoinInequality::GreaterOrEquals)
{
/// Asof condition is not satisfied anymore, use last matched row from right table
if (asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
}
else
{
asof_join_state.reset();
if (isLeft(kind))
{
/// return row with default values on the right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (; i < result_cols.size(); ++i)
result_cols[i]->insertDefault();
chassert(i == result_cols.size());
}
}
left_cursor->next();
continue;
}
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO: implement ASOF equality join");
}
else if (cmp < 0)
{
if (asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
left_cursor->next();
continue;
}
else
{
asof_join_state.reset();
}
/// no matches for rows in left table, just pass them through
size_t num = nextDistinct(*left_cursor);
if (isLeft(kind) && num)
{
/// return them with default values on the right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertRangeFrom(*col, lpos, num);
for (; i < result_cols.size(); ++i)
result_cols[i]->insertManyDefaults(num);
chassert(i == result_cols.size());
}
}
else
{
/// skip rows in right table until we find match for current row in left table
nextDistinct(*right_cursor);
}
}
size_t num_rows = result_cols.empty() ? 0 : result_cols.front()->size();
return Status(Chunk(std::move(result_cols), num_rows));
}
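For reference, the semantics the loop above implements can be summarized as: for each left row, among right rows with an equal key, pick the closest row whose ASOF column satisfies the inequality. A small standalone sketch, limited to ASOF LEFT JOIN with the GreaterOrEquals inequality (left.t >= right.t, i.e. take the right row with the largest t not exceeding the left one), assuming both inputs are sorted by (key, t); this illustrates the semantics only, not the merge algorithm itself:
#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>
#include <vector>
struct Row { uint64_t key; uint64_t t; };
/// ASOF LEFT JOIN sketch for GreaterOrEquals: for every left row take the right
/// row with the same key and the largest t that is <= left.t (if any).
std::vector<std::pair<Row, std::optional<Row>>>
asofLeftJoinGE(const std::vector<Row> & left, const std::vector<Row> & right)
{
    std::vector<std::pair<Row, std::optional<Row>>> result;
    for (const auto & l : left)
    {
        std::optional<Row> best;
        for (const auto & r : right)
            if (r.key == l.key && r.t <= l.t)
                best = r;               /// inputs are sorted, so the last hit has the largest t
        result.emplace_back(l, best);   /// std::nullopt means defaults on the right side
    }
    return result;
}
int main()
{
    std::vector<Row> left  = {{1, 10}, {1, 20}, {2, 5}};
    std::vector<Row> right = {{1, 8}, {1, 15}, {2, 6}};
    /// Expected: (1,10)->(1,8), (1,20)->(1,15), (2,5)->no match.
    for (const auto & [l, r] : asofLeftJoinGE(left, right))
        std::printf("left(%llu,%llu) -> %s\n",
                    static_cast<unsigned long long>(l.key),
                    static_cast<unsigned long long>(l.t),
                    r ? "matched" : "no match");
    return 0;
}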
/// if `source_num == 0` get data from left cursor and fill defaults at right
/// otherwise - vice versa
Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const
{
ColumnRawPtrs cols;
{
const auto & columns_left = source_num == 0 ? cursors[0]->getCurrent().getColumns() : cursors[0]->sampleColumns();
@ -797,7 +1162,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t star
cols.push_back(col.get());
}
}
Chunk result_chunk;
copyColumnsResized(cols, start, num_rows, result_chunk);
return result_chunk;
@ -813,7 +1177,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num)
IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
{
auto kind = table_join->getTableJoin().kind();
if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted())
return Status(0);
@ -821,11 +1184,11 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted())
return Status(1);
if (auto result = handleAllJoinState())
{
return std::move(*result);
}
if (auto result = handleAsofJoinState())
return std::move(*result);
if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted())
{
@ -839,7 +1202,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
}
/// check if blocks are not intersecting at all
if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0)
if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0 && strictness != JoinStrictness::Asof)
{
if (cmp < 0)
{
@ -858,13 +1221,14 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
}
}
auto strictness = table_join->getTableJoin().strictness();
if (strictness == JoinStrictness::Any)
return anyJoin(kind);
return anyJoin();
if (strictness == JoinStrictness::All)
return allJoin(kind);
return allJoin();
if (strictness == JoinStrictness::Asof)
return asofJoin();
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness);
}
@ -883,9 +1247,26 @@ MergeJoinTransform::MergeJoinTransform(
/* always_read_till_end_= */ false,
/* empty_chunk_on_finish_= */ true,
table_join, input_headers, max_block_size)
, log(getLogger("MergeJoinTransform"))
{
LOG_TRACE(log, "Use MergeJoinTransform");
}
MergeJoinTransform::MergeJoinTransform(
JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
const Block & output_header,
size_t max_block_size,
UInt64 limit_hint_)
: IMergingTransform<MergeJoinAlgorithm>(
input_headers,
output_header,
/* have_all_inputs_= */ true,
limit_hint_,
/* always_read_till_end_= */ false,
/* empty_chunk_on_finish_= */ true,
kind_, strictness_, on_clause_, input_headers, max_block_size)
{
}
void MergeJoinTransform::onFinish()

View File

@ -8,6 +8,7 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include <array>
#include <boost/core/noncopyable.hpp>
@ -19,6 +20,7 @@
#include <Processors/Chunk.h>
#include <Processors/Merges/Algorithms/IMergingAlgorithm.h>
#include <Processors/Merges/IMergingTransform.h>
#include <Interpreters/TableJoin.h>
namespace Poco { class Logger; }
@ -35,57 +37,28 @@ using FullMergeJoinCursorPtr = std::unique_ptr<FullMergeJoinCursor>;
/// Used instead of storing previous block
struct JoinKeyRow
{
std::vector<ColumnPtr> row;
JoinKeyRow() = default;
explicit JoinKeyRow(const SortCursorImpl & impl_, size_t pos)
{
row.reserve(impl_.sort_columns.size());
for (const auto & col : impl_.sort_columns)
{
auto new_col = col->cloneEmpty();
new_col->insertFrom(*col, pos);
row.push_back(std::move(new_col));
}
}
JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos);
void reset()
{
row.clear();
}
bool equals(const FullMergeJoinCursor & cursor) const;
bool asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const;
bool equals(const SortCursorImpl & impl) const
{
if (row.empty())
return false;
void reset();
assert(this->row.size() == impl.sort_columns_size);
for (size_t i = 0; i < impl.sort_columns_size; ++i)
{
int cmp = this->row[i]->compareAt(0, impl.getRow(), *impl.sort_columns[i], impl.desc[i].nulls_direction);
if (cmp != 0)
return false;
}
return true;
}
std::vector<ColumnPtr> row;
};
/// Remembers previous key if it was joined in previous block
class AnyJoinState : boost::noncopyable
{
public:
AnyJoinState() = default;
void set(size_t source_num, const FullMergeJoinCursor & cursor);
void setValue(Chunk value_);
void set(size_t source_num, const SortCursorImpl & cursor)
{
assert(cursor.rows);
keys[source_num] = JoinKeyRow(cursor, cursor.rows - 1);
}
void reset(size_t source_num);
void setValue(Chunk value_) { value = std::move(value_); }
bool empty() const { return keys[0].row.empty() && keys[1].row.empty(); }
bool empty() const;
/// current keys
JoinKeyRow keys[2];
@ -118,8 +91,8 @@ public:
Chunk chunk;
};
AllJoinState(const SortCursorImpl & lcursor, size_t lpos,
const SortCursorImpl & rcursor, size_t rpos)
AllJoinState(const FullMergeJoinCursor & lcursor, size_t lpos,
const FullMergeJoinCursor & rcursor, size_t rpos)
: keys{JoinKeyRow(lcursor, lpos), JoinKeyRow(rcursor, rpos)}
{
}
@ -187,13 +160,32 @@ private:
size_t ridx = 0;
};
class AsofJoinState : boost::noncopyable
{
public:
void set(const FullMergeJoinCursor & rcursor, size_t rpos);
void reset();
bool hasMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
{
if (value.empty())
return false;
return key.asofMatch(cursor, asof_inequality);
}
JoinKeyRow key;
Chunk value;
size_t value_row = 0;
};
/*
* Wrapper for SortCursorImpl
*/
class FullMergeJoinCursor : boost::noncopyable
{
public:
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_);
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof = false);
bool fullyCompleted() const;
void setChunk(Chunk && chunk);
@ -203,17 +195,31 @@ public:
SortCursorImpl * operator-> () { return &cursor; }
const SortCursorImpl * operator-> () const { return &cursor; }
SortCursorImpl & operator* () { return cursor; }
const SortCursorImpl & operator* () const { return cursor; }
SortCursorImpl cursor;
const Block & sampleBlock() const { return sample_block; }
Columns sampleColumns() const { return sample_block.getColumns(); }
const IColumn * getAsofColumn() const
{
if (!asof_column_position)
return nullptr;
return cursor.all_columns[*asof_column_position];
}
String dump() const;
private:
Block sample_block;
SortDescription desc;
Chunk current_chunk;
bool recieved_all_blocks = false;
std::optional<size_t> asof_column_position;
};
/*
@ -223,22 +229,33 @@ private:
class MergeJoinAlgorithm final : public IMergingAlgorithm
{
public:
explicit MergeJoinAlgorithm(JoinPtr table_join, const Blocks & input_headers, size_t max_block_size_);
MergeJoinAlgorithm(JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
size_t max_block_size_);
MergeJoinAlgorithm(JoinPtr join_ptr, const Blocks & input_headers, size_t max_block_size_);
const char * getName() const override { return "MergeJoinAlgorithm"; }
void initialize(Inputs inputs) override;
void consume(Input & input, size_t source_num) override;
Status merge() override;
void logElapsed(double seconds);
void setAsofInequality(ASOFJoinInequality asof_inequality_);
void logElapsed(double seconds);
private:
std::optional<Status> handleAnyJoinState();
Status anyJoin(JoinKind kind);
Status anyJoin();
std::optional<Status> handleAllJoinState();
Status allJoin(JoinKind kind);
Status allJoin();
std::optional<Status> handleAsofJoinState();
Status asofJoin();
MutableColumns getEmptyResultColumns() const;
Chunk createBlockWithDefaults(size_t source_num);
Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const;
@ -246,12 +263,15 @@ private:
std::unordered_map<size_t, size_t> left_to_right_key_remap;
std::array<FullMergeJoinCursorPtr, 2> cursors;
ASOFJoinInequality asof_inequality = ASOFJoinInequality::None;
/// Keep some state to make connection between data in different blocks
/// Keep some state to handle data that spans different blocks
AnyJoinState any_join_state;
std::unique_ptr<AllJoinState> all_join_state;
AsofJoinState asof_join_state;
JoinPtr table_join;
JoinKind kind;
JoinStrictness strictness;
size_t max_block_size;
int null_direction_hint = 1;
@ -281,12 +301,21 @@ public:
size_t max_block_size,
UInt64 limit_hint = 0);
MergeJoinTransform(
JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
const Block & output_header,
size_t max_block_size,
UInt64 limit_hint_ = 0);
String getName() const override { return "MergeJoinTransform"; }
void setAsofInequality(ASOFJoinInequality asof_inequality_) { algorithm.setAsofInequality(asof_inequality_); }
protected:
void onFinish() override;
LoggerPtr log;
};
}

View File

@ -109,7 +109,7 @@ void ScatterByPartitionTransform::generateOutputChunks()
hash.reset(num_rows);
for (const auto & column_number : key_columns)
columns[column_number]->updateWeakHash32(hash);
hash.update(columns[column_number]->getWeakHash32());
const auto & hash_data = hash.getData();
IColumn::Selector selector(num_rows);

View File

@ -0,0 +1,768 @@
#include <gtest/gtest.h>
#include <pcg_random.hpp>
#include <random>
#include <Poco/ConsoleChannel.h>
#include <Poco/Logger.h>
#include <Poco/AutoPtr.h>
#include <Columns/ColumnsNumber.h>
#include <Common/getRandomASCIIString.h>
#include <Common/randomSeed.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/TableJoin.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Sinks/NullSink.h>
#include <Processors/Sources/SourceFromChunks.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Transforms/MergeJoinTransform.h>
#include <Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <QueryPipeline/QueryPipeline.h>
using namespace DB;
namespace
{
QueryPipeline buildJoinPipeline(
std::shared_ptr<ISource> left_source,
std::shared_ptr<ISource> right_source,
size_t key_length = 1,
JoinKind kind = JoinKind::Inner,
JoinStrictness strictness = JoinStrictness::All,
ASOFJoinInequality asof_inequality = ASOFJoinInequality::None)
{
Blocks inputs;
inputs.emplace_back(left_source->getPort().getHeader());
inputs.emplace_back(right_source->getPort().getHeader());
Block out_header;
for (const auto & input : inputs)
{
for (ColumnWithTypeAndName column : input)
{
if (&input == &inputs.front())
column.name = "t1." + column.name;
else
column.name = "t2." + column.name;
out_header.insert(column);
}
}
TableJoin::JoinOnClause on_clause;
for (size_t i = 0; i < key_length; ++i)
{
on_clause.key_names_left.emplace_back(inputs[0].getByPosition(i).name);
on_clause.key_names_right.emplace_back(inputs[1].getByPosition(i).name);
}
auto joining = std::make_shared<MergeJoinTransform>(
kind,
strictness,
on_clause,
inputs, out_header, /* max_block_size = */ 0);
if (asof_inequality != ASOFJoinInequality::None)
joining->setAsofInequality(asof_inequality);
chassert(joining->getInputs().size() == 2);
connect(left_source->getPort(), joining->getInputs().front());
connect(right_source->getPort(), joining->getInputs().back());
auto * output_port = &joining->getOutputPort();
auto processors = std::make_shared<Processors>();
processors->emplace_back(std::move(left_source));
processors->emplace_back(std::move(right_source));
processors->emplace_back(std::move(joining));
QueryPipeline pipeline(QueryPlanResourceHolder{}, processors, output_port);
return pipeline;
}
std::shared_ptr<ISource> oneColumnSource(const std::vector<std::vector<UInt64>> & values)
{
Block header = {
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "key"),
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "idx"),
};
UInt64 idx = 0;
Chunks chunks;
for (const auto & chunk_values : values)
{
auto key_column = ColumnUInt64::create();
auto idx_column = ColumnUInt64::create();
for (auto n : chunk_values)
{
key_column->insertValue(n);
idx_column->insertValue(idx);
++idx;
}
chunks.emplace_back(Chunk(Columns{std::move(key_column), std::move(idx_column)}, chunk_values.size()));
}
return std::make_shared<SourceFromChunks>(header, std::move(chunks));
}
class SourceChunksBuilder
{
public:
explicit SourceChunksBuilder(const Block & header_)
: header(header_)
{
current_chunk = header.cloneEmptyColumns();
chassert(!current_chunk.empty());
}
void setBreakProbability(pcg64 & rng_)
{
/// Random probability that can take the exact values 0.0 and 1.0
break_prob = std::uniform_int_distribution<size_t>(0, 5)(rng_) / static_cast<double>(5);
rng = &rng_;
}
void addRow(const std::vector<Field> & row)
{
chassert(row.size() == current_chunk.size());
for (size_t i = 0; i < current_chunk.size(); ++i)
current_chunk[i]->insert(row[i]);
if (rng && std::uniform_real_distribution<>(0.0, 1.0)(*rng) < break_prob)
addChunk();
}
void addChunk()
{
if (current_chunk.front()->empty())
return;
size_t rows = current_chunk.front()->size();
chunks.emplace_back(std::move(current_chunk), rows);
current_chunk = header.cloneEmptyColumns();
}
std::shared_ptr<ISource> getSource()
{
addChunk();
/// copy chunks to allow reusing the same builder
Chunks chunks_copy;
chunks_copy.reserve(chunks.size());
for (const auto & chunk : chunks)
chunks_copy.emplace_back(chunk.clone());
return std::make_shared<SourceFromChunks>(header, std::move(chunks_copy));
}
private:
Block header;
Chunks chunks;
MutableColumns current_chunk;
pcg64 * rng = nullptr;
double break_prob = 0.0;
};
std::vector<std::vector<Field>> getValuesFromBlock(const Block & block, const Names & names)
{
std::vector<std::vector<Field>> result;
for (size_t i = 0; i < block.rows(); ++i)
{
auto & row = result.emplace_back();
for (const auto & name : names)
block.getByName(name).column->get(i, row.emplace_back());
}
return result;
}
Block executePipeline(QueryPipeline && pipeline)
{
PullingPipelineExecutor executor(pipeline);
Blocks result_blocks;
while (true)
{
Block block;
bool is_ok = executor.pull(block);
if (!is_ok)
break;
result_blocks.emplace_back(std::move(block));
}
return concatenateBlocks(result_blocks);
}
template <typename T>
void assertColumnVectorEq(const typename ColumnVector<T>::Container & expected, const Block & block, const std::string & name)
{
const auto * actual = typeid_cast<const ColumnVector<T> *>(block.getByName(name).column.get());
ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << ", expected: " << typeid(ColumnVector<T>).name();
auto get_first_diff = [&]() -> String
{
const auto & actual_data = actual->getData();
size_t num_rows = std::min(expected.size(), actual_data.size());
for (size_t i = 0; i < num_rows; ++i)
{
if (expected[i] != actual_data[i])
return fmt::format(", expected: {}, actual: {} at row {}", expected[i], actual_data[i], i);
}
return "";
};
EXPECT_EQ(actual->getData().size(), expected.size());
ASSERT_EQ(actual->getData(), expected) << "column name: " << name << get_first_diff();
}
template <typename T>
void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name)
{
const ColumnPtr & actual = block.getByName(name).column;
ASSERT_TRUE(checkColumn<T>(*actual));
ASSERT_TRUE(checkColumn<T>(expected));
EXPECT_EQ(actual->size(), expected.size());
auto dump_val = [](const IColumn & col, size_t i) -> String
{
Field value;
col.get(i, value);
return value.dump();
};
size_t num_rows = std::min(actual->size(), expected.size());
for (size_t i = 0; i < num_rows; ++i)
ASSERT_EQ(actual->compareAt(i, i, expected, 1), 0) << dump_val(*actual, i) << " != " << dump_val(expected, i) << " at row " << i;
}
template <typename T>
T getRandomFrom(pcg64 & rng, const std::initializer_list<T> & opts)
{
std::vector<T> options(opts.begin(), opts.end());
size_t idx = std::uniform_int_distribution<size_t>(0, options.size() - 1)(rng);
return options[idx];
}
void generateNextKey(pcg64 & rng, UInt64 & k1, String & k2)
{
size_t str_len = std::uniform_int_distribution<>(1, 10)(rng);
String new_k2 = getRandomASCIIString(str_len, rng);
if (new_k2.compare(k2) <= 0)
++k1;
k2 = new_k2;
}
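/// generateNextKey keeps the composite key strictly increasing in (k1, k2) lexicographic order:
/// if the freshly generated k2 is not greater than the previous one, k1 is bumped instead.
/// A minimal sketch of the invariant the generated-data tests rely on (illustration only):
void exampleKeyOrdering(pcg64 & rng_)
{
    UInt64 k1 = 1;
    String k2;
    for (size_t i = 0; i < 100; ++i)
    {
        UInt64 prev_k1 = k1;
        String prev_k2 = k2;
        generateNextKey(rng_, k1, k2);
        chassert(prev_k1 < k1 || (prev_k1 == k1 && prev_k2 < k2));
    }
}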
bool isStrict(ASOFJoinInequality inequality)
{
return inequality == ASOFJoinInequality::Less || inequality == ASOFJoinInequality::Greater;
}
}
class FullSortingJoinTest : public ::testing::Test
{
public:
FullSortingJoinTest() = default;
void SetUp() override
{
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
Poco::Logger::root().setChannel(channel);
if (const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe)
Poco::Logger::root().setLevel(test_log_level);
else
Poco::Logger::root().setLevel("none");
UInt64 seed = randomSeed();
if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe)
seed = std::stoull(random_seed);
std::cout << "TEST_RANDOM_SEED=" << seed << std::endl;
rng = pcg64(seed);
}
void TearDown() override
{
}
pcg64 rng;
};
TEST_F(FullSortingJoinTest, AllAnyOneKey)
try
{
{
SCOPED_TRACE("Inner All");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {1, 2, 3, 4, 5} }),
oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
1, JoinKind::Inner, JoinStrictness::All));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {1, 2, 3, 4, 5} }),
oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
1, JoinKind::Inner, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner All");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
1, JoinKind::Inner, JoinStrictness::All));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 0, 1, 2, 3, 3, 4, 5}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 4, 4, 4, 3, 4, 5, 5}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
1, JoinKind::Inner, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
1, JoinKind::Inner, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
}
{
SCOPED_TRACE("Left Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
1, JoinKind::Left, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
}
{
SCOPED_TRACE("Left Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
1, JoinKind::Left, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
}
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AnySimple)
try
{
JoinKind kind = getRandomFrom(rng, {JoinKind::Inner, JoinKind::Left, JoinKind::Right});
SourceChunksBuilder left_source({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeString>(), "attr"},
});
SourceChunksBuilder right_source({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeString>(), "attr"},
});
left_source.setBreakProbability(rng);
right_source.setBreakProbability(rng);
size_t num_keys = std::uniform_int_distribution<>(100, 1000)(rng);
auto expected_left = ColumnString::create();
auto expected_right = ColumnString::create();
UInt64 k1 = 1;
String k2;
auto get_attr = [&](const String & side, size_t idx) -> String
{
return toString(k1) + "_" + k2 + "_" + side + "_" + toString(idx);
};
for (size_t i = 0; i < num_keys; ++i)
{
generateNextKey(rng, k1, k2);
/// The key is present in the left table, the right table, or both; presence in both tables is more probable.
size_t key_presence = std::uniform_int_distribution<>(0, 10)(rng);
size_t num_rows_left = key_presence == 0 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
for (size_t j = 0; j < num_rows_left; ++j)
left_source.addRow({k1, k2, get_attr("left", j)});
size_t num_rows_right = key_presence == 1 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
for (size_t j = 0; j < num_rows_right; ++j)
right_source.addRow({k1, k2, get_attr("right", j)});
String left_attr = num_rows_left ? get_attr("left", 0) : "";
String right_attr = num_rows_right ? get_attr("right", 0) : "";
if (kind == JoinKind::Inner && num_rows_left && num_rows_right)
{
expected_left->insert(left_attr);
expected_right->insert(right_attr);
}
else if (kind == JoinKind::Left)
{
for (size_t j = 0; j < num_rows_left; ++j)
{
expected_left->insert(get_attr("left", j));
expected_right->insert(right_attr);
}
}
else if (kind == JoinKind::Right)
{
for (size_t j = 0; j < num_rows_right; ++j)
{
expected_left->insert(left_attr);
expected_right->insert(get_attr("right", j));
}
}
}
Block result_block = executePipeline(buildJoinPipeline(
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
kind, JoinStrictness::Any));
assertColumnEq<ColumnString>(*expected_left, result_block, "t1.attr");
assertColumnEq<ColumnString>(*expected_right, result_block, "t2.attr");
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AsofSimple)
try
{
SourceChunksBuilder left_source({
{std::make_shared<DataTypeString>(), "key"},
{std::make_shared<DataTypeUInt64>(), "t"},
});
left_source.addRow({"AMZN", 3});
left_source.addRow({"AMZN", 4});
left_source.addRow({"AMZN", 6});
left_source.addRow({"SBUX", 10});
SourceChunksBuilder right_source({
{std::make_shared<DataTypeString>(), "key"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeUInt64>(), "value"},
});
right_source.addRow({"AAPL", 1, 97});
right_source.addChunk();
right_source.addRow({"AAPL", 2, 98});
right_source.addRow({"AAPL", 3, 99});
right_source.addRow({"AMZN", 1, 100});
right_source.addRow({"AMZN", 2, 110});
right_source.addChunk();
right_source.addRow({"AMZN", 2, 110});
right_source.addChunk();
right_source.addRow({"AMZN", 4, 130});
right_source.addRow({"AMZN", 5, 140});
right_source.addRow({"SBUX", 8, 180});
right_source.addChunk();
right_source.addRow({"SBUX", 9, 190});
{
Block result_block = executePipeline(buildJoinPipeline(
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals));
auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});
ASSERT_EQ(values, (std::vector<std::vector<Field>>{
{"AMZN", 3u, 4u, 130u},
{"AMZN", 4u, 4u, 130u},
}));
}
{
Block result_block = executePipeline(buildJoinPipeline(
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals));
auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});
ASSERT_EQ(values, (std::vector<std::vector<Field>>{
{"AMZN", 3u, 2u, 110u},
{"AMZN", 4u, 4u, 130u},
{"AMZN", 6u, 5u, 140u},
{"SBUX", 10u, 9u, 190u},
}));
}
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
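/// For reference, the ASOF semantics the expectations above encode: for each left row with an equal key,
/// LessOrEquals joins the right row with the smallest t such that left.t <= right.t,
/// and GreaterOrEquals the one with the largest t such that left.t >= right.t.
/// A minimal sketch of that selection rule over sorted right-side timestamps
/// (single key, non-strict variants only; requires <algorithm> and <optional>):
std::optional<UInt64> exampleAsofMatch(const std::vector<UInt64> & right_sorted, UInt64 left_t, ASOFJoinInequality inequality)
{
    if (inequality == ASOFJoinInequality::LessOrEquals)
    {
        /// smallest right.t that is >= left_t
        auto it = std::lower_bound(right_sorted.begin(), right_sorted.end(), left_t);
        if (it == right_sorted.end())
            return std::nullopt;
        return *it;
    }
    if (inequality == ASOFJoinInequality::GreaterOrEquals)
    {
        /// largest right.t that is <= left_t
        auto it = std::upper_bound(right_sorted.begin(), right_sorted.end(), left_t);
        if (it == right_sorted.begin())
            return std::nullopt;
        return *std::prev(it);
    }
    return std::nullopt;
}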
TEST_F(FullSortingJoinTest, AsofOnlyColumn)
try
{
auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} });
SourceChunksBuilder right_source_builder({
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeUInt64>(), "value"},
});
right_source_builder.setBreakProbability(rng);
for (const auto & row : std::vector<std::vector<Field>>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} })
right_source_builder.addRow(row);
auto right_source = right_source_builder.getSource();
auto pipeline = buildJoinPipeline(
left_source, right_source, /* key_length = */ 1,
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals);
Block result_block = executePipeline(std::move(pipeline));
ASSERT_EQ(
assert_cast<const ColumnUInt64 *>(result_block.getByName("t1.key").column.get())->getData(),
(ColumnUInt64::Container{3, 3, 3, 3, 3, 5, 5, 6, 9, 9, 10})
);
ASSERT_EQ(
assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.t").column.get())->getData(),
(ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 11})
);
ASSERT_EQ(
assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.value").column.get())->getData(),
(ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 111})
);
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AsofLessGeneratedTestData)
try
{
/// Generate random data and build the expected result at the same time.
/// Each run tests a specific combination of join kind and ASOF inequality.
auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals });
SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));
/// The key is composite: `k1, k2` for equality and `t` for the ASOF condition
SourceChunksBuilder left_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
SourceChunksBuilder right_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
/// Controls how small the generated blocks are
left_source_builder.setBreakProbability(rng);
right_source_builder.setBreakProbability(rng);
/// Generate sorted data and remember the expected result as we go
ColumnInt64::Container expected;
UInt64 k1 = 1;
String k2;
auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
for (size_t key_num = 0; key_num < key_num_total; ++key_num)
{
/// Generate new key greater than previous
generateNextKey(rng, k1, k2);
Int64 left_t = 0;
/// Generate several rows for the key
size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng);
for (size_t i = 0; i < num_left_rows; ++i)
{
/// t is strictly greater than previous
left_t += std::uniform_int_distribution<>(1, 10)(rng);
auto left_attribute_value = 10 * left_t;
left_source_builder.addRow({k1, k2, left_t, left_attribute_value});
expected.push_back(left_attribute_value);
auto num_matches = 1 + std::poisson_distribution<>(4)(rng);
/// Generate several matches in the right table
auto right_t = left_t;
for (size_t j = 0; j < num_matches; ++j)
{
int min_step = isStrict(asof_inequality) ? 1 : 0;
right_t += std::uniform_int_distribution<>(min_step, 3)(rng);
/// First row should match
bool is_match = j == 0;
right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * left_attribute_value : -1});
}
/// The next left_t must be greater than right_t so that it does not match the previous right rows
left_t = right_t;
}
/// Generate some rows with greater left_t to check that they are not matched
num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
for (size_t i = 0; i < num_left_rows; ++i)
{
left_t += std::uniform_int_distribution<>(1, 10)(rng);
left_source_builder.addRow({k1, k2, left_t, -10 * left_t});
if (join_kind == JoinKind::Left)
expected.push_back(-10 * left_t);
}
}
Block result_block = executePipeline(buildJoinPipeline(
left_source_builder.getSource(), right_source_builder.getSource(),
/* key_length = */ 3,
join_kind, JoinStrictness::Asof, asof_inequality));
assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");
for (auto & e : expected)
/// Non-matched rows from the left table have a negative attr
/// The value of the attribute in the right table is 10 times greater than in the left table
e = e < 0 ? 0 : 10 * e;
assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AsofGreaterGeneratedTestData)
try
{
/// Generate random data and build the expected result at the same time.
/// Each run tests a specific combination of join kind and ASOF inequality.
auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals });
SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));
SourceChunksBuilder left_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
SourceChunksBuilder right_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
left_source_builder.setBreakProbability(rng);
right_source_builder.setBreakProbability(rng);
ColumnInt64::Container expected;
UInt64 k1 = 1;
String k2;
UInt64 left_t = 0;
auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
for (size_t key_num = 0; key_num < key_num_total; ++key_num)
{
/// Generate new key greater than previous
generateNextKey(rng, k1, k2);
/// Generate some rows with smaller left_t to check that they are not matched
size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
for (size_t i = 0; i < num_left_rows; ++i)
{
left_t += std::uniform_int_distribution<>(1, 10)(rng);
left_source_builder.addRow({k1, k2, left_t, -10 * left_t});
if (join_kind == JoinKind::Left)
expected.push_back(-10 * left_t);
}
if (std::bernoulli_distribution(0.1)(rng))
continue;
size_t num_right_matches = std::uniform_int_distribution<>(1, 10)(rng);
auto right_t = left_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 0 : 1, 10)(rng);
auto attribute_value = 10 * right_t;
for (size_t j = 0; j < num_right_matches; ++j)
{
right_t += std::uniform_int_distribution<>(0, 3)(rng);
bool is_match = j == num_right_matches - 1;
right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * attribute_value : -1});
}
/// The next left_t must be greater than (or equal to) right_t so that it matches the previous right rows
left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 100)(rng);
size_t num_left_matches = std::uniform_int_distribution<>(1, 100)(rng);
for (size_t j = 0; j < num_left_matches; ++j)
{
left_t += std::uniform_int_distribution<>(0, 3)(rng);
left_source_builder.addRow({k1, k2, left_t, attribute_value});
expected.push_back(attribute_value);
}
}
Block result_block = executePipeline(buildJoinPipeline(
left_source_builder.getSource(), right_source_builder.getSource(),
/* key_length = */ 3,
join_kind, JoinStrictness::Asof, asof_inequality));
assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");
for (auto & e : expected)
/// Non-matched rows from the left table have a negative attr
/// The value of the attribute in the right table is 10 times greater than in the left table
e = e < 0 ? 0 : 10 * e;
assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}

View File

@ -155,6 +155,10 @@ void printExceptionWithRespectToAbort(LoggerPtr log, const String & query_id)
{
std::rethrow_exception(ex);
}
catch (const TestException &) // NOLINT
{
/// Exception from a unit test, ignore it.
}
catch (const Exception & e)
{
NOEXCEPT_SCOPE({

View File

@ -34,7 +34,7 @@ public:
auto choice = distribution(generator);
if (choice == 0)
throw std::runtime_error("Unlucky...");
throw TestException();
return false;
}
@ -48,7 +48,7 @@ public:
{
auto choice = distribution(generator);
if (choice == 0)
throw std::runtime_error("Unlucky...");
throw TestException();
}
Priority getPriority() const override { return {}; }

View File

@ -3,8 +3,13 @@ import time
from pathlib import Path
from typing import Optional
from shutil import copy2
from create_release import PackageDownloader, ReleaseInfo, ShellRunner
from ci_utils import WithIter
from create_release import (
PackageDownloader,
ReleaseInfo,
ReleaseContextManager,
ReleaseProgress,
)
from ci_utils import WithIter, Shell
class MountPointApp(metaclass=WithIter):
@ -38,7 +43,6 @@ class R2MountPoint:
self.bucket_name = self._PROD_BUCKET_NAME
self.aux_mount_options = ""
self.async_mount = False
if self.app == MountPointApp.S3FS:
self.cache_dir = "/home/ubuntu/s3fs_cache"
# self.aux_mount_options += "-o nomodtime " if self.NOMODTIME else "" not for s3fs
@ -52,7 +56,6 @@ class R2MountPoint:
self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}"
elif self.app == MountPointApp.RCLONE:
# run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return
self.async_mount = True
self.cache_dir = "/home/ubuntu/rclone_cache"
self.aux_mount_options += "--no-modtime " if self.NOMODTIME else ""
self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose
@ -76,19 +79,22 @@ class R2MountPoint:
)
_TEST_MOUNT_CMD = f"mount | grep -q {self.MOUNT_POINT}"
ShellRunner.run(_CLEAN_LOG_FILE_CMD)
ShellRunner.run(_UNMOUNT_CMD)
ShellRunner.run(_MKDIR_CMD)
ShellRunner.run(_MKDIR_FOR_CACHE)
ShellRunner.run(self.mount_cmd, async_=self.async_mount)
if self.async_mount:
time.sleep(3)
ShellRunner.run(_TEST_MOUNT_CMD)
Shell.run(_CLEAN_LOG_FILE_CMD)
Shell.run(_UNMOUNT_CMD)
Shell.run(_MKDIR_CMD)
Shell.run(_MKDIR_FOR_CACHE)
if self.app == MountPointApp.S3FS:
Shell.run(self.mount_cmd, check=True)
else:
# could not get a plain run() to work here without blocking or failing
Shell.run_as_daemon(self.mount_cmd)
time.sleep(3)
Shell.run(_TEST_MOUNT_CMD, check=True)
@classmethod
def teardown(cls):
print(f"Unmount [{cls.MOUNT_POINT}]")
ShellRunner.run(f"umount {cls.MOUNT_POINT}")
Shell.run(f"umount {cls.MOUNT_POINT}")
class RepoCodenames(metaclass=WithIter):
@ -101,6 +107,7 @@ class DebianArtifactory:
_PROD_REPO_URL = "https://packages.clickhouse.com/deb"
def __init__(self, release_info: ReleaseInfo, dry_run: bool):
self.release_info = release_info
self.codename = release_info.codename
self.version = release_info.version
if dry_run:
@ -124,8 +131,8 @@ class DebianArtifactory:
cmd = f"{REPREPRO_CMD_PREFIX} includedeb {self.codename} {' '.join(paths)}"
print("Running export command:")
print(f" {cmd}")
ShellRunner.run(cmd)
ShellRunner.run("sync")
Shell.run(cmd, check=True)
Shell.run("sync")
if self.codename == RepoCodenames.LTS:
packages_with_version = [
@ -137,16 +144,19 @@ class DebianArtifactory:
cmd = f"{REPREPRO_CMD_PREFIX} copy {RepoCodenames.STABLE} {RepoCodenames.LTS} {' '.join(packages_with_version)}"
print("Running copy command:")
print(f" {cmd}")
ShellRunner.run(cmd)
ShellRunner.run("sync")
Shell.run(cmd, check=True)
Shell.run("sync")
def test_packages(self):
ShellRunner.run("docker pull ubuntu:latest")
Shell.run("docker pull ubuntu:latest")
print(f"Test packages installation, version [{self.version}]")
cmd = f"docker run --rm ubuntu:latest bash -c \"apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-client={self.version}\""
debian_command = f"echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-common-static={self.version} clickhouse-client={self.version}"
cmd = f'docker run --rm ubuntu:latest bash -c "apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; {debian_command}"'
print("Running test command:")
print(f" {cmd}")
ShellRunner.run(cmd)
Shell.run(cmd, check=True)
self.release_info.debian_command = debian_command
self.release_info.dump()
def _copy_if_not_exists(src: Path, dst: Path) -> Path:
@ -167,6 +177,7 @@ class RpmArtifactory:
_SIGN_KEY = "885E2BDCF96B0B45ABF058453E4AD4719DDE9A38"
def __init__(self, release_info: ReleaseInfo, dry_run: bool):
self.release_info = release_info
self.codename = release_info.codename
self.version = release_info.version
if dry_run:
@ -202,23 +213,26 @@ class RpmArtifactory:
for command in commands:
print("Running command:")
print(f" {command}")
ShellRunner.run(command)
Shell.run(command, check=True)
update_public_key = f"gpg --armor --export {self._SIGN_KEY}"
pub_key_path = dest_dir / "repodata" / "repomd.xml.key"
print("Updating repomd.xml.key")
pub_key_path.write_text(ShellRunner.run(update_public_key)[1])
pub_key_path.write_text(Shell.run(update_public_key, check=True))
if codename == RepoCodenames.LTS:
self.export_packages(RepoCodenames.STABLE)
ShellRunner.run("sync")
Shell.run("sync")
def test_packages(self):
ShellRunner.run("docker pull fedora:latest")
Shell.run("docker pull fedora:latest")
print(f"Test package installation, version [{self.version}]")
cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1"'
rpm_command = f"dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1"
cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && {rpm_command}"'
print("Running test command:")
print(f" {cmd}")
ShellRunner.run(cmd)
Shell.run(cmd, check=True)
self.release_info.rpm_command = rpm_command
self.release_info.dump()
class TgzArtifactory:
@ -226,6 +240,7 @@ class TgzArtifactory:
_PROD_REPO_URL = "https://packages.clickhouse.com/tgz"
def __init__(self, release_info: ReleaseInfo, dry_run: bool):
self.release_info = release_info
self.codename = release_info.codename
self.version = release_info.version
if dry_run:
@ -256,23 +271,28 @@ class TgzArtifactory:
if codename == RepoCodenames.LTS:
self.export_packages(RepoCodenames.STABLE)
ShellRunner.run("sync")
Shell.run("sync")
def test_packages(self):
tgz_file = "/tmp/tmp.tgz"
tgz_sha_file = "/tmp/tmp.tgz.sha512"
ShellRunner.run(
f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz"
cmd = f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz"
Shell.run(
cmd,
check=True,
)
ShellRunner.run(
f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512"
Shell.run(
f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512",
check=True,
)
expected_checksum = ShellRunner.run(f"cut -d ' ' -f 1 {tgz_sha_file}")
actual_checksum = ShellRunner.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1")
expected_checksum = Shell.run(f"cut -d ' ' -f 1 {tgz_sha_file}", check=True)
actual_checksum = Shell.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1")
assert (
expected_checksum == actual_checksum
), f"[{actual_checksum} != {expected_checksum}]"
ShellRunner.run("rm /tmp/tmp.tgz*")
Shell.run("rm /tmp/tmp.tgz*")
self.release_info.tgz_command = cmd
self.release_info.dump()
def parse_args() -> argparse.Namespace:
@ -280,12 +300,6 @@ def parse_args() -> argparse.Namespace:
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Adds release packages to the repository",
)
parser.add_argument(
"--infile",
type=str,
required=True,
help="input file with release info",
)
parser.add_argument(
"--export-debian",
action="store_true",
@ -326,9 +340,7 @@ def parse_args() -> argparse.Namespace:
if __name__ == "__main__":
args = parse_args()
assert args.dry_run
release_info = ReleaseInfo.from_file(args.infile)
"""
Use S3FS. RCLONE has some errors with r2 remote which I didn't figure out how to resolve:
ERROR : IO error: NotImplemented: versionId not implemented
@ -336,20 +348,38 @@ if __name__ == "__main__":
"""
mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run)
if args.export_debian:
mp.init()
DebianArtifactory(release_info, dry_run=args.dry_run).export_packages()
mp.teardown()
with ReleaseContextManager(
release_progress=ReleaseProgress.EXPORT_DEB
) as release_info:
mp.init()
DebianArtifactory(release_info, dry_run=args.dry_run).export_packages()
mp.teardown()
if args.export_rpm:
mp.init()
RpmArtifactory(release_info, dry_run=args.dry_run).export_packages()
mp.teardown()
with ReleaseContextManager(
release_progress=ReleaseProgress.EXPORT_RPM
) as release_info:
mp.init()
RpmArtifactory(release_info, dry_run=args.dry_run).export_packages()
mp.teardown()
if args.export_tgz:
mp.init()
TgzArtifactory(release_info, dry_run=args.dry_run).export_packages()
mp.teardown()
with ReleaseContextManager(
release_progress=ReleaseProgress.EXPORT_TGZ
) as release_info:
mp.init()
TgzArtifactory(release_info, dry_run=args.dry_run).export_packages()
mp.teardown()
if args.test_debian:
DebianArtifactory(release_info, dry_run=args.dry_run).test_packages()
with ReleaseContextManager(
release_progress=ReleaseProgress.TEST_DEB
) as release_info:
DebianArtifactory(release_info, dry_run=args.dry_run).test_packages()
if args.test_tgz:
TgzArtifactory(release_info, dry_run=args.dry_run).test_packages()
with ReleaseContextManager(
release_progress=ReleaseProgress.TEST_TGZ
) as release_info:
TgzArtifactory(release_info, dry_run=args.dry_run).test_packages()
if args.test_rpm:
RpmArtifactory(release_info, dry_run=args.dry_run).test_packages()
with ReleaseContextManager(
release_progress=ReleaseProgress.TEST_RPM
) as release_info:
RpmArtifactory(release_info, dry_run=args.dry_run).test_packages()

View File

@ -1,17 +1,17 @@
import argparse
from datetime import timedelta, datetime
import logging
import dataclasses
import json
import os
from commit_status_helper import get_commit_filtered_statuses
import sys
from typing import List
from get_robot_token import get_best_robot_token
from github_helper import GitHub
from release import Release, Repo as ReleaseRepo, RELEASE_READY_STATUS
from ci_utils import Shell
from env_helper import GITHUB_REPOSITORY
from report import SUCCESS
from ssh import SSHKey
LOGGER_NAME = __name__
HELPER_LOGGERS = ["github_helper", LOGGER_NAME]
logger = logging.getLogger(LOGGER_NAME)
from ci_buddy import CIBuddy
from ci_config import CI
def parse_args():
@ -21,120 +21,198 @@ def parse_args():
)
parser.add_argument("--token", help="GitHub token, if not set, used from smm")
parser.add_argument(
"--repo", default="ClickHouse/ClickHouse", help="Repo owner/name"
)
parser.add_argument("--dry-run", action="store_true", help="Do not create anything")
parser.add_argument(
"--release-after-days",
type=int,
default=3,
help="Do automatic release on the latest green commit after the latest "
"release if the newest release is older than the specified days",
)
parser.add_argument(
"--debug-helpers",
"--post-status",
action="store_true",
help="Add debug logging for this script and github_helper",
help="Post release branch statuses",
)
parser.add_argument(
"--remote-protocol",
"-p",
default="ssh",
choices=ReleaseRepo.VALID,
help="repo protocol for git commands remote, 'origin' is a special case and "
"uses 'origin' as a remote",
"--post-auto-release-complete",
action="store_true",
help="Post autorelease completion status",
)
parser.add_argument(
"--prepare",
action="store_true",
help="Prepare autorelease info",
)
parser.add_argument(
"--wf-status",
type=str,
default="",
help="overall workflow status [success|failure]",
)
return parser.parse_args(), parser
return parser.parse_args()
MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE = 5
AUTORELEASE_INFO_FILE = "/tmp/autorelease_info.json"
@dataclasses.dataclass
class ReleaseParams:
ready: bool
ci_status: str
num_patches: int
release_branch: str
commit_sha: str
commits_to_branch_head: int
latest: bool
def to_dict(self):
return dataclasses.asdict(self)
@dataclasses.dataclass
class AutoReleaseInfo:
releases: List[ReleaseParams]
def add_release(self, release_params: ReleaseParams) -> None:
self.releases.append(release_params)
def dump(self):
print(f"Dump release info into [{AUTORELEASE_INFO_FILE}]")
with open(AUTORELEASE_INFO_FILE, "w", encoding="utf-8") as f:
print(json.dumps(dataclasses.asdict(self), indent=2), file=f)
@staticmethod
def from_file() -> "AutoReleaseInfo":
with open(AUTORELEASE_INFO_FILE, "r", encoding="utf-8") as json_file:
res = json.load(json_file)
releases = [ReleaseParams(**release) for release in res["releases"]]
return AutoReleaseInfo(releases=releases)
def _prepare(token):
assert len(token) > 10
os.environ["GH_TOKEN"] = token
Shell.run("gh auth status", check=True)
gh = GitHub(token)
prs = gh.get_release_pulls(GITHUB_REPOSITORY)
prs.sort(key=lambda x: x.head.ref)
branch_names = [pr.head.ref for pr in prs]
print(f"Found release branches [{branch_names}]")
repo = gh.get_repo(GITHUB_REPOSITORY)
autoRelease_info = AutoReleaseInfo(releases=[])
for pr in prs:
print(f"\nChecking PR [{pr.head.ref}]")
refs = list(repo.get_git_matching_refs(f"tags/v{pr.head.ref}"))
assert refs
refs.sort(key=lambda ref: ref.ref)
latest_release_tag_ref = refs[-1]
latest_release_tag = repo.get_git_tag(latest_release_tag_ref.object.sha)
commits = Shell.run(
f"git rev-list --first-parent {latest_release_tag.tag}..origin/{pr.head.ref}",
check=True,
).split("\n")
commit_num = len(commits)
print(
f"Previous release [{latest_release_tag.tag}] was [{commit_num}] commits ago, date [{latest_release_tag.tagger.date}]"
)
commits_to_check = commits[:-1] # Exclude the version bump commit
commit_sha = ""
commit_ci_status = ""
commits_to_branch_head = 0
for idx, commit in enumerate(
commits_to_check[:MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE]
):
print(
f"Check commit [{commit}] [{pr.head.ref}~{idx+1}] as release candidate"
)
commit_num -= 1
is_completed = CI.GHActions.check_wf_completed(
token=token, commit_sha=commit
)
if not is_completed:
print(f"CI is in progress for [{commit}] - check previous commit")
commits_to_branch_head += 1
continue
commit_ci_status = CI.GHActions.get_commit_status_by_name(
token=token,
commit_sha=commit,
status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"),
)
commit_sha = commit
if commit_ci_status == SUCCESS:
break
print(f"CI status [{commit_ci_status}] - skip")
commits_to_branch_head += 1
ready = False
if commit_ci_status == SUCCESS and commit_sha:
print(
f"Add release ready info for commit [{commit_sha}] and release branch [{pr.head.ref}]"
)
ready = True
else:
print(f"WARNING: No ready commits found for release branch [{pr.head.ref}]")
autoRelease_info.add_release(
ReleaseParams(
release_branch=pr.head.ref,
commit_sha=commit_sha,
ready=ready,
ci_status=commit_ci_status,
num_patches=commit_num,
commits_to_branch_head=commits_to_branch_head,
latest=False,
)
)
if autoRelease_info.releases:
autoRelease_info.releases[-1].latest = True
autoRelease_info.dump()
def main():
args = parse_args()
logging.basicConfig(level=logging.INFO)
if args.debug_helpers:
for logger_name in HELPER_LOGGERS:
logging.getLogger(logger_name).setLevel(logging.DEBUG)
args, parser = parse_args()
token = args.token or get_best_robot_token()
days_as_timedelta = timedelta(days=args.release_after_days)
now = datetime.now()
gh = GitHub(token)
prs = gh.get_release_pulls(args.repo)
branch_names = [pr.head.ref for pr in prs]
logger.info("Found release branches: %s\n ", " \n".join(branch_names))
repo = gh.get_repo(args.repo)
# In general there is no guarantee on which order the refs/commits are
# returned from the API, so we have to order them.
for pr in prs:
logger.info("Checking PR %s", pr.head.ref)
refs = list(repo.get_git_matching_refs(f"tags/v{pr.head.ref}"))
refs.sort(key=lambda ref: ref.ref)
latest_release_tag_ref = refs[-1]
latest_release_tag = repo.get_git_tag(latest_release_tag_ref.object.sha)
logger.info("That last release was done at %s", latest_release_tag.tagger.date)
if latest_release_tag.tagger.date + days_as_timedelta > now:
logger.info(
"Not enough days since the last release %s,"
" no automatic release can be done",
latest_release_tag.tag,
if args.post_status:
info = AutoReleaseInfo.from_file()
for release_info in info.releases:
if release_info.ready:
CIBuddy(dry_run=False).post_info(
title=f"Auto Release Status for {release_info.release_branch}",
body=release_info.to_dict(),
)
else:
CIBuddy(dry_run=False).post_warning(
title=f"Auto Release Status for {release_info.release_branch}",
body=release_info.to_dict(),
)
elif args.post_auto_release_complete:
assert args.wf_status, "--wf-status Required with --post-auto-release-complete"
if args.wf_status != SUCCESS:
CIBuddy(dry_run=False).post_job_error(
error_description="Autorelease workflow failed",
job_name="Autorelease",
with_instance_info=False,
with_wf_link=True,
critical=True,
)
continue
unreleased_commits = list(
repo.get_commits(sha=pr.head.ref, since=latest_release_tag.tagger.date)
)
unreleased_commits.sort(
key=lambda commit: commit.commit.committer.date, reverse=True
)
for commit in unreleased_commits:
logger.info("Checking statuses of commit %s", commit.sha)
statuses = get_commit_filtered_statuses(commit)
all_success = all(st.state == SUCCESS for st in statuses)
passed_ready_for_release_check = any(
st.context == RELEASE_READY_STATUS and st.state == SUCCESS
for st in statuses
else:
CIBuddy(dry_run=False).post_info(
title=f"Autorelease completed",
body="",
with_wf_link=True,
)
if not (all_success and passed_ready_for_release_check):
logger.info("Commit is not green, thus not suitable for release")
continue
logger.info("Commit is ready for release, let's release!")
release = Release(
ReleaseRepo(args.repo, args.remote_protocol),
commit.sha,
"patch",
args.dry_run,
True,
)
try:
release.do(True, True, True)
except:
if release.has_rollback:
logging.error(
"!!The release process finished with error, read the output carefully!!"
)
logging.error(
"Probably, rollback finished with error. "
"If you don't see any of the following commands in the output, "
"execute them manually:"
)
release.log_rollback()
raise
logging.info("New release is done!")
break
elif args.prepare:
_prepare(token=args.token or get_best_robot_token())
else:
parser.print_help()
sys.exit(2)
if __name__ == "__main__":
if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""):
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
main()
else:
main()
main()

View File

@ -1110,13 +1110,14 @@ def main() -> int:
ci_cache.print_status()
if IS_CI and not pr_info.is_merge_queue:
# wait for pending jobs to be finished, await_jobs is a long blocking call
ci_cache.await_pending_jobs(pr_info.is_release)
if pr_info.is_release:
print("Release/master: CI Cache add pending records for all todo jobs")
ci_cache.push_pending_all(pr_info.is_release)
# wait for pending jobs to be finished, await_jobs is a long blocking call
ci_cache.await_pending_jobs(pr_info.is_release)
# conclude results
result["git_ref"] = git_ref
result["version"] = version
@ -1292,10 +1293,11 @@ def main() -> int:
pass
if Utils.is_killed_with_oom():
print("WARNING: OOM while job execution")
print(subprocess.run("sudo dmesg -T", check=False))
error_description = f"Out Of Memory, exit_code {job_report.exit_code}"
else:
error_description = f"Unknown, exit_code {job_report.exit_code}"
CIBuddy().post_error(
CIBuddy().post_job_error(
error_description + f" after {int(job_report.duration)}s",
job_name=_get_ext_check_name(args.job_name),
)

View File

@ -1,5 +1,6 @@
import json
import os
from typing import Union, Dict
import boto3
import requests
@ -60,14 +61,64 @@ class CIBuddy:
except Exception as e:
print(f"ERROR: Failed to post message, ex {e}")
def post_error(self, error_description, job_name="", with_instance_info=True):
def _post_formatted(
self, title: str, body: Union[Dict, str], with_wf_link: bool
) -> None:
message = title
if isinstance(body, dict):
for name, value in body.items():
if "commit_sha" in name:
value = (
f"<https://github.com/{self.repo}/commit/{value}|{value[:8]}>"
)
message += f" *{name}*: {value}\n"
else:
message += body + "\n"
run_id = os.getenv("GITHUB_RUN_ID", "")
if with_wf_link and run_id:
message += f" *workflow*: <https://github.com/{self.repo}/actions/runs/{run_id}|{run_id}>\n"
self.post(message)
def post_info(
self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
title_extended = f":white_circle: *{title}*\n\n"
self._post_formatted(title_extended, body, with_wf_link)
def post_done(
self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
title_extended = f":white_check_mark: *{title}*\n\n"
self._post_formatted(title_extended, body, with_wf_link)
def post_warning(
self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
title_extended = f":warning: *{title}*\n\n"
self._post_formatted(title_extended, body, with_wf_link)
def post_critical(
self, title: str, body: Union[Dict, str], with_wf_link: bool = True
) -> None:
title_extended = f":black_circle: *{title}*\n\n"
self._post_formatted(title_extended, body, with_wf_link)
def post_job_error(
self,
error_description: str,
job_name: str = "",
with_instance_info: bool = True,
with_wf_link: bool = True,
critical: bool = False,
) -> None:
instance_id, instance_type = "unknown", "unknown"
if with_instance_info:
instance_id = Shell.run("ec2metadata --instance-id") or instance_id
instance_type = Shell.run("ec2metadata --instance-type") or instance_type
if not job_name:
job_name = os.getenv("CHECK_NAME", "unknown")
line_err = f":red_circle: *Error: {error_description}*\n\n"
sign = ":red_circle:" if not critical else ":black_circle:"
line_err = f"{sign} *Error: {error_description}*\n\n"
line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n"
line_job = f" *Job:* `{job_name}`\n"
line_pr_ = f" *PR:* <https://github.com/{self.repo}/pull/{self.pr_number}|#{self.pr_number}>, <{self.commit_url}|{self.sha}>\n"
@ -82,10 +133,13 @@ class CIBuddy:
message += line_pr_
else:
message += line_br_
run_id = os.getenv("GITHUB_RUN_ID", "")
if with_wf_link and run_id:
message += f" *workflow*: <https://github.com/{self.repo}/actions/runs/{run_id}|{run_id}>\n"
self.post(message)
if __name__ == "__main__":
# test
buddy = CIBuddy(dry_run=True)
buddy.post_error("TEst")
buddy.post_job_error("TEst")

View File

@ -638,7 +638,14 @@ class CiCache:
pushes pending records for all jobs that supposed to be run
"""
for job, job_config in self.jobs_to_do.items():
if not job_config.has_digest():
if (
job in self.jobs_to_wait
or not job_config.has_digest()
or job_config.disable_await
):
# 1. "job in self.jobs_to_wait" - this job already has a pending record in cache
# 2. "not job_config.has_digest()" - cache is not used for these jobs
# 3. "job_config.disable_await" - await is explicitly disabled
continue
pending_state = PendingState(time.time(), run_url=GITHUB_RUN_URL)
assert job_config.batches
@ -708,7 +715,7 @@ class CiCache:
Filter is to be applied in PRs to remove jobs that are not affected by the change
:return:
"""
remove_from_to_do = []
remove_from_workflow = []
required_builds = []
has_test_jobs_to_skip = False
for job_name, job_config in self.jobs_to_do.items():
@ -723,26 +730,41 @@ class CiCache:
job=reference_name,
job_config=reference_config,
):
remove_from_to_do.append(job_name)
remove_from_workflow.append(job_name)
has_test_jobs_to_skip = True
else:
required_builds += (
job_config.required_builds if job_config.required_builds else []
)
if has_test_jobs_to_skip:
# If there are tests to skip, it means build digest has not been changed.
# If there are tests to skip, it means builds are not affected as well.
# No need to test builds. Let's keep all builds required for test jobs and skip the others
for job_name, job_config in self.jobs_to_do.items():
if CI.is_build_job(job_name):
if job_name not in required_builds:
remove_from_to_do.append(job_name)
remove_from_workflow.append(job_name)
for job in remove_from_to_do:
for job in remove_from_workflow:
print(f"Filter job [{job}] - not affected by the change")
if job in self.jobs_to_do:
del self.jobs_to_do[job]
if job in self.jobs_to_wait:
del self.jobs_to_wait[job]
if job in self.jobs_to_skip:
self.jobs_to_skip.remove(job)
# special handling for the special job: BUILD_CHECK
has_builds = False
for job in list(self.jobs_to_do) + self.jobs_to_skip:
if CI.is_build_job(job):
has_builds = True
break
if not has_builds:
if CI.JobNames.BUILD_CHECK in self.jobs_to_do:
print(
f"Filter job [{CI.JobNames.BUILD_CHECK}] - no builds are required in the workflow"
)
del self.jobs_to_do[CI.JobNames.BUILD_CHECK]
def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None:
"""
@ -884,3 +906,87 @@ class CiCache:
self.jobs_to_wait[job] = job_config
return self
if __name__ == "__main__":
# for testing
job_digest = {
"package_release": "bbbd3519d1",
"package_aarch64": "bbbd3519d1",
"package_asan": "bbbd3519d1",
"package_ubsan": "bbbd3519d1",
"package_tsan": "bbbd3519d1",
"package_msan": "bbbd3519d1",
"package_debug": "bbbd3519d1",
"package_release_coverage": "bbbd3519d1",
"binary_release": "bbbd3519d1",
"binary_tidy": "bbbd3519d1",
"binary_darwin": "bbbd3519d1",
"binary_aarch64": "bbbd3519d1",
"binary_aarch64_v80compat": "bbbd3519d1",
"binary_freebsd": "bbbd3519d1",
"binary_darwin_aarch64": "bbbd3519d1",
"binary_ppc64le": "bbbd3519d1",
"binary_amd64_compat": "bbbd3519d1",
"binary_amd64_musl": "bbbd3519d1",
"binary_riscv64": "bbbd3519d1",
"binary_s390x": "bbbd3519d1",
"binary_loongarch64": "bbbd3519d1",
"Builds": "f5dffeecb8",
"Install packages (release)": "ba0c89660e",
"Install packages (aarch64)": "ba0c89660e",
"Stateful tests (asan)": "32a9a1aba9",
"Stateful tests (tsan)": "32a9a1aba9",
"Stateful tests (msan)": "32a9a1aba9",
"Stateful tests (ubsan)": "32a9a1aba9",
"Stateful tests (debug)": "32a9a1aba9",
"Stateful tests (release)": "32a9a1aba9",
"Stateful tests (coverage)": "32a9a1aba9",
"Stateful tests (aarch64)": "32a9a1aba9",
"Stateful tests (release, ParallelReplicas)": "32a9a1aba9",
"Stateful tests (debug, ParallelReplicas)": "32a9a1aba9",
"Stateless tests (asan)": "deb6778b88",
"Stateless tests (tsan)": "deb6778b88",
"Stateless tests (msan)": "deb6778b88",
"Stateless tests (ubsan)": "deb6778b88",
"Stateless tests (debug)": "deb6778b88",
"Stateless tests (release)": "deb6778b88",
"Stateless tests (coverage)": "deb6778b88",
"Stateless tests (aarch64)": "deb6778b88",
"Stateless tests (release, old analyzer, s3, DatabaseReplicated)": "deb6778b88",
"Stateless tests (debug, s3 storage)": "deb6778b88",
"Stateless tests (tsan, s3 storage)": "deb6778b88",
"Stress test (debug)": "aa298abf10",
"Stress test (tsan)": "aa298abf10",
"Upgrade check (debug)": "5ce4d3ee02",
"Integration tests (asan, old analyzer)": "42e58be3aa",
"Integration tests (tsan)": "42e58be3aa",
"Integration tests (aarch64)": "42e58be3aa",
"Integration tests flaky check (asan)": "42e58be3aa",
"Compatibility check (release)": "ecb69d8c4b",
"Compatibility check (aarch64)": "ecb69d8c4b",
"Unit tests (release)": "09d00b702e",
"Unit tests (asan)": "09d00b702e",
"Unit tests (msan)": "09d00b702e",
"Unit tests (tsan)": "09d00b702e",
"Unit tests (ubsan)": "09d00b702e",
"AST fuzzer (debug)": "c38ebf947f",
"AST fuzzer (asan)": "c38ebf947f",
"AST fuzzer (msan)": "c38ebf947f",
"AST fuzzer (tsan)": "c38ebf947f",
"AST fuzzer (ubsan)": "c38ebf947f",
"Stateless tests flaky check (asan)": "deb6778b88",
"Performance Comparison (release)": "a8a7179258",
"ClickBench (release)": "45c07c4aa6",
"ClickBench (aarch64)": "45c07c4aa6",
"Docker server image": "6a24d5b187",
"Docker keeper image": "6a24d5b187",
"Docs check": "4764154c62",
"Fast test": "cb269133f2",
"Style check": "ffffffffff",
"Stateful tests (ubsan, ParallelReplicas)": "32a9a1aba9",
"Stress test (msan)": "aa298abf10",
"Upgrade check (asan)": "5ce4d3ee02",
}
ci_cache = CiCache(job_digests=job_digest, cache_enabled=True, s3=S3Helper())
ci_cache.update()

View File

@ -32,6 +32,9 @@ class CI:
from ci_definitions import MQ_JOBS as MQ_JOBS
from ci_definitions import WorkflowStages as WorkflowStages
from ci_definitions import Runners as Runners
from ci_utils import Envs as Envs
from ci_utils import Utils as Utils
from ci_utils import GHActions as GHActions
from ci_definitions import Labels as Labels
from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS
from ci_utils import CATEGORY_TO_LABEL as CATEGORY_TO_LABEL

View File

@ -351,6 +351,8 @@ class JobConfig:
run_by_label: str = ""
# to run always regardless of the job digest or/and label
run_always: bool = False
# disables CI await for a given job
disable_await: bool = False
# if the job needs to be run on the release branch, including master (building packages, docker server).
# NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able.
required_on_release_branch: bool = False
@ -395,6 +397,7 @@ class CommonJobConfigs:
],
),
runner_type=Runners.STYLE_CHECKER_ARM,
disable_await=True,
)
COMPATIBILITY_TEST = JobConfig(
job_name_keyword="compatibility",

View File

@ -1,9 +1,16 @@
import os
import re
import subprocess
import time
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Iterator, List, Union, Optional, Tuple
from typing import Any, Iterator, List, Union, Optional, Sequence, Tuple
import requests
class Envs:
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
LABEL_CATEGORIES = {
@ -80,6 +87,71 @@ class GHActions:
print(line)
print("::endgroup::")
@staticmethod
def get_commit_status_by_name(
token: str, commit_sha: str, status_name: Union[str, Sequence]
) -> str:
assert len(token) == 40
assert len(commit_sha) == 40
assert is_hex(commit_sha)
assert not is_hex(token)
url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}"
headers = {
"Authorization": f"token {token}",
"Accept": "application/vnd.github.v3+json",
}
response = requests.get(url, headers=headers, timeout=5)
if isinstance(status_name, str):
status_name = (status_name,)
if response.status_code == 200:
assert "next" not in response.links, "Response truncated"
statuses = response.json()
for status in statuses:
if status["context"] in status_name:
return status["state"] # type: ignore
return ""
@staticmethod
def check_wf_completed(token: str, commit_sha: str) -> bool:
headers = {
"Authorization": f"token {token}",
"Accept": "application/vnd.github.v3+json",
}
url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/check-runs?per_page={100}"
for i in range(3):
try:
response = requests.get(url, headers=headers, timeout=5)
response.raise_for_status()
# assert "next" not in response.links, "Response truncated"
data = response.json()
assert data["check_runs"], "?"
for check in data["check_runs"]:
if check["status"] != "completed":
print(
f" Check workflow status: Check not completed [{check['name']}]"
)
return False
return True
except Exception as e:
print(f"ERROR: exception after attempt [{i}]: {e}")
time.sleep(1)
return False
@staticmethod
def get_pr_url_by_branch(repo, branch):
get_url_cmd = (
f"gh pr list --repo {repo} --head {branch} --json url --jq '.[0].url'"
)
url = Shell.run(get_url_cmd)
if not url:
print(f"ERROR: PR nor found, branch [{branch}]")
return url
class Shell:
@classmethod
@ -95,7 +167,11 @@ class Shell:
return res.stdout.strip()
@classmethod
def run(cls, command):
def run(cls, command, check=False, dry_run=False):
if dry_run:
print(f"Dry-ryn. Would run command [{command}]")
return ""
print(f"Run command [{command}]")
res = ""
result = subprocess.run(
command,
@ -106,13 +182,26 @@ class Shell:
check=False,
)
if result.returncode == 0:
print(f"stdout: {result.stdout.strip()}")
res = result.stdout
else:
print(
f"ERROR: stdout: {result.stdout.strip()}, stderr: {result.stderr.strip()}"
)
if check:
assert result.returncode == 0
return res.strip()
@classmethod
def run_as_daemon(cls, command):
print(f"Run daemon command [{command}]")
subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with
return 0, ""
@classmethod
def check(cls, command):
result = subprocess.run(
command + " 2>&1",
command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,

View File

@ -2,7 +2,6 @@ import argparse
import dataclasses
import json
import os
import subprocess
from contextlib import contextmanager
from copy import copy
@ -13,7 +12,8 @@ from git_helper import Git, GIT_PREFIX
from ssh import SSHAgent
from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET
from s3_helper import S3Helper
from ci_utils import Shell
from ci_utils import Shell, GHActions
from ci_buddy import CIBuddy
from version_helper import (
FILE_WITH_VERSION_PATH,
GENERATED_CONTRIBUTORS,
@ -27,34 +27,66 @@ from ci_config import CI
CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH)
CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS)
RELEASE_INFO_FILE = "/tmp/release_info.json"
class ShellRunner:
class ReleaseProgress:
STARTED = "started"
DOWNLOAD_PACKAGES = "download packages"
PUSH_RELEASE_TAG = "push release tag"
PUSH_NEW_RELEASE_BRANCH = "push new release branch"
BUMP_VERSION = "bump version"
CREATE_GH_RELEASE = "create GH release"
EXPORT_TGZ = "export TGZ packages"
EXPORT_RPM = "export RPM packages"
EXPORT_DEB = "export DEB packages"
TEST_TGZ = "test TGZ packages"
TEST_RPM = "test RPM packages"
TEST_DEB = "test DEB packages"
COMPLETED = "completed"
@classmethod
def run(
cls, command, check_retcode=True, print_output=True, async_=False, dry_run=False
):
if dry_run:
print(f"Dry-run: Would run shell command: [{command}]")
return 0, ""
print(f"Running shell command: [{command}]")
if async_:
subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with
return 0, ""
result = subprocess.run(
command + " 2>&1",
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=True,
)
if print_output:
print(result.stdout)
if check_retcode:
assert result.returncode == 0, f"Return code [{result.returncode}]"
return result.returncode, result.stdout
class ReleaseProgressDescription:
OK = "OK"
FAILED = "FAILED"
class ReleaseContextManager:
def __init__(self, release_progress):
self.release_progress = release_progress
self.release_info = None
def __enter__(self):
if self.release_progress == ReleaseProgress.STARTED:
# create initial release info
self.release_info = ReleaseInfo(
release_branch="NA",
commit_sha=args.ref,
release_tag="NA",
version="NA",
codename="NA",
previous_release_tag="NA",
previous_release_sha="NA",
release_progress=ReleaseProgress.STARTED,
).dump()
else:
# fetch release info from fs and update
self.release_info = ReleaseInfo.from_file()
assert self.release_info
assert (
self.release_info.progress_description == ReleaseProgressDescription.OK
), "Must be OK on the start of new context"
self.release_info.release_progress = self.release_progress
self.release_info.dump()
return self.release_info
def __exit__(self, exc_type, exc_value, traceback):
assert self.release_info
if exc_type is not None:
self.release_info.progress_description = ReleaseProgressDescription.FAILED
else:
self.release_info.progress_description = ReleaseProgressDescription.OK
self.release_info.dump()
@dataclasses.dataclass
@ -67,31 +99,50 @@ class ReleaseInfo:
codename: str
previous_release_tag: str
previous_release_sha: str
changelog_pr: str = ""
version_bump_pr: str = ""
release_url: str = ""
debian_command: str = ""
rpm_command: str = ""
tgz_command: str = ""
docker_command: str = ""
release_progress: str = ""
progress_description: str = ""
def is_patch(self):
return self.release_branch != "master"
def is_new_release_branch(self):
return self.release_branch == "master"
@staticmethod
def from_file(file_path: str) -> "ReleaseInfo":
with open(file_path, "r", encoding="utf-8") as json_file:
def from_file() -> "ReleaseInfo":
with open(RELEASE_INFO_FILE, "r", encoding="utf-8") as json_file:
res = json.load(json_file)
return ReleaseInfo(**res)
@staticmethod
def prepare(commit_ref: str, release_type: str, outfile: str) -> None:
Path(outfile).parent.mkdir(parents=True, exist_ok=True)
Path(outfile).unlink(missing_ok=True)
def dump(self):
print(f"Dump release info into [{RELEASE_INFO_FILE}]")
with open(RELEASE_INFO_FILE, "w", encoding="utf-8") as f:
print(json.dumps(dataclasses.asdict(self), indent=2), file=f)
return self
def prepare(self, commit_ref: str, release_type: str) -> "ReleaseInfo":
version = None
release_branch = None
release_tag = None
previous_release_tag = None
previous_release_sha = None
codename = None
codename = ""
assert release_type in ("patch", "new")
if release_type == "new":
# check commit_ref is right and on a right branch
ShellRunner.run(
f"git merge-base --is-ancestor origin/{commit_ref} origin/master"
Shell.run(
f"git merge-base --is-ancestor {commit_ref} origin/master",
check=True,
)
with checkout(commit_ref):
_, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}")
commit_sha = Shell.run(f"git rev-parse {commit_ref}", check=True)
# Git() must be inside "with checkout" contextmanager
git = Git()
version = get_version_from_repo(git=git)
@ -102,9 +153,6 @@ class ReleaseInfo:
git.latest_tag == expected_prev_tag
), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]"
release_tag = version.describe
codename = (
VersionType.STABLE
) # dummy value (artifactory won't be updated for new release)
previous_release_tag = expected_prev_tag
previous_release_sha = Shell.run_strict(
f"git rev-parse {previous_release_tag}"
@ -112,7 +160,7 @@ class ReleaseInfo:
assert previous_release_sha
if release_type == "patch":
with checkout(commit_ref):
_, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}")
commit_sha = Shell.run(f"git rev-parse {commit_ref}", check=True)
# Git() must be inside "with checkout" contextmanager
git = Git()
version = get_version_from_repo(git=git)
@ -120,10 +168,11 @@ class ReleaseInfo:
version.with_description(codename)
release_branch = f"{version.major}.{version.minor}"
release_tag = version.describe
ShellRunner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags")
Shell.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags", check=True)
# check commit is right and on a right branch
ShellRunner.run(
f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}"
Shell.run(
f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}",
check=True,
)
if version.patch == 1:
expected_version = copy(version)
@ -160,24 +209,24 @@ class ReleaseInfo:
and commit_sha
and release_tag
and version
and codename in ("lts", "stable")
and (codename in ("lts", "stable") or release_type == "new")
)
res = ReleaseInfo(
release_branch=release_branch,
commit_sha=commit_sha,
release_tag=release_tag,
version=version.string,
codename=codename,
previous_release_tag=previous_release_tag,
previous_release_sha=previous_release_sha,
)
with open(outfile, "w", encoding="utf-8") as f:
print(json.dumps(dataclasses.asdict(res), indent=2), file=f)
self.release_branch = release_branch
self.commit_sha = commit_sha
self.release_tag = release_tag
self.version = version.string
self.codename = codename
self.previous_release_tag = previous_release_tag
self.previous_release_sha = previous_release_sha
self.release_progress = ReleaseProgress.STARTED
self.progress_description = ReleaseProgressDescription.OK
return self
def push_release_tag(self, dry_run: bool) -> None:
if dry_run:
# remove locally created tag from prev run
ShellRunner.run(
Shell.run(
f"{GIT_PREFIX} tag -l | grep -q {self.release_tag} && git tag -d {self.release_tag} ||:"
)
# Create release tag
@ -185,16 +234,17 @@ class ReleaseInfo:
f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]"
)
tag_message = f"Release {self.release_tag}"
ShellRunner.run(
f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}"
Shell.run(
f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}",
check=True,
)
cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}"
ShellRunner.run(cmd_push_tag, dry_run=dry_run)
Shell.run(cmd_push_tag, dry_run=dry_run, check=True)
@staticmethod
def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None:
cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}"
ShellRunner.run(cmd, dry_run=dry_run)
Shell.run(cmd, dry_run=dry_run, check=True)
def push_new_release_branch(self, dry_run: bool) -> None:
assert (
@ -211,8 +261,8 @@ class ReleaseInfo:
), f"Unexpected current version in git, must precede [{self.version}] by one step, actual [{version.string}]"
if dry_run:
# remove locally created branch from prev run
ShellRunner.run(
f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch} ||:"
Shell.run(
f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch}"
)
print(
f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]"
@ -225,7 +275,7 @@ class ReleaseInfo:
cmd_push_branch = (
f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}"
)
ShellRunner.run(cmd_push_branch, dry_run=dry_run)
Shell.run(cmd_push_branch, dry_run=dry_run, check=True)
print("Create and push backport tags for new release branch")
ReleaseInfo._create_gh_label(
@ -234,12 +284,13 @@ class ReleaseInfo:
ReleaseInfo._create_gh_label(
f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run
)
ShellRunner.run(
Shell.run(
f"""gh pr create --repo {GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}'
--head {new_release_branch} {pr_labels}
--body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.'
""",
dry_run=dry_run,
check=True,
)
def update_version_and_contributors_list(self, dry_run: bool) -> None:
@ -265,32 +316,55 @@ class ReleaseInfo:
body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md")
actor = os.getenv("GITHUB_ACTOR", "") or "me"
cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}"
ShellRunner.run(cmd_commit_version_upd, dry_run=dry_run)
ShellRunner.run(cmd_push_branch, dry_run=dry_run)
ShellRunner.run(cmd_create_pr, dry_run=dry_run)
Shell.run(cmd_commit_version_upd, check=True, dry_run=dry_run)
Shell.run(cmd_push_branch, check=True, dry_run=dry_run)
Shell.run(cmd_create_pr, check=True, dry_run=dry_run)
if dry_run:
ShellRunner.run(
f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'"
)
ShellRunner.run(
Shell.run(f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'")
Shell.run(
f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'"
)
self.version_bump_pr = "dry-run"
else:
self.version_bump_pr = GHActions.get_pr_url_by_branch(
repo=GITHUB_REPOSITORY, branch=branch_upd_version_contributors
)
def update_release_info(self, dry_run: bool) -> "ReleaseInfo":
if self.release_branch != "master":
branch = f"auto/{release_info.release_tag}"
if not dry_run:
url = GHActions.get_pr_url_by_branch(
repo=GITHUB_REPOSITORY, branch=branch
)
else:
url = "dry-run"
print(f"ChangeLog PR url [{url}]")
self.changelog_pr = url
print(f"Release url [{url}]")
self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}"
if self.release_progress == ReleaseProgress.COMPLETED:
self.docker_command = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version"
self.dump()
return self
def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None:
repo = os.getenv("GITHUB_REPOSITORY")
assert repo
cmds = []
cmds.append(
cmds = [
f"gh release create --repo {repo} --title 'Release {self.release_tag}' {self.release_tag}"
)
]
for file in packages_files:
cmds.append(f"gh release upload {self.release_tag} {file}")
if not dry_run:
for cmd in cmds:
ShellRunner.run(cmd)
Shell.run(cmd, check=True)
self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}"
else:
print("Dry-run, would run commands:")
print("\n * ".join(cmds))
self.release_url = "dry-run"
self.dump()
class RepoTypes:
@ -350,7 +424,7 @@ class PackageDownloader:
self.macos_package_files = ["clickhouse-macos", "clickhouse-macos-aarch64"]
self.file_to_type = {}
ShellRunner.run(f"mkdir -p {self.LOCAL_DIR}")
Shell.run(f"mkdir -p {self.LOCAL_DIR}")
for package_type in self.PACKAGE_TYPES:
for package in self.package_names:
@ -400,7 +474,7 @@ class PackageDownloader:
return res
def run(self):
ShellRunner.run(f"rm -rf {self.LOCAL_DIR}/*")
Shell.run(f"rm -rf {self.LOCAL_DIR}/*")
for package_file in (
self.deb_package_files + self.rpm_package_files + self.tgz_package_files
):
@ -473,6 +547,37 @@ class PackageDownloader:
return True
@contextmanager
def checkout(ref: str) -> Iterator[None]:
orig_ref = Shell.run(f"{GIT_PREFIX} symbolic-ref --short HEAD", check=True)
rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
assert orig_ref
if ref not in (orig_ref,):
Shell.run(f"{GIT_PREFIX} checkout {ref}")
try:
yield
except (Exception, KeyboardInterrupt) as e:
print(f"ERROR: Exception [{e}]")
Shell.run(rollback_cmd)
raise
Shell.run(rollback_cmd)
@contextmanager
def checkout_new(ref: str) -> Iterator[None]:
orig_ref = Shell.run(f"{GIT_PREFIX} symbolic-ref --short HEAD", check=True)
rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
assert orig_ref
Shell.run(f"{GIT_PREFIX} checkout -b {ref}", check=True)
try:
yield
except (Exception, KeyboardInterrupt) as e:
print(f"ERROR: Exception [{e}]")
Shell.run(rollback_cmd)
raise
Shell.run(rollback_cmd)
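For orientation, a minimal usage sketch of the checkout() contextmanager defined above (the ref is only an example; Git and get_version_from_repo are the helpers already used earlier in this module):

with checkout("origin/master"):
    # Git() must be created inside the checkout, as noted above
    git = Git()
    version = get_version_from_repo(git=git)
    print(version.describe)
# on exit the original branch is restored, even if the block raised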
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
@ -508,6 +613,11 @@ def parse_args() -> argparse.Namespace:
action="store_true",
help="Create GH Release object and attach all packages",
)
parser.add_argument(
"--post-status",
action="store_true",
help="Post release status into Slack",
)
parser.add_argument(
"--ref",
type=str,
@ -526,55 +636,25 @@ def parse_args() -> argparse.Namespace:
help="do not make any actual changes in the repo, just show what will be done",
)
parser.add_argument(
"--outfile",
default="",
type=str,
help="output file to write json result to, if not set - stdout",
"--set-progress-started",
action="store_true",
help="Set new progress step, --progress <PROGRESS STEP> must be set",
)
parser.add_argument(
"--infile",
default="",
"--progress",
type=str,
help="input file with release info",
help="Progress step name, see @ReleaseProgress",
)
parser.add_argument(
"--set-progress-completed",
action="store_true",
help="Set current progress step to OK (completed)",
)
return parser.parse_args()
@contextmanager
def checkout(ref: str) -> Iterator[None]:
_, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD")
rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
assert orig_ref
if ref not in (orig_ref,):
ShellRunner.run(f"{GIT_PREFIX} checkout {ref}")
try:
yield
except (Exception, KeyboardInterrupt) as e:
print(f"ERROR: Exception [{e}]")
ShellRunner.run(rollback_cmd)
raise
ShellRunner.run(rollback_cmd)
@contextmanager
def checkout_new(ref: str) -> Iterator[None]:
_, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD")
rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
assert orig_ref
ShellRunner.run(f"{GIT_PREFIX} checkout -b {ref}")
try:
yield
except (Exception, KeyboardInterrupt) as e:
print(f"ERROR: Exception [{e}]")
ShellRunner.run(rollback_cmd)
raise
ShellRunner.run(rollback_cmd)
if __name__ == "__main__":
args = parse_args()
assert args.dry_run
# prepare ssh for git if needed
_ssh_agent = None
@ -586,43 +666,91 @@ if __name__ == "__main__":
_ssh_agent.print_keys()
if args.prepare_release_info:
assert (
args.ref and args.release_type and args.outfile
), "--ref, --release-type and --outfile must be provided with --prepare-release-info"
ReleaseInfo.prepare(
commit_ref=args.ref, release_type=args.release_type, outfile=args.outfile
)
if args.push_release_tag:
assert args.infile, "--infile <release info file path> must be provided"
release_info = ReleaseInfo.from_file(args.infile)
release_info.push_release_tag(dry_run=args.dry_run)
if args.push_new_release_branch:
assert args.infile, "--infile <release info file path> must be provided"
release_info = ReleaseInfo.from_file(args.infile)
release_info.push_new_release_branch(dry_run=args.dry_run)
if args.create_bump_version_pr:
# TODO: store link to PR in release info
assert args.infile, "--infile <release info file path> must be provided"
release_info = ReleaseInfo.from_file(args.infile)
release_info.update_version_and_contributors_list(dry_run=args.dry_run)
with ReleaseContextManager(
release_progress=ReleaseProgress.STARTED
) as release_info:
assert (
args.ref and args.release_type
), "--ref and --release-type must be provided with --prepare-release-info"
release_info.prepare(commit_ref=args.ref, release_type=args.release_type)
if args.download_packages:
assert args.infile, "--infile <release info file path> must be provided"
release_info = ReleaseInfo.from_file(args.infile)
p = PackageDownloader(
release=release_info.release_branch,
commit_sha=release_info.commit_sha,
version=release_info.version,
)
p.run()
with ReleaseContextManager(
release_progress=ReleaseProgress.DOWNLOAD_PACKAGES
) as release_info:
p = PackageDownloader(
release=release_info.release_branch,
commit_sha=release_info.commit_sha,
version=release_info.version,
)
p.run()
if args.push_release_tag:
with ReleaseContextManager(
release_progress=ReleaseProgress.PUSH_RELEASE_TAG
) as release_info:
release_info.push_release_tag(dry_run=args.dry_run)
if args.push_new_release_branch:
with ReleaseContextManager(
release_progress=ReleaseProgress.PUSH_NEW_RELEASE_BRANCH
) as release_info:
release_info.push_new_release_branch(dry_run=args.dry_run)
if args.create_bump_version_pr:
with ReleaseContextManager(
release_progress=ReleaseProgress.BUMP_VERSION
) as release_info:
release_info.update_version_and_contributors_list(dry_run=args.dry_run)
if args.create_gh_release:
assert args.infile, "--infile <release info file path> must be provided"
release_info = ReleaseInfo.from_file(args.infile)
p = PackageDownloader(
release=release_info.release_branch,
commit_sha=release_info.commit_sha,
version=release_info.version,
)
release_info.create_gh_release(p.get_all_packages_files(), args.dry_run)
with ReleaseContextManager(
release_progress=ReleaseProgress.CREATE_GH_RELEASE
) as release_info:
p = PackageDownloader(
release=release_info.release_branch,
commit_sha=release_info.commit_sha,
version=release_info.version,
)
release_info.create_gh_release(
packages_files=p.get_all_packages_files(), dry_run=args.dry_run
)
if args.post_status:
release_info = ReleaseInfo.from_file()
release_info.update_release_info(dry_run=args.dry_run)
if release_info.is_new_release_branch():
title = "New release branch"
else:
title = "New release"
if (
release_info.progress_description == ReleaseProgressDescription.OK
and release_info.release_progress == ReleaseProgress.COMPLETED
):
title = "Completed: " + title
CIBuddy(dry_run=args.dry_run).post_done(
title, dataclasses.asdict(release_info)
)
else:
title = "Failed: " + title
CIBuddy(dry_run=args.dry_run).post_critical(
title, dataclasses.asdict(release_info)
)
if args.set_progress_started:
assert args.progress, "Progress step name must be provided"
ri = ReleaseInfo.from_file()
ri.release_progress = args.progress
ri.progress_description = ReleaseProgressDescription.FAILED
ri.dump()
if args.set_progress_completed:
ri = ReleaseInfo.from_file()
assert (
ri.progress_description == ReleaseProgressDescription.FAILED
), "Must be FAILED before set to OK"
ri.progress_description = ReleaseProgressDescription.OK
ri.dump()
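Taken together, --set-progress-started and --set-progress-completed implement a fail-closed progress protocol: a step is recorded as FAILED before it runs and flipped to OK only once it finishes. A hedged sketch of that lifecycle in plain Python (ReleaseProgress.PUSH_RELEASE_TAG is used purely as an example step):

ri = ReleaseInfo.from_file()
ri.release_progress = ReleaseProgress.PUSH_RELEASE_TAG
ri.progress_description = ReleaseProgressDescription.FAILED
ri.dump()  # persisted before the step runs
# ... the step itself runs here, possibly in a separate CI job ...
ri = ReleaseInfo.from_file()
ri.progress_description = ReleaseProgressDescription.OK
ri.dump()  # flipped to OK only after the step succeeded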
# tear down ssh
if _ssh_agent and _key_pub:


@ -254,11 +254,14 @@ def main():
statuses = get_commit_filtered_statuses(commit)
has_failed_statuses = False
has_native_failed_status = False
for status in statuses:
print(f"Check status [{status.context}], [{status.state}]")
if CI.is_required(status.context) and status.state != SUCCESS:
print(f"WARNING: Failed status [{status.context}], [{status.state}]")
has_failed_statuses = True
if status.context != CI.StatusNames.SYNC:
has_native_failed_status = True
if args.wf_status == SUCCESS or has_failed_statuses:
# set Mergeable check if workflow is successful (green)
@ -280,7 +283,7 @@ def main():
print(
"Workflow failed but no failed statuses found (died runner?) - cannot set Mergeable Check status"
)
if args.wf_status == SUCCESS and not has_failed_statuses:
if args.wf_status == SUCCESS and not has_native_failed_status:
sys.exit(0)
else:
sys.exit(1)


@ -296,13 +296,16 @@ class PRInfo:
else:
if "schedule" in github_event:
self.event_type = EventType.SCHEDULE
else:
elif "inputs" in github_event:
# assume this is a dispatch
self.event_type = EventType.DISPATCH
logging.warning(
"event.json does not match pull_request or push:\n%s",
json.dumps(github_event, sort_keys=True, indent=4),
)
print("PR Info:")
print(self)
else:
logging.warning(
"event.json does not match pull_request or push:\n%s",
json.dumps(github_event, sort_keys=True, indent=4),
)
self.sha = os.getenv(
"GITHUB_SHA", "0000000000000000000000000000000000000000"
)


@ -587,11 +587,11 @@ class TestCIConfig(unittest.TestCase):
for job, job_config in ci_cache.jobs_to_do.items():
if job in MOCK_AFFECTED_JOBS:
MOCK_REQUIRED_BUILDS += job_config.required_builds
elif job not in MOCK_AFFECTED_JOBS:
elif job not in MOCK_AFFECTED_JOBS and not job_config.disable_await:
ci_cache.jobs_to_wait[job] = job_config
for job, job_config in ci_cache.jobs_to_do.items():
if job_config.reference_job_name:
if job_config.reference_job_name or job_config.disable_await:
# jobs with reference_job_name in config are not supposed to have records in the cache - continue
continue
if job in MOCK_AFFECTED_JOBS:
@ -624,11 +624,76 @@ class TestCIConfig(unittest.TestCase):
+ MOCK_AFFECTED_JOBS
+ MOCK_REQUIRED_BUILDS
)
self.assertTrue(
CI.JobNames.BUILD_CHECK not in ci_cache.jobs_to_wait,
"We must never await on Builds Report",
)
self.assertCountEqual(
list(ci_cache.jobs_to_wait),
[
CI.JobNames.BUILD_CHECK,
]
+ MOCK_REQUIRED_BUILDS,
MOCK_REQUIRED_BUILDS,
)
self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do)
def test_ci_py_filters_not_affected_jobs_in_prs_no_builds(self):
"""
checks ci.py filters not affected jobs in PRs, no builds required
"""
settings = CiSettings()
settings.no_ci_cache = True
pr_info = PRInfo(github_event=_TEST_EVENT_JSON)
pr_info.event_type = EventType.PULL_REQUEST
pr_info.number = 123
assert pr_info.is_pr
ci_cache = CIPY._configure_jobs(
S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True
)
self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list")
assert not ci_cache.jobs_to_wait
assert not ci_cache.jobs_to_skip
MOCK_AFFECTED_JOBS = [
CI.JobNames.FAST_TEST,
]
MOCK_REQUIRED_BUILDS = []
# pretend there are pending jobs that we need to wait
for job, job_config in ci_cache.jobs_to_do.items():
if job in MOCK_AFFECTED_JOBS:
if job_config.required_builds:
MOCK_REQUIRED_BUILDS += job_config.required_builds
elif job not in MOCK_AFFECTED_JOBS and not job_config.disable_await:
ci_cache.jobs_to_wait[job] = job_config
for job, job_config in ci_cache.jobs_to_do.items():
if job_config.reference_job_name or job_config.disable_await:
# jobs with reference_job_name in config are not supposed to have records in the cache - continue
continue
if job in MOCK_AFFECTED_JOBS:
continue
for batch in range(job_config.num_batches):
# add any record into cache
record = CiCache.Record(
record_type=random.choice(
[
CiCache.RecordType.FAILED,
CiCache.RecordType.PENDING,
CiCache.RecordType.SUCCESSFUL,
]
),
job_name=job,
job_digest=ci_cache.job_digests[job],
batch=batch,
num_batches=job_config.num_batches,
release_branch=True,
)
for record_t_, records_ in ci_cache.records.items():
if record_t_.value == CiCache.RecordType.FAILED.value:
records_[record.to_str_key()] = record
ci_cache.filter_out_not_affected_jobs()
expected_to_do = MOCK_AFFECTED_JOBS + MOCK_REQUIRED_BUILDS
self.assertCountEqual(
list(ci_cache.jobs_to_wait),
MOCK_REQUIRED_BUILDS,
)
self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do)


@ -50,7 +50,7 @@ set -uo pipefail
# set accordingly to a runner role #
####################################
echo "Running init v1"
echo "Running init v1.1"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_HOME=/home/ubuntu/actions-runner
@ -66,6 +66,14 @@ bash /usr/local/share/scripts/init-network.sh
RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text)
LABELS="self-hosted,Linux,$(uname -m),$RUNNER_TYPE"
export LABELS
echo "Instance Labels: $LABELS"
LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle)
export LIFE_CYCLE
echo "Instance lifecycle: $LIFE_CYCLE"
INSTANCE_TYPE=$(ec2metadata --instance-type)
echo "Instance type: $INSTANCE_TYPE"
# Refresh CloudWatch agent config
aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json
@ -124,10 +132,6 @@ terminate_decrease_and_exit() {
declare -f terminate_and_exit >> /tmp/actions-hooks/common.sh
check_spot_instance_is_old() {
# This function should be executed ONLY BETWEEN runnings.
# It's unsafe to execute while the runner is working!
local LIFE_CYCLE
LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle)
if [ "$LIFE_CYCLE" == "spot" ]; then
local UPTIME
UPTIME=$(< /proc/uptime)


@ -208,13 +208,21 @@ def test_merge_tree_custom_disk_setting(start_cluster):
secret_access_key='minio123');
"""
)
count = len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
list1 = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))
count1 = len(list1)
node1.query(f"INSERT INTO {TABLE_NAME}_3 SELECT number FROM numbers(100)")
assert int(node1.query(f"SELECT count() FROM {TABLE_NAME}_3")) == 100
assert (
len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
== count
)
list2 = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))
count2 = len(list2)
if count1 != count2:
print("list1: ", list1)
print("list2: ", list2)
assert count1 == count2
assert (
len(list(minio.list_objects(cluster.minio_bucket, "data2/", recursive=True)))
> 0


@ -43,15 +43,10 @@ def started_cluster():
config = """<clickhouse>
<openSSL>
<client>
<verificationMode>none</verificationMode>
<verificationMode>strict</verificationMode>
<certificateFile>{certificateFile}</certificateFile>
<privateKeyFile>{privateKeyFile}</privateKeyFile>
<caConfig>{caConfig}</caConfig>
<invalidCertificateHandler>
<name>AcceptCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
</clickhouse>"""


@ -2220,13 +2220,11 @@ def test_rabbitmq_commit_on_block_write(rabbitmq_cluster):
def test_rabbitmq_no_connection_at_startup_1(rabbitmq_cluster):
# no connection when table is initialized
rabbitmq_cluster.pause_container("rabbitmq1")
instance.query_and_get_error(
error = instance.query_and_get_error(
"""
CREATE TABLE test.cs (key UInt64, value UInt64)
ENGINE = RabbitMQ
SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
SETTINGS rabbitmq_host_port = 'no_connection_at_startup:5672',
rabbitmq_exchange_name = 'cs',
rabbitmq_format = 'JSONEachRow',
rabbitmq_flush_interval_ms=1000,
@ -2234,7 +2232,7 @@ def test_rabbitmq_no_connection_at_startup_1(rabbitmq_cluster):
rabbitmq_row_delimiter = '\\n';
"""
)
rabbitmq_cluster.unpause_container("rabbitmq1")
assert "CANNOT_CONNECT_RABBITMQ" in error
def test_rabbitmq_no_connection_at_startup_2(rabbitmq_cluster):


@ -10,8 +10,8 @@
PARTITION BY toYYYYMM(d) ORDER BY key
</create_query>
<fill_query>INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(5000000)</fill_query>
<fill_query>INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(5000000)</fill_query>
<fill_query>INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(2500000)</fill_query>
<fill_query>INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(2500000)</fill_query>
<query>SELECT * FROM optimized_select_final FINAL FORMAT Null SETTINGS max_threads = 8</query>
<query>SELECT * FROM optimized_select_final FINAL WHERE key % 10 = 0 FORMAT Null</query>


@ -1,5 +1,5 @@
<test>
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByChar(' ', materialize(s)) as w from numbers(1000000)</query>
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(1000000)</query>
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(100000)</query>
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(200000)</query>
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(20000)</query>
</test>


@ -24,10 +24,10 @@
<min_insert_block_size_rows>1</min_insert_block_size_rows>
</settings>
<!-- 100 parts -->
<query>INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100)</query>
<query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(1000)</query>
<query>INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100)</query>
<!-- 50 parts -->
<query>INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(50)</query>
<query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(500)</query>
<query>INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50)</query>
<drop_query>DROP TABLE IF EXISTS hits_wide</drop_query>
<drop_query>DROP TABLE IF EXISTS hits_compact</drop_query>


@ -555,7 +555,7 @@ if args.report == "main":
"Total client time for measured query runs,&nbsp;s", # 2
"Queries", # 3
"Longest query, total for measured runs,&nbsp;s", # 4
"Wall clock time per query,&nbsp;s", # 5
"Average query wall clock time,&nbsp;s", # 5
"Shortest query, total for measured runs,&nbsp;s", # 6
"", # Runs #7
]


@ -8,13 +8,13 @@
40
41
0
41
2 42
2 42
43
0
43
11
11


@ -1,13 +1,36 @@
-- { echoOn }
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1
1 104 4 4 104 1
1 105 5 4 104 1
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1
1 104 4 4 104 1
1 105 5 4 104 1
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1
1 104 4 4 104 1
1 105 5 4 104 1
SET join_algorithm = 'full_sorting_merge';
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1
1 104 4 4 104 1
1 105 5 4 104 1
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1
1 104 4 4 104 1
1 105 5 4 104 1
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
1 101 1 0 0 0
1 102 2 2 102 1
1 103 3 2 102 1


@ -4,20 +4,29 @@ DROP TABLE IF EXISTS B;
CREATE TABLE A(k UInt32, t UInt32, a UInt64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO A(k,t,a) VALUES (1,101,1),(1,102,2),(1,103,3),(1,104,4),(1,105,5);
CREATE TABLE B(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
DROP TABLE B;
CREATE TABLE B1(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO B1(k,t,b) VALUES (1,102,2), (1,104,4);
CREATE TABLE B2(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO B2(k,t,b) VALUES (1,102,2), (1,104,4);
CREATE TABLE B(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
DROP TABLE B;
CREATE TABLE B3(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO B3(k,t,b) VALUES (1,102,2), (1,104,4);
CREATE TABLE B(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
DROP TABLE B;
-- { echoOn }
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
SET join_algorithm = 'full_sorting_merge';
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
-- { echoOff }
DROP TABLE B1;
DROP TABLE B2;
DROP TABLE B3;
DROP TABLE A;


@ -1 +1,2 @@
3000000
3000000


@ -2,15 +2,28 @@
DROP TABLE IF EXISTS tvs;
-- to use different algorithms for in subquery
SET allow_experimental_analyzer = 1;
CREATE TABLE tvs(k UInt32, t UInt32, tv UInt64) ENGINE = Memory;
INSERT INTO tvs(k,t,tv) SELECT k, t, t
FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times;
CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times
SETTINGS join_algorithm = 'hash';
SELECT SUM(trades.price - tvs.tv) FROM
(SELECT k, t, t as price
FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times) trades
CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times
SETTINGS join_algorithm = 'hash') trades
ASOF LEFT JOIN tvs USING(k,t);
SELECT SUM(trades.price - tvs.tv) FROM
(SELECT k, t, t as price
FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times
SETTINGS join_algorithm = 'hash') trades
ASOF LEFT JOIN tvs USING(k,t)
SETTINGS join_algorithm = 'full_sorting_merge';
DROP TABLE tvs;


@ -27,3 +27,32 @@
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
1 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
2 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
2 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
3 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
3 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0
3 1970-01-01 00:00:03 3 0 1970-01-01 00:00:00 0
3 1970-01-01 00:00:04 4 0 1970-01-01 00:00:00 0
3 1970-01-01 00:00:05 5 0 1970-01-01 00:00:00 0
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2


@ -11,9 +11,12 @@ INSERT INTO B(k,t,b) VALUES (1,2,2),(1,4,4);
INSERT INTO B(k,t,b) VALUES (2,3,3);
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t);
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t);
SET join_algorithm = 'full_sorting_merge';
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t);
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t);
DROP TABLE A;


@ -1,27 +1,72 @@
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1970-01-01 02:00:01 1 0
2 1970-01-01 02:00:03 3 3
2 1970-01-01 02:00:05 5 3
-
2 1970-01-01 02:00:01 1 0
2 1970-01-01 02:00:03 3 3
2 1970-01-01 02:00:05 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1 1 0
2 3 3 3
2 5 5 3
-
2 1970-01-01 02:00:00.001 1 0
2 1970-01-01 02:00:00.003 3 3
2 1970-01-01 02:00:00.005 5 3
-
2 1970-01-01 02:00:00.001 1 0
2 1970-01-01 02:00:00.003 3 3
2 1970-01-01 02:00:00.005 5 3


@ -1,27 +0,0 @@
#!/usr/bin/env bash
set -e
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Asia/Istanbul')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Asia/Istanbul')"
do
$CLICKHOUSE_CLIENT -mn <<EOF
DROP TABLE IF EXISTS A;
DROP TABLE IF EXISTS B;
CREATE TABLE A(k UInt32, t ${typename}, a Float64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO A(k,t,a) VALUES (2,1,1),(2,3,3),(2,5,5);
CREATE TABLE B(k UInt32, t ${typename}, b Float64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO B(k,t,b) VALUES (2,3,3);
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t);
DROP TABLE A;
DROP TABLE B;
EOF
done


@ -0,0 +1,27 @@
{% for typename in ["UInt32", "UInt64", "Float64", "Float32", "DateTime('Asia/Istanbul')", "Decimal32(5)", "Decimal64(5)", "Decimal128(5)", "DateTime64(3, 'Asia/Istanbul')"] -%}
DROP TABLE IF EXISTS A;
DROP TABLE IF EXISTS B;
CREATE TABLE A(k UInt32, t {{ typename }}, a Float64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO A(k,t,a) VALUES (2,1,1),(2,3,3),(2,5,5);
CREATE TABLE B(k UInt32, t {{ typename }}, b Float64) ENGINE = MergeTree() ORDER BY (k, t);
INSERT INTO B(k,t,b) VALUES (2,3,3);
SELECT '-';
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t)
SETTINGS join_algorithm = 'full_sorting_merge';
SELECT '-';
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t)
SETTINGS join_algorithm = 'hash';
DROP TABLE A;
DROP TABLE B;
{% endfor %}


@ -12,3 +12,18 @@
2 1970-01-01 00:00:15 5 6.5 6
2 1970-01-01 00:00:16 5 5.6 6
2 1970-01-01 00:00:20 17 8.5 18
-
1 1970-01-01 00:00:05 1 1.5 2
1 1970-01-01 00:00:06 1 1.51 2
1 1970-01-01 00:00:10 11 11.5 12
1 1970-01-01 00:00:11 11 11.51 12
1 1970-01-01 00:00:15 5 5.5 6
1 1970-01-01 00:00:16 5 5.6 6
1 1970-01-01 00:00:20 7 7.5 8
2 1970-01-01 00:00:05 11 2.5 12
2 1970-01-01 00:00:06 11 2.51 12
2 1970-01-01 00:00:10 21 12.5 22
2 1970-01-01 00:00:11 21 12.51 22
2 1970-01-01 00:00:15 5 6.5 6
2 1970-01-01 00:00:16 5 5.6 6
2 1970-01-01 00:00:20 17 8.5 18


@ -9,7 +9,13 @@ CREATE TABLE tv(key UInt32, t DateTime, tv Float64) ENGINE = MergeTree() ORDER B
INSERT INTO tv(key,t,tv) VALUES (1,5,1.5),(1,6,1.51),(1,10,11.5),(1,11,11.51),(1,15,5.5),(1,16,5.6),(1,20,7.5);
INSERT INTO tv(key,t,tv) VALUES (2,5,2.5),(2,6,2.51),(2,10,12.5),(2,11,12.51),(2,15,6.5),(2,16,5.6),(2,20,8.5);
SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t);
SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t)
;
SELECT '-';
SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t)
SETTINGS join_algorithm = 'full_sorting_merge';
DROP TABLE md;
DROP TABLE tv;


@ -1,3 +1,4 @@
- default / join_use_nulls = 0 -
1 1 0 0
1 2 1 2
1 3 1 2
@ -34,3 +35,114 @@
2 1 2 3
2 2 2 3
1 2 1 2
- full_sorting_merge / join_use_nulls = 0 -
1 1 0 0
1 2 1 2
1 3 1 2
2 1 0 0
2 2 0 0
2 3 2 3
3 1 0 0
3 2 0 0
3 3 0 0
9
1 2 1 2
1 3 1 2
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 3 1 2
-
1 1 1 2
1 2 1 4
1 3 1 4
2 1 2 3
2 2 2 3
1 2 1 2
- default / join_use_nulls = 1 -
1 1 \N \N
1 2 1 2
1 3 1 2
2 1 \N \N
2 2 \N \N
2 3 2 3
3 1 \N \N
3 2 \N \N
3 3 \N \N
9
1 2 1 2
1 3 1 2
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 3 1 2
-
1 1 1 2
1 2 1 4
1 3 1 4
2 1 2 3
2 2 2 3
1 2 1 2
- full_sorting_merge / join_use_nulls = 1 -
1 1 \N \N
1 2 1 2
1 3 1 2
2 1 \N \N
2 2 \N \N
2 3 2 3
3 1 \N \N
3 2 \N \N
3 3 \N \N
9
1 2 1 2
1 3 1 2
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 1 1 2
1 2 1 2
1 3 1 4
2 1 2 3
2 2 2 3
2 3 2 3
-
1 3 1 2
-
1 1 1 2
1 2 1 4
1 3 1 4
2 1 2 3
2 2 2 3
1 2 1 2


@ -7,6 +7,14 @@ CREATE TABLE B(b UInt32, t UInt32) ENGINE = Memory;
INSERT INTO A (a,t) VALUES (1,1),(1,2),(1,3), (2,1),(2,2),(2,3), (3,1),(3,2),(3,3);
INSERT INTO B (b,t) VALUES (1,2),(1,4),(2,3);
{% for join_use_nulls in [0, 1] -%}
{% for join_algorithm in ['default', 'full_sorting_merge'] -%}
SET join_algorithm = '{{ join_algorithm }}';
SELECT '- {{ join_algorithm }} / join_use_nulls = {{ join_use_nulls }} -';
set join_use_nulls = {{ join_use_nulls }};
SELECT A.a, A.t, B.b, B.t FROM A ASOF LEFT JOIN B ON A.a == B.b AND A.t >= B.t ORDER BY (A.a, A.t);
SELECT count() FROM A ASOF LEFT JOIN B ON A.a == B.b AND B.t <= A.t;
SELECT A.a, A.t, B.b, B.t FROM A ASOF INNER JOIN B ON B.t <= A.t AND A.a == B.b ORDER BY (A.a, A.t);
@ -28,5 +36,8 @@ ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A
WHERE B.t != 3 ORDER BY (A.a, A.t)
;
{% endfor -%}
{% endfor -%}
DROP TABLE A;
DROP TABLE B;


@ -12,10 +12,10 @@ ORDER BY (primary_key);
INSERT INTO set_array
select
toString(intDiv(number, 1000000)) as primary_key,
toString(intDiv(number, 100000)) as primary_key,
array(number) as index_array
from system.numbers
limit 10000000;
limit 1000000;
OPTIMIZE TABLE set_array FINAL;


@ -1,3 +1,6 @@
v1 o1 ['s2','s1']
v1 o2 ['s4']
v2 o3 ['s5','s3']
v1 o1 ['s2','s1']
v1 o2 ['s4']
v2 o3 ['s5','s3']
