Merge branch 'master' of github.com:ClickHouse/ClickHouse into json-data-type

This commit is contained in:
avogar 2024-07-22 15:35:57 +00:00
commit 300073f51d
183 changed files with 4599 additions and 1524 deletions

168
.github/actions/release/action.yml vendored Normal file
View File

@ -0,0 +1,168 @@
name: Release
description: Makes patch releases and creates new release branch
inputs:
ref:
description: 'Git reference (branch or commit sha) from which to create the release'
required: true
type: string
type:
description: 'The type of release: "new" for a new release or "patch" for a patch release'
required: true
type: choice
options:
- patch
- new
dry-run:
description: 'Dry run'
required: false
default: true
type: boolean
token:
required: true
type: string
runs:
using: "composite"
steps:
- name: Prepare Release Info
shell: bash
run: |
python3 ./tests/ci/create_release.py --prepare-release-info \
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
${{ inputs.dry-run && '--dry-run' || '' }}
echo "::group::Release Info"
python3 -m json.tool /tmp/release_info.json
echo "::endgroup::"
release_tag=$(jq -r '.release_tag' /tmp/release_info.json)
commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json)
echo "Release Tag: $release_tag"
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
- name: Download All Release Artifacts
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push Git Tag for the Release
shell: bash
run: |
python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push New Release Branch
if: ${{ inputs.type == 'new' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump CH Version and Update Contributors' List
shell: bash
run: |
python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump Docker versions, Changelog, Security
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
git checkout master
python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security"
echo "List versions"
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
echo "Update docker version"
./utils/list-versions/update-docker-version.sh
echo "Generate ChangeLog"
export CI=1
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
--volume=".:/ClickHouse" clickhouse/style-test \
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token=${{ inputs.token }} --jobs=5 \
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
echo "Generate Security"
python3 ./utils/security-generator/generate_security.py > SECURITY.md
git diff HEAD
- name: Create ChangeLog PR
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
uses: peter-evans/create-pull-request@v6
with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
token: ${{ inputs.token }}
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
branch: auto/${{ env.RELEASE_TAG }}
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
delete-branch: true
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
labels: do not test
body: |
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)
- name: Complete previous steps and Restore git state
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --set-progress-completed
git reset --hard HEAD
git checkout "$GITHUB_REF_NAME"
- name: Create GH Release
shell: bash
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export TGZ Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test TGZ Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export RPM Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test RPM Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export Debian Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test Debian Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Docker clickhouse/clickhouse-server building
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
cd "./tests/ci"
python3 ./create_release.py --set-progress-started --progress "docker server release"
export CHECK_NAME="Docker server image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
python3 ./create_release.py --set-progress-completed
- name: Docker clickhouse/clickhouse-keeper building
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
cd "./tests/ci"
python3 ./create_release.py --set-progress-started --progress "docker keeper release"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
python3 ./create_release.py --set-progress-completed
- name: Set current Release progress to Completed with OK
shell: bash
run: |
python3 ./tests/ci/create_release.py --set-progress-started --progress "completed"
python3 ./tests/ci/create_release.py --set-progress-completed
- name: Post Slack Message
if: ${{ !cancelled() }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run && '--dry-run' || '' }}

View File

@ -1,44 +1,110 @@
name: AutoRelease
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
DRY_RUN: true
concurrency:
group: auto-release
group: release
on: # yamllint disable-line rule:truthy
# schedule:
# - cron: '0 10-16 * * 1-5'
# Workflow uses a test bucket for packages and dry run mode (no real releases)
schedule:
- cron: '0 9 * * *'
- cron: '0 15 * * *'
workflow_dispatch:
inputs:
dry-run:
description: 'Dry run'
required: false
default: true
type: boolean
jobs:
CherryPick:
runs-on: [self-hosted, style-checker-aarch64]
AutoRelease:
runs-on: [self-hosted, release-maker]
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/cherry_pick
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
REPO_OWNER=ClickHouse
REPO_NAME=ClickHouse
REPO_TEAM=core
EOF
- name: Set DRY_RUN for schedule
if: ${{ github.event_name == 'schedule' }}
run: echo "DRY_RUN=true" >> "$GITHUB_ENV"
- name: Set DRY_RUN for dispatch
if: ${{ github.event_name == 'workflow_dispatch' }}
run: echo "DRY_RUN=${{ github.event.inputs.dry-run }}" >> "$GITHUB_ENV"
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Auto-release
- name: Auto Release Prepare
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --release-after-days=3
- name: Cleanup
if: always()
python3 auto_release.py --prepare
echo "::group::Auto Release Info"
python3 -m json.tool /tmp/autorelease_info.json
echo "::endgroup::"
{
echo 'AUTO_RELEASE_PARAMS<<EOF'
cat /tmp/autorelease_info.json
echo 'EOF'
} >> "$GITHUB_ENV"
- name: Post Release Branch statuses
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-status
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[0].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[1].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2] && fromJson(env.AUTO_RELEASE_PARAMS).releases[2].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3] && fromJson(env.AUTO_RELEASE_PARAMS).releases[3].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4] && fromJson(env.AUTO_RELEASE_PARAMS).releases[4].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Post Slack Message
if: ${{ !cancelled() }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }}
- name: Clean up
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:

View File

@ -2,7 +2,6 @@ name: CreateRelease
concurrency:
group: release
'on':
workflow_dispatch:
inputs:
@ -31,136 +30,15 @@ jobs:
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: |
cat >> "$GITHUB_ENV" << 'EOF'
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
RELEASE_INFO_FILE=${{ runner.temp }}/release_info.json
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Prepare Release Info
run: |
python3 ./tests/ci/create_release.py --prepare-release-info \
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
--outfile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
echo "::group::Release Info"
python3 -m json.tool "$RELEASE_INFO_FILE"
echo "::endgroup::"
release_tag=$(jq -r '.release_tag' "$RELEASE_INFO_FILE")
commit_sha=$(jq -r '.commit_sha' "$RELEASE_INFO_FILE")
echo "Release Tag: $release_tag"
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
- name: Download All Release Artifacts
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push Git Tag for the Release
run: |
python3 ./tests/ci/create_release.py --push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push New Release Branch
if: ${{ inputs.type == 'new' }}
run: |
python3 ./tests/ci/create_release.py --push-new-release-branch --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump CH Version and Update Contributors' List
run: |
python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Checkout master
run: |
git checkout master
- name: Bump Docker versions, Changelog, Security
if: ${{ inputs.type == 'patch' }}
run: |
[ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1
echo "List versions"
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
echo "Update docker version"
./utils/list-versions/update-docker-version.sh
echo "Generate ChangeLog"
export CI=1
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
--volume=".:/ClickHouse" clickhouse/style-test \
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token="$GH_TOKEN" --jobs=5 \
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
echo "Generate Security"
python3 ./utils/security-generator/generate_security.py > SECURITY.md
git diff HEAD
- name: Create ChangeLog PR
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
uses: peter-evans/create-pull-request@v6
- name: Call Release Action
uses: ./.github/actions/release
with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
branch: auto/${{ env.RELEASE_TAG }}
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
delete-branch: true
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
labels: do not test
body: |
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)
- name: Reset changes if Dry-run
if: ${{ inputs.dry-run }}
run: |
git reset --hard HEAD
- name: Checkout back to GITHUB_REF
run: |
git checkout "$GITHUB_REF_NAME"
- name: Create GH Release
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --create-gh-release \
--infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export TGZ Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test TGZ Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export RPM Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test RPM Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export Debian Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test Debian Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Docker clickhouse/clickhouse-server building
if: ${{ inputs.type == 'patch' }}
run: |
cd "./tests/ci"
export CHECK_NAME="Docker server image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
- name: Docker clickhouse/clickhouse-keeper building
if: ${{ inputs.type == 'patch' }}
run: |
cd "./tests/ci"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
- name: Post Slack Message
if: always()
run: |
echo Slack Message
ref: ${{ inputs.ref }}
type: ${{ inputs.type }}
dry-run: ${{ inputs.dry-run }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}

View File

@ -87,10 +87,13 @@
# define ASAN_POISON_MEMORY_REGION(a, b)
#endif
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER)
#define ABORT_ON_LOGICAL_ERROR
#endif
/// We used to have only ABORT_ON_LOGICAL_ERROR macro, but most of its uses were actually in places where we didn't care about logical errors
/// but wanted to check exactly if the current build type is debug or with sanitizer. This new macro is introduced to fix those places.
#if !defined(DEBUG_OR_SANITIZER_BUILD)
# if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) \
|| defined(UNDEFINED_BEHAVIOR_SANITIZER)
# define DEBUG_OR_SANITIZER_BUILD
# endif
#endif
/// chassert(x) is similar to assert(x), but:
@ -101,7 +104,7 @@
/// Also it makes sense to call abort() instead of __builtin_unreachable() in debug builds,
/// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy)
#if !defined(chassert)
#if defined(ABORT_ON_LOGICAL_ERROR)
# if defined(DEBUG_OR_SANITIZER_BUILD)
// clang-format off
#include <base/types.h>
namespace DB

View File

@ -22,6 +22,21 @@ Structure of the `users` section:
<!-- Or -->
<password_sha256_hex></password_sha256_hex>
<ssh_keys>
<ssh_key>
<type>ssh-ed25519</type>
<base64_key>AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj</base64_key>
</ssh_key>
<ssh_key>
<type>ecdsa-sha2-nistp256</type>
<base64_key>AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBNxeV2uN5UY6CUbCzTA1rXfYimKQA5ivNIqxdax4bcMXz4D0nSk2l5E1TkR5mG8EBWtmExSPbcEPJ8V7lyWWbA8=</base64_key>
</ssh_key>
<ssh_key>
<type>ssh-rsa</type>
<base64_key>AAAAB3NzaC1yc2EAAAADAQABAAABgQCpgqL1SHhPVBOTFlOm0pu+cYBbADzC2jL41sPMawYCJHDyHuq7t+htaVVh2fRgpAPmSEnLEC2d4BEIKMtPK3bfR8plJqVXlLt6Q8t4b1oUlnjb3VPA9P6iGcW7CV1FBkZQEVx8ckOfJ3F+kI5VsrRlEDgiecm/C1VPl0/9M2llW/mPUMaD65cM9nlZgM/hUeBrfxOEqM11gDYxEZm1aRSbZoY4dfdm3vzvpSQ6lrCrkjn3X2aSmaCLcOWJhfBWMovNDB8uiPuw54g3ioZ++qEQMlfxVsqXDGYhXCrsArOVuW/5RbReO79BvXqdssiYShfwo+GhQ0+aLWMIW/jgBkkqx/n7uKLzCMX7b2F+aebRYFh+/QXEj7SnihdVfr9ud6NN3MWzZ1ltfIczlEcFLrLJ1Yq57wW6wXtviWh59WvTWFiPejGjeSjjJyqqB49tKdFVFuBnIU5u/bch2DXVgiAEdQwUrIp1ACoYPq22HFFAYUJrL32y7RxX3PGzuAv3LOc=</base64_key>
</ssh_key>
</ssh_keys>
<access_management>0|1</access_management>
<networks incl="networks" replace="replace">
@ -79,6 +94,24 @@ Password can be specified in plaintext or in SHA256 (hex format).
The first line of the result is the password. The second line is the corresponding double SHA1 hash.
### username/ssh-key {#user-sshkey}
This setting allows authenticating with SSH keys.
Given a SSH key (as generated by `ssh-keygen`) like
```
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj john@example.com
```
The `ssh_key` element is expected to be
```
<ssh_key>
<type>ssh-ed25519</type>
<base64_key>AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj</base64_key>
</ssh_key>
```
Substitute `ssh-ed25519` with `ssh-rsa` or `ecdsa-sha2-nistp256` for the other supported algorithms.
### access_management {#access_management-user-setting}
This setting enables or disables using of SQL-driven [access control and account management](../../guides/sre/user-management/index.md#access-control) for the user.

View File

@ -297,7 +297,7 @@ Algorithm requires the special column in tables. This column:
- Must contain an ordered sequence.
- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md).
- Cant be the only column in the `JOIN` clause.
- For `hash` join algorithm it cant be the only column in the `JOIN` clause.
Syntax `ASOF JOIN ... ON`:
@ -337,7 +337,8 @@ For example, consider the following tables:
`ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` cant be joined.
:::note
`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
`ASOF JOIN` is supported only by `hash` and `full_sorting_merge` join algorithms.
It's **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
:::
## PASTE JOIN Usage

View File

@ -6,38 +6,38 @@ sidebar_label: Playground
# ClickHouse Playground {#clickhouse-playground}
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
Several example datasets are available in Playground.
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) позволяет пользователям экспериментировать с ClickHouse, выполняя запросы мгновенно, без необходимости настройки сервера или кластера.
В Playground доступны несколько примеров наборов данных.
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
Вы можете выполнять запросы к Playground, используя любой HTTP-клиент, например [curl](https://curl.haxx.se) или [wget](https://www.gnu.org/software/wget/), или настроить соединение, используя драйверы [JDBC](../interfaces/jdbc.md) или [ODBC](../interfaces/odbc.md). Дополнительную информацию о программных продуктах, поддерживающих ClickHouse, можно найти [здесь](../interfaces/index.md).
## Credentials {#credentials}
## Учетные данные {#credentials}
| Parameter | Value |
| Параметр | Значение |
|:--------------------|:-----------------------------------|
| HTTPS endpoint | `https://play.clickhouse.com:443/` |
| Native TCP endpoint | `play.clickhouse.com:9440` |
| User | `explorer` or `play` |
| Password | (empty) |
| HTTPS-адрес | `https://play.clickhouse.com:443/` |
| TCP-адрес | `play.clickhouse.com:9440` |
| Пользователь | `explorer` или `play` |
| Пароль | (пусто) |
## Limitations {#limitations}
## Ограничения {#limitations}
The queries are executed as a read-only user. It implies some limitations:
Запросы выполняются от имени пользователя с правами только на чтение. Это предполагает некоторые ограничения:
- DDL queries are not allowed
- INSERT queries are not allowed
- DDL-запросы не разрешены
- INSERT-запросы не разрешены
The service also have quotas on its usage.
Сервис также имеет квоты на использование.
## Examples {#examples}
## Примеры {#examples}
HTTPS endpoint example with `curl`:
Пример использования HTTPS-адреса с `curl`:
``` bash
```bash
curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'"
```
TCP endpoint example with [CLI](../interfaces/cli.md):
Пример использования TCP-адреса с [CLI](../interfaces/cli.md):
``` bash
clickhouse client --secure --host play.clickhouse.com --user explorer

View File

@ -68,6 +68,41 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes)
return nullptr;
}
/// Checks if the node is combination of isNull and notEquals functions of two the same arguments
bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs)
{
QueryTreeNodePtrWithHashSet all_arguments;
for (const auto & node : nodes)
{
const auto * func_node = node->as<FunctionNode>();
if (!func_node)
return false;
const auto & arguments = func_node->getArguments().getNodes();
if (func_node->getFunctionName() == "isNull" && arguments.size() == 1)
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2)
{
if (arguments[0]->isEqual(*arguments[1]))
return false;
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[1]));
}
else
return false;
if (all_arguments.size() > 2)
return false;
}
if (all_arguments.size() != 2)
return false;
lhs = all_arguments.begin()->node;
rhs = std::next(all_arguments.begin())->node;
return true;
}
bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value)
{
const auto * constant_node = node->as<ConstantNode>();
@ -213,11 +248,14 @@ private:
else if (func_name == "and")
{
const auto & and_arguments = argument_function->getArguments().getNodes();
bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull");
if (all_are_is_null)
QueryTreeNodePtr is_null_lhs_arg;
QueryTreeNodePtr is_null_rhs_arg;
if (matchIsNullOfTwoArgs(and_arguments, is_null_lhs_arg, is_null_rhs_arg))
{
is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1);
is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1);
is_null_argument_to_indices[is_null_lhs_arg].push_back(or_operands.size() - 1);
is_null_argument_to_indices[is_null_rhs_arg].push_back(or_operands.size() - 1);
continue;
}
/// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b`

View File

@ -62,7 +62,7 @@ namespace ErrorCodes
namespace
{
#if defined(ABORT_ON_LOGICAL_ERROR)
#if defined(DEBUG_OR_SANITIZER_BUILD)
/** This visitor checks if Query Tree structure is valid after each pass
* in debug build.
@ -183,7 +183,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node)
for (size_t i = 0; i < passes_size; ++i)
{
passes[i]->run(query_tree_node, current_context);
#if defined(ABORT_ON_LOGICAL_ERROR)
#if defined(DEBUG_OR_SANITIZER_BUILD)
ValidationChecker(passes[i]->getName()).visit(query_tree_node);
#endif
}
@ -208,7 +208,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pa
for (size_t i = 0; i < up_to_pass_index; ++i)
{
passes[i]->run(query_tree_node, current_context);
#if defined(ABORT_ON_LOGICAL_ERROR)
#if defined(DEBUG_OR_SANITIZER_BUILD)
ValidationChecker(passes[i]->getName()).visit(query_tree_node);
#endif
}

View File

@ -4124,7 +4124,9 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo
auto * column_to_interpolate = interpolate_node_typed.getExpression()->as<IdentifierNode>();
if (!column_to_interpolate)
throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found",
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"INTERPOLATE can work only for identifiers, but {} is found",
interpolate_node_typed.getExpression()->formatASTForErrorMessage());
auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName();

View File

@ -267,7 +267,7 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length)
#else
void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length)
@ -366,13 +366,10 @@ void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) cons
hash.update(wbuf.str().c_str(), wbuf.str().size());
}
void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnAggregateFunction::getWeakHash32() const
{
auto s = data.size();
if (hash.getData().size() != data.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), hash.getData().size());
WeakHash32 hash(s);
auto & hash_data = hash.getData();
std::vector<UInt8> v;
@ -383,6 +380,8 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
wbuf.finalize();
hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]);
}
return hash;
}
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const
@ -466,7 +465,7 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size
insertMergeFrom(from, n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
#else
void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n)

View File

@ -145,7 +145,7 @@ public:
void insertData(const char * pos, size_t length) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & from, size_t n) override;
#else
using IColumn::insertFrom;
@ -177,7 +177,7 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
@ -189,7 +189,7 @@ public:
void protect() override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & from, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override;
@ -212,7 +212,7 @@ public:
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override

View File

@ -271,15 +271,12 @@ void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const
getData().updateHashWithValue(offset + i, hash);
}
void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnArray::getWeakHash32() const
{
auto s = offsets->size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", s, hash.getData().size());
WeakHash32 hash(s);
WeakHash32 internal_hash(data->size());
data->updateWeakHash32(internal_hash);
WeakHash32 internal_hash = data->getWeakHash32();
Offset prev_offset = 0;
const auto & offsets_data = getOffsets();
@ -300,6 +297,8 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
prev_offset = offsets_data[i];
}
return hash;
}
void ColumnArray::updateHashFast(SipHash & hash) const
@ -337,7 +336,7 @@ bool ColumnArray::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnArray::insertFrom(const IColumn & src_, size_t n)
#else
void ColumnArray::doInsertFrom(const IColumn & src_, size_t n)
@ -396,7 +395,7 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan
: 1);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
#else
int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
@ -543,7 +542,7 @@ void ColumnArray::getExtremes(Field & min, Field & max) const
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -82,16 +82,16 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override;
#else
void doInsertFrom(const IColumn & src_, size_t n) override;
@ -103,7 +103,7 @@ public:
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;

View File

@ -3,6 +3,7 @@
#include <optional>
#include <Core/Field.h>
#include <Columns/IColumn.h>
#include <Common/WeakHash.h>
#include <IO/BufferWithOwnMemory.h>
@ -85,7 +86,7 @@ public:
bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); }
void insert(const Field &) override { throwMustBeDecompressed(); }
bool tryInsert(const Field &) override { throwMustBeDecompressed(); }
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
#else
void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
@ -98,13 +99,13 @@ public:
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); }
WeakHash32 getWeakHash32() const override { throwMustBeDecompressed(); }
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
void expand(const Filter &, bool) override { throwMustBeDecompressed(); }
ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); }
ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); }
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }

View File

@ -137,18 +137,10 @@ void ColumnConst::updatePermutation(PermutationSortDirection /*direction*/, Perm
{
}
void ColumnConst::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnConst::getWeakHash32() const
{
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 element_hash(1);
data->updateWeakHash32(element_hash);
size_t data_hash = element_hash.getData()[0];
for (auto & value : hash.getData())
value = static_cast<UInt32>(intHashCRC32(data_hash, value));
WeakHash32 element_hash = data->getWeakHash32();
return WeakHash32(s, element_hash.getData()[0]);
}
void ColumnConst::compareColumn(

View File

@ -123,7 +123,7 @@ public:
return data->isNullAt(0);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override
#else
void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override
@ -151,7 +151,7 @@ public:
++s;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn &, size_t) override
#else
void doInsertFrom(const IColumn &, size_t) override
@ -160,7 +160,7 @@ public:
++s;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; }
#else
void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; }
@ -204,7 +204,7 @@ public:
data->updateHashWithValue(0, hash);
}
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override
{
@ -237,7 +237,7 @@ public:
return data->allocatedBytes() + sizeof(s);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override
#else
int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override

View File

@ -28,11 +28,10 @@ namespace ErrorCodes
extern const int PARAMETER_OUT_OF_BOUND;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
}
template <is_decimal T>
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const
#else
int ColumnDecimal<T>::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const
@ -76,13 +75,10 @@ void ColumnDecimal<T>::updateHashWithValue(size_t n, SipHash & hash) const
}
template <is_decimal T>
void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnDecimal<T>::getWeakHash32() const
{
auto s = data.size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 hash(s);
const T * begin = data.data();
const T * end = begin + s;
@ -94,6 +90,8 @@ void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
++begin;
++hash_data;
}
return hash;
}
template <is_decimal T>
@ -335,7 +333,7 @@ void ColumnDecimal<T>::insertData(const char * src, size_t /*length*/)
}
template <is_decimal T>
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnDecimal<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -55,13 +55,13 @@ public:
void reserve(size_t n) override { data.reserve_exact(n); }
void shrinkToFit() override { data.shrink_to_fit(); }
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
#else
void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
#endif
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override
@ -76,7 +76,7 @@ public:
void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); }
void insert(const Field & x) override { data.push_back(x.get<T>()); }
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -102,9 +102,9 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;

View File

@ -215,7 +215,7 @@ bool ColumnDynamic::tryInsert(const DB::Field & x)
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n)
#else
void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n)
@ -269,7 +269,7 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n)
variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
#else
void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
@ -439,7 +439,7 @@ void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, si
}
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#else
void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
@ -603,7 +603,7 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const
variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const
#else
int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const

View File

@ -4,6 +4,7 @@
#include <Columns/ColumnVector.h>
#include <Columns/ColumnVariant.h>
#include <DataTypes/IDataType.h>
#include <Common/WeakHash.h>
namespace DB
@ -142,7 +143,7 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
@ -173,9 +174,9 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override
WeakHash32 getWeakHash32() const override
{
variant_column->updateWeakHash32(hash);
return variant_column->getWeakHash32();
}
void updateHashFast(SipHash & hash) const override
@ -219,7 +220,7 @@ public:
return scattered_columns;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;

View File

@ -74,7 +74,7 @@ bool ColumnFixedString::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
#else
void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index)
@ -90,7 +90,7 @@ void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index)
memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@ -137,14 +137,10 @@ void ColumnFixedString::updateHashWithValue(size_t index, SipHash & hash) const
hash.update(reinterpret_cast<const char *>(&chars[n * index]), n);
}
void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnFixedString::getWeakHash32() const
{
auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, "
"hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 hash(s);
const UInt8 * pos = chars.data();
UInt32 * hash_data = hash.getData().data();
@ -156,6 +152,8 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
pos += n;
++hash_data;
}
return hash;
}
void ColumnFixedString::updateHashFast(SipHash & hash) const
@ -227,7 +225,7 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation &
return elements.size();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -98,13 +98,13 @@ public:
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t index) override;
#else
void doInsertFrom(const IColumn & src_, size_t index) override;
#endif
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
@ -133,11 +133,11 @@ public:
void updateHashWithValue(size_t index, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#else
int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
@ -156,7 +156,7 @@ public:
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;

View File

@ -72,7 +72,7 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const
return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnFunction::insertFrom(const IColumn & src, size_t n)
#else
void ColumnFunction::doInsertFrom(const IColumn & src, size_t n)
@ -93,7 +93,7 @@ void ColumnFunction::doInsertFrom(const IColumn & src, size_t n)
++elements_size;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -4,6 +4,7 @@
#include <Core/NamesAndTypes.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB
@ -94,12 +95,12 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
#endif
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn &, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override;
@ -130,9 +131,9 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName());
}
void updateWeakHash32(WeakHash32 &) const override
WeakHash32 getWeakHash32() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateWeakHash32 is not implemented for {}", getName());
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getWeakHash32 is not implemented for {}", getName());
}
void updateHashFast(SipHash &) const override
@ -145,7 +146,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override

View File

@ -7,8 +7,7 @@
#include <Common/HashTable/HashMap.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
#include "Storages/IndicesDescription.h"
#include "base/types.h"
#include <base/types.h>
#include <base/sort.h>
#include <base/scope_guard.h>
@ -159,7 +158,7 @@ void ColumnLowCardinality::insertDefault()
idx.insertPosition(getDictionary().getDefaultValueIndex());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n)
#else
void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n)
@ -191,7 +190,7 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n)
idx.insertPosition(getDictionary().uniqueInsertFrom(src, n));
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -320,19 +319,10 @@ const char * ColumnLowCardinality::skipSerializedInArena(const char * pos) const
return getDictionary().skipSerializedInArena(pos);
}
void ColumnLowCardinality::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnLowCardinality::getWeakHash32() const
{
auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const auto & dict = getDictionary().getNestedColumn();
WeakHash32 dict_hash(dict->size());
dict->updateWeakHash32(dict_hash);
idx.updateWeakHash(hash, dict_hash);
WeakHash32 dict_hash = getDictionary().getNestedColumn()->getWeakHash32();
return idx.getWeakHash(dict_hash);
}
void ColumnLowCardinality::updateHashFast(SipHash & hash) const
@ -372,7 +362,7 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs,
return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
@ -832,10 +822,11 @@ bool ColumnLowCardinality::Index::containsDefault() const
return contains;
}
void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const
WeakHash32 ColumnLowCardinality::Index::getWeakHash(const WeakHash32 & dict_hash) const
{
WeakHash32 hash(positions->size());
auto & hash_data = hash.getData();
auto & dict_hash_data = dict_hash.getData();
const auto & dict_hash_data = dict_hash.getData();
auto update_weak_hash = [&](auto x)
{
@ -844,10 +835,11 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 &
auto size = data.size();
for (size_t i = 0; i < size; ++i)
hash_data[i] = static_cast<UInt32>(intHashCRC32(dict_hash_data[data[i]], hash_data[i]));
hash_data[i] = dict_hash_data[data[i]];
};
callForType(std::move(update_weak_hash), size_of_type);
return hash;
}
void ColumnLowCardinality::Index::collectSerializedValueSizes(

View File

@ -78,14 +78,14 @@ public:
bool tryInsert(const Field & x) override;
void insertDefault() override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
#endif
void insertFromFullColumn(const IColumn & src, size_t n);
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -111,7 +111,7 @@ public:
getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash);
}
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash &) const override;
@ -135,7 +135,7 @@ public:
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
@ -325,7 +325,7 @@ public:
bool containsDefault() const;
void updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const;
WeakHash32 getWeakHash(const WeakHash32 & dict_hash) const;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const;

View File

@ -143,9 +143,9 @@ void ColumnMap::updateHashWithValue(size_t n, SipHash & hash) const
nested->updateHashWithValue(n, hash);
}
void ColumnMap::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnMap::getWeakHash32() const
{
nested->updateWeakHash32(hash);
return nested->getWeakHash32();
}
void ColumnMap::updateHashFast(SipHash & hash) const
@ -153,7 +153,7 @@ void ColumnMap::updateHashFast(SipHash & hash) const
nested->updateHashFast(hash);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnMap::insertFrom(const IColumn & src, size_t n)
#else
void ColumnMap::doInsertFrom(const IColumn & src, size_t n)
@ -162,7 +162,7 @@ void ColumnMap::doInsertFrom(const IColumn & src, size_t n)
nested->insertFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@ -171,7 +171,7 @@ void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t le
assert_cast<ColumnArray &>(*nested).insertManyFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), position, length);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -222,7 +222,7 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele
return res;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const

View File

@ -64,10 +64,10 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -83,7 +83,7 @@ public:
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;

View File

@ -56,25 +56,21 @@ void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
getNestedColumn().updateHashWithValue(n, hash);
}
void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnNullable::getWeakHash32() const
{
auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 old_hash = hash;
nested_column->updateWeakHash32(hash);
WeakHash32 hash = nested_column->getWeakHash32();
const auto & null_map_data = getNullMapData();
auto & hash_data = hash.getData();
auto & old_hash_data = old_hash.getData();
/// Use old data for nulls.
/// Use default for nulls.
for (size_t row = 0; row < s; ++row)
if (null_map_data[row])
hash_data[row] = old_hash_data[row];
hash_data[row] = WeakHash32::kDefaultInitialValue;
return hash;
}
void ColumnNullable::updateHashFast(SipHash & hash) const
@ -221,7 +217,7 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const
return pos;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -262,7 +258,7 @@ bool ColumnNullable::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnNullable::insertFrom(const IColumn & src, size_t n)
#else
void ColumnNullable::doInsertFrom(const IColumn & src, size_t n)
@ -274,7 +270,7 @@ void ColumnNullable::doInsertFrom(const IColumn & src, size_t n)
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@ -414,7 +410,7 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#else
int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const

View File

@ -69,7 +69,7 @@ public:
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -77,7 +77,7 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
@ -100,7 +100,7 @@ public:
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
@ -133,7 +133,7 @@ public:
void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override;
// Special function for nullable minmax index

View File

@ -385,7 +385,7 @@ bool ColumnObject::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnObject::insertFrom(const IColumn & src, size_t n)
#else
void ColumnObject::doInsertFrom(const IColumn & src, size_t n)
@ -418,7 +418,7 @@ void ColumnObject::doInsertFrom(const IColumn & src, size_t n)
insertFromSharedDataAndFillRemainingDynamicPaths(src_object_column, src_dynamic_paths_for_shared_data, n, 1);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -747,13 +747,15 @@ void ColumnObject::updateHashWithValue(size_t n, SipHash & hash) const
shared_data->updateHashWithValue(n, hash);
}
void ColumnObject::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnObject::getWeakHash32() const
{
WeakHash32 hash(size());
for (const auto & [_, column] : typed_paths)
column->updateWeakHash32(hash);
hash.update(column->getWeakHash32());
for (const auto & [_, column] : dynamic_paths)
column->updateWeakHash32(hash);
shared_data->updateWeakHash32(hash);
hash.update(column->getWeakHash32());
hash.update(shared_data->getWeakHash32());
return hash;
}
void ColumnObject::updateHashFast(SipHash & hash) const

View File

@ -5,10 +5,13 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnString.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/Serializations/SerializationDynamic.h>
#include <Formats/FormatSettings.h>
#include <Common/WeakHash.h>
namespace DB
{
@ -73,7 +76,7 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
@ -91,7 +94,7 @@ public:
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
@ -105,7 +108,7 @@ public:
void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
/// Values of ColumnObject are not comparable.
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; }

View File

@ -5,6 +5,7 @@
#include <Core/Names.h>
#include <DataTypes/Serializations/SubcolumnsTree.h>
#include <Common/PODArray.h>
#include <Common/WeakHash.h>
#include <DataTypes/IDataType.h>
@ -209,7 +210,7 @@ public:
void insert(const Field & field) override;
bool tryInsert(const Field & field) override;
void insertDefault() override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
@ -233,7 +234,7 @@ public:
/// Order of rows in ColumnObjectDeprecated is undefined.
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
@ -250,7 +251,7 @@ public:
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
WeakHash32 getWeakHash32() const override { throwMustBeConcrete(); }
void updateHashFast(SipHash &) const override;
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
bool hasEqualValues() const override { throwMustBeConcrete(); }

View File

@ -174,7 +174,7 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const
return values->skipSerializedInArena(pos);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -252,7 +252,7 @@ bool ColumnSparse::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnSparse::insertFrom(const IColumn & src, size_t n)
#else
void ColumnSparse::doInsertFrom(const IColumn & src, size_t n)
@ -454,7 +454,7 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray<Type> & indexes, size_t l
return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#else
int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
@ -678,20 +678,22 @@ void ColumnSparse::updateHashWithValue(size_t n, SipHash & hash) const
values->updateHashWithValue(getValueIndex(n), hash);
}
void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnSparse::getWeakHash32() const
{
if (hash.getData().size() != _size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", _size, hash.getData().size());
WeakHash32 values_hash = values->getWeakHash32();
WeakHash32 hash(size());
auto & hash_data = hash.getData();
auto & values_hash_data = values_hash.getData();
auto offset_it = begin();
auto & hash_data = hash.getData();
for (size_t i = 0; i < _size; ++i, ++offset_it)
{
size_t value_index = offset_it.getValueIndex();
auto data_ref = values->getDataAt(value_index);
hash_data[i] = ::updateWeakHash32(reinterpret_cast<const UInt8 *>(data_ref.data), data_ref.size, hash_data[i]);
hash_data[i] = values_hash_data[value_index];
}
return hash;
}
void ColumnSparse::updateHashFast(SipHash & hash) const

View File

@ -81,14 +81,14 @@ public:
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char *) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
@ -106,7 +106,7 @@ public:
template <typename Type>
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
@ -139,7 +139,7 @@ public:
void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override;

View File

@ -39,7 +39,7 @@ ColumnString::ColumnString(const ColumnString & src)
last_offset, chars.size());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@ -108,13 +108,10 @@ MutableColumnPtr ColumnString::cloneResized(size_t to_size) const
return res;
}
void ColumnString::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnString::getWeakHash32() const
{
auto s = offsets.size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 hash(s);
const UInt8 * pos = chars.data();
UInt32 * hash_data = hash.getData().data();
@ -130,10 +127,12 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const
prev_offset = offset;
++hash_data;
}
return hash;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -142,7 +142,7 @@ public:
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override
#else
void doInsertFrom(const IColumn & src_, size_t n) override
@ -169,7 +169,7 @@ public:
}
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
@ -212,7 +212,7 @@ public:
hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size);
}
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override
{
@ -220,7 +220,7 @@ public:
hash.update(reinterpret_cast<const char *>(chars.data()), chars.size() * sizeof(chars[0]));
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -250,7 +250,7 @@ public:
offsets.push_back(offsets.back() + 1);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override

View File

@ -206,7 +206,7 @@ bool ColumnTuple::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
#else
void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n)
@ -223,7 +223,7 @@ void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n)
columns[i]->insertFrom(*src.columns[i], n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@ -310,16 +310,15 @@ void ColumnTuple::updateHashWithValue(size_t n, SipHash & hash) const
column->updateHashWithValue(n, hash);
}
void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnTuple::getWeakHash32() const
{
auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 hash(s);
for (const auto & column : columns)
column->updateWeakHash32(hash);
hash.update(column->getWeakHash32());
return hash;
}
void ColumnTuple::updateHashFast(SipHash & hash) const
@ -328,7 +327,7 @@ void ColumnTuple::updateHashFast(SipHash & hash) const
column->updateHashFast(hash);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -484,7 +483,7 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_
return 0;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const

View File

@ -66,7 +66,7 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
@ -81,9 +81,9 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -94,7 +94,7 @@ public:
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;

View File

@ -90,7 +90,7 @@ public:
return getNestedColumn()->updateHashWithValue(n, hash_func);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
@ -492,7 +492,7 @@ const char * ColumnUnique<ColumnType>::skipSerializedInArena(const char *) const
}
template <typename ColumnType>
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnUnique<ColumnType>::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnUnique<ColumnType>::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const

View File

@ -595,7 +595,7 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position
}
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnVariant::insertFrom(const IColumn & src_, size_t n)
#else
void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n)
@ -604,7 +604,7 @@ void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n)
insertFromImpl(src_, n, nullptr);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length)
#else
void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length)
@ -613,7 +613,7 @@ void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t
insertRangeFromImpl(src_, start, length, nullptr);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#else
void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
@ -789,36 +789,26 @@ void ColumnVariant::updateHashWithValue(size_t n, SipHash & hash) const
variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash);
}
void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnVariant::getWeakHash32() const
{
auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
/// If we have only NULLs, keep hash unchanged.
if (hasOnlyNulls())
return;
return WeakHash32(s);
/// Optimization for case when there is only 1 non-empty variant and no NULLs.
/// In this case we can just calculate weak hash for this variant.
if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
{
variants[*non_empty_local_discr]->updateWeakHash32(hash);
return;
}
return variants[*non_empty_local_discr]->getWeakHash32();
/// Calculate weak hash for all variants.
std::vector<WeakHash32> nested_hashes;
for (const auto & variant : variants)
{
WeakHash32 nested_hash(variant->size());
variant->updateWeakHash32(nested_hash);
nested_hashes.emplace_back(std::move(nested_hash));
}
nested_hashes.emplace_back(variant->getWeakHash32());
/// For each row hash is a hash of corresponding row from corresponding variant.
WeakHash32 hash(s);
auto & hash_data = hash.getData();
const auto & local_discriminators_data = getLocalDiscriminators();
const auto & offsets_data = getOffsets();
@ -827,11 +817,10 @@ void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
Discriminator discr = local_discriminators_data[i];
/// Update hash only for non-NULL values
if (discr != NULL_DISCRIMINATOR)
{
auto nested_hash = nested_hashes[local_discriminators_data[i]].getData()[offsets_data[i]];
hash_data[i] = static_cast<UInt32>(hashCRC32(nested_hash, hash_data[i]));
}
hash_data[i] = nested_hashes[discr].getData()[offsets_data[i]];
}
return hash;
}
void ColumnVariant::updateHashFast(SipHash & hash) const
@ -1186,7 +1175,7 @@ bool ColumnVariant::hasEqualValues() const
return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const

View File

@ -180,7 +180,7 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override;
void insertManyFrom(const IColumn & src_, size_t position, size_t length) override;
@ -213,7 +213,7 @@ public:
const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos);
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
@ -223,7 +223,7 @@ public:
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;

View File

@ -73,13 +73,10 @@ void ColumnVector<T>::updateHashWithValue(size_t n, SipHash & hash) const
}
template <typename T>
void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnVector<T>::getWeakHash32() const
{
auto s = data.size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 hash(s);
const T * begin = data.data();
const T * end = begin + s;
@ -91,6 +88,8 @@ void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
++begin;
++hash_data;
}
return hash;
}
template <typename T>
@ -503,7 +502,7 @@ bool ColumnVector<T>::tryInsert(const DB::Field & x)
}
template <typename T>
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnVector<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -64,7 +64,7 @@ public:
return data.size();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override
#else
void doInsertFrom(const IColumn & src, size_t n) override
@ -73,7 +73,7 @@ public:
data.push_back(assert_cast<const Self &>(src).getData()[n]);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override
@ -114,7 +114,7 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
@ -150,7 +150,7 @@ public:
}
/// This method implemented in header because it could be possibly devirtualized.
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
@ -240,7 +240,7 @@ public:
bool tryInsert(const DB::Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;

View File

@ -47,7 +47,7 @@ String IColumn::dumpStructure() const
return res.str();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void IColumn::insertFrom(const IColumn & src, size_t n)
#else
void IColumn::doInsertFrom(const IColumn & src, size_t n)

View File

@ -179,7 +179,7 @@ public:
/// Appends n-th element from other column with the same type.
/// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation.
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
virtual void insertFrom(const IColumn & src, size_t n);
#else
void insertFrom(const IColumn & src, size_t n)
@ -191,7 +191,7 @@ public:
/// Appends range of elements from other column with the same type.
/// Could be used to concatenate columns.
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;
#else
void insertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -202,7 +202,7 @@ public:
#endif
/// Appends one element from other column with the same type multiple times.
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
virtual void insertManyFrom(const IColumn & src, size_t position, size_t length)
{
for (size_t i = 0; i < length; ++i)
@ -300,10 +300,10 @@ public:
/// passed bytes to hash must identify sequence of values unambiguously.
virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0;
/// Update hash function value. Hash is calculated for each element.
/// Get hash function value. Hash is calculated for each element.
/// It's a fast weak hash function. Mainly need to scatter data between threads.
/// WeakHash32 must have the same size as column.
virtual void updateWeakHash32(WeakHash32 & hash) const = 0;
virtual WeakHash32 getWeakHash32() const = 0;
/// Update state of hash with all column.
virtual void updateHashFast(SipHash & hash) const = 0;
@ -345,7 +345,7 @@ public:
*
* For non Nullable and non floating point types, nan_direction_hint is ignored.
*/
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
[[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
#else
[[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
@ -667,7 +667,7 @@ protected:
Sort full_sort,
PartialSort partial_sort) const;
#if defined(ABORT_ON_LOGICAL_ERROR)
#if defined(DEBUG_OR_SANITIZER_BUILD)
virtual void doInsertFrom(const IColumn & src, size_t n);
virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;

View File

@ -1,6 +1,7 @@
#pragma once
#include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB
@ -26,7 +27,7 @@ public:
size_t byteSize() const override { return 0; }
size_t byteSizeAt(size_t) const override { return 0; }
size_t allocatedBytes() const override { return 0; }
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
@ -63,15 +64,16 @@ public:
{
}
void updateWeakHash32(WeakHash32 & /*hash*/) const override
WeakHash32 getWeakHash32() const override
{
return WeakHash32(s);
}
void updateHashFast(SipHash & /*hash*/) const override
{
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn &, size_t) override
#else
void doInsertFrom(const IColumn &, size_t) override
@ -80,7 +82,7 @@ public:
++s;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override
#else
void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override

View File

@ -1,6 +1,7 @@
#pragma once
#include <optional>
#include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB
{
@ -85,7 +86,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique.");
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn &, size_t, size_t) override
#else
void doInsertRangeFrom(const IColumn &, size_t, size_t) override
@ -166,9 +167,9 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique.");
}
void updateWeakHash32(WeakHash32 &) const override
WeakHash32 getWeakHash32() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method updateWeakHash32 is not supported for ColumnUnique.");
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getWeakHash32 is not supported for ColumnUnique.");
}
void updateHashFast(SipHash &) const override

View File

@ -52,7 +52,7 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows)
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src)
#else
static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src)

View File

@ -60,8 +60,7 @@ TEST(WeakHash32, ColumnVectorU8)
data.push_back(i);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -77,8 +76,7 @@ TEST(WeakHash32, ColumnVectorI8)
data.push_back(i);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -94,8 +92,7 @@ TEST(WeakHash32, ColumnVectorU16)
data.push_back(i);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -111,8 +108,7 @@ TEST(WeakHash32, ColumnVectorI16)
data.push_back(i);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -128,8 +124,7 @@ TEST(WeakHash32, ColumnVectorU32)
data.push_back(i << 16u);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -145,8 +140,7 @@ TEST(WeakHash32, ColumnVectorI32)
data.push_back(i << 16);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -162,8 +156,7 @@ TEST(WeakHash32, ColumnVectorU64)
data.push_back(i << 32u);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -179,8 +172,7 @@ TEST(WeakHash32, ColumnVectorI64)
data.push_back(i << 32);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -204,8 +196,7 @@ TEST(WeakHash32, ColumnVectorU128)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), eq_data);
}
@ -221,8 +212,7 @@ TEST(WeakHash32, ColumnVectorI128)
data.push_back(i << 32);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -238,8 +228,7 @@ TEST(WeakHash32, ColumnDecimal32)
data.push_back(i << 16);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -255,8 +244,7 @@ TEST(WeakHash32, ColumnDecimal64)
data.push_back(i << 32);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -272,8 +260,7 @@ TEST(WeakHash32, ColumnDecimal128)
data.push_back(i << 32);
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), col->getData());
}
@ -294,8 +281,7 @@ TEST(WeakHash32, ColumnString1)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -331,8 +317,7 @@ TEST(WeakHash32, ColumnString2)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -369,8 +354,7 @@ TEST(WeakHash32, ColumnString3)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -397,8 +381,7 @@ TEST(WeakHash32, ColumnFixedString)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -444,8 +427,7 @@ TEST(WeakHash32, ColumnArray)
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
WeakHash32 hash(col_arr->size());
col_arr->updateWeakHash32(hash);
WeakHash32 hash = col_arr->getWeakHash32();
checkColumn(hash.getData(), eq_data);
}
@ -479,8 +461,7 @@ TEST(WeakHash32, ColumnArray2)
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
WeakHash32 hash(col_arr->size());
col_arr->updateWeakHash32(hash);
WeakHash32 hash = col_arr->getWeakHash32();
checkColumn(hash.getData(), eq_data);
}
@ -536,8 +517,7 @@ TEST(WeakHash32, ColumnArrayArray)
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2));
WeakHash32 hash(col_arr_arr->size());
col_arr_arr->updateWeakHash32(hash);
WeakHash32 hash = col_arr_arr->getWeakHash32();
checkColumn(hash.getData(), eq_data);
}
@ -555,8 +535,7 @@ TEST(WeakHash32, ColumnConst)
auto col_const = ColumnConst::create(std::move(inner_col), 256);
WeakHash32 hash(col_const->size());
col_const->updateWeakHash32(hash);
WeakHash32 hash = col_const->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -576,8 +555,7 @@ TEST(WeakHash32, ColumnLowcardinality)
}
}
WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
WeakHash32 hash = col->getWeakHash32();
checkColumn(hash.getData(), data);
}
@ -602,8 +580,7 @@ TEST(WeakHash32, ColumnNullable)
auto col_null = ColumnNullable::create(std::move(col), std::move(mask));
WeakHash32 hash(col_null->size());
col_null->updateWeakHash32(hash);
WeakHash32 hash = col_null->getWeakHash32();
checkColumn(hash.getData(), eq);
}
@ -633,8 +610,7 @@ TEST(WeakHash32, ColumnTupleUInt64UInt64)
columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size());
col_tuple->updateWeakHash32(hash);
WeakHash32 hash = col_tuple->getWeakHash32();
checkColumn(hash.getData(), eq);
}
@ -671,8 +647,7 @@ TEST(WeakHash32, ColumnTupleUInt64String)
columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size());
col_tuple->updateWeakHash32(hash);
WeakHash32 hash = col_tuple->getWeakHash32();
checkColumn(hash.getData(), eq);
}
@ -709,8 +684,7 @@ TEST(WeakHash32, ColumnTupleUInt64FixedString)
columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size());
col_tuple->updateWeakHash32(hash);
WeakHash32 hash = col_tuple->getWeakHash32();
checkColumn(hash.getData(), eq);
}
@ -756,8 +730,7 @@ TEST(WeakHash32, ColumnTupleUInt64Array)
columns.emplace_back(ColumnArray::create(std::move(val), std::move(off)));
auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size());
col_tuple->updateWeakHash32(hash);
WeakHash32 hash = col_tuple->getWeakHash32();
checkColumn(hash.getData(), eq_data);
}

View File

@ -206,7 +206,7 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
}
else
{
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(old_size);
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size);
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
}
}
@ -239,7 +239,7 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
{
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(old_size);
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size);
throw DB::ErrnoException(
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY,
"Allocator: Cannot realloc from {} to {}",

View File

@ -38,7 +38,7 @@ namespace
std::erase_if(left_subkeys, [&](const String & key) { return ignore_keys->contains(key); });
std::erase_if(right_subkeys, [&](const String & key) { return ignore_keys->contains(key); });
#if defined(ABORT_ON_LOGICAL_ERROR)
#if defined(DEBUG_OR_SANITIZER_BUILD)
/// Compound `ignore_keys` are not yet implemented.
for (const auto & ignore_key : *ignore_keys)
chassert(ignore_key.find('.') == std::string_view::npos);

View File

@ -234,10 +234,10 @@
M(PartsCommitted, "Deprecated. See PartsActive.") \
M(PartsPreActive, "The part is in data_parts, but not used for SELECTs.") \
M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \
M(AttachedDatabase, "Active database, used by current and upcoming SELECTs.") \
M(AttachedTable, "Active table, used by current and upcoming SELECTs.") \
M(AttachedView, "Active view, used by current and upcoming SELECTs.") \
M(AttachedDictionary, "Active dictionary, used by current and upcoming SELECTs.") \
M(AttachedDatabase, "Active databases.") \
M(AttachedTable, "Active tables.") \
M(AttachedView, "Active views.") \
M(AttachedDictionary, "Active dictionaries.") \
M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \
M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \
M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \

View File

@ -64,7 +64,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc
{
// In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure.
// Log the message before we fail.
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
if (code == ErrorCodes::LOGICAL_ERROR)
{
abortOnFailedAssertion(msg, trace.data(), 0, trace.size());
@ -443,7 +443,7 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b
}
catch (...) {} // NOLINT(bugprone-empty-catch)
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
try
{
throw;

View File

@ -244,6 +244,15 @@ private:
const char * className() const noexcept override { return "DB::ErrnoException"; }
};
/// An exception to use in unit tests to test interfaces.
/// It is distinguished from others, so it does not have to be logged.
class TestException : public Exception
{
public:
using Exception::Exception;
};
using Exceptions = std::vector<std::exception_ptr>;
/** Try to write an exception to the log (and forget about it).

View File

@ -192,7 +192,7 @@ void MemoryTracker::debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused]
{
/// Big allocations through allocNoThrow (without checking memory limits) may easily lead to OOM (and it's hard to debug).
/// Let's find them.
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
if (size < 0)
return;

View File

@ -424,7 +424,7 @@ static void logUnexpectedSyscallError(std::string name)
{
std::string message = fmt::format("{} failed: {}", name, errnoToString());
LOG_WARNING(&Poco::Logger::get("PageCache"), "{}", message);
#if defined(ABORT_ON_LOGICAL_ERROR)
#if defined(DEBUG_OR_SANITIZER_BUILD)
volatile bool true_ = true;
if (true_) // suppress warning about missing [[noreturn]]
abortOnFailedAssertion(message);

View File

@ -23,8 +23,20 @@ namespace DB
LazyPipeFDs TraceSender::pipe;
static thread_local bool inside_send = false;
void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Extras extras)
{
/** The method shouldn't be called recursively or throw exceptions.
* There are several reasons:
* - avoid infinite recursion when some of subsequent functions invoke tracing;
* - avoid inconsistent writes if the method was interrupted by a signal handler in the middle of writing,
* and then another tracing is invoked (e.g., from query profiler).
*/
if (unlikely(inside_send))
return;
inside_send = true;
DENY_ALLOCATIONS_IN_SCOPE;
constexpr size_t buf_size = sizeof(char) /// TraceCollector stop flag
+ sizeof(UInt8) /// String size
+ QUERY_ID_MAX_LEN /// Maximum query_id length
@ -80,6 +92,8 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext
writePODBinary(extras.increment, out);
out.next();
inside_send = false;
}
}

View File

@ -1,2 +1,24 @@
#include <Common/WeakHash.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
void WeakHash32::update(const WeakHash32 & other)
{
size_t size = data.size();
if (size != other.data.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match:"
"left size is {}, right size is {}", size, other.data.size());
for (size_t i = 0; i < size; ++i)
data[i] = static_cast<UInt32>(intHashCRC32(other.data[i], data[i]));
}
}

View File

@ -11,9 +11,8 @@ namespace DB
/// The main purpose why this class needed is to support data initialization. Initially, every bit is 1.
class WeakHash32
{
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
public:
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
using Container = PaddedPODArray<UInt32>;
@ -22,6 +21,8 @@ public:
void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); }
void update(const WeakHash32 & other);
const Container & getData() const { return data; }
Container & getData() { return data; }

View File

@ -25,7 +25,7 @@ namespace DB
template <typename To, typename From>
inline To assert_cast(From && from)
{
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
try
{
if constexpr (std::is_pointer_v<To>)

View File

@ -6,12 +6,17 @@ namespace DB
{
String getRandomASCIIString(size_t length)
{
return getRandomASCIIString(length, thread_local_rng);
}
String getRandomASCIIString(size_t length, pcg64 & rng)
{
std::uniform_int_distribution<int> distribution('a', 'z');
String res;
res.resize(length);
for (auto & c : res)
c = distribution(thread_local_rng);
c = distribution(rng);
return res;
}

View File

@ -2,11 +2,14 @@
#include <Core/Types.h>
#include <pcg_random.hpp>
namespace DB
{
/// Slow random string. Useful for random names and things like this. Not for generating data.
String getRandomASCIIString(size_t length);
String getRandomASCIIString(size_t length, pcg64 & rng);
}

View File

@ -166,7 +166,7 @@ TEST(Common, RWLockRecursive)
auto lock2 = fifo_lock->getLock(RWLockImpl::Read, "q2");
#ifndef ABORT_ON_LOGICAL_ERROR
#ifndef DEBUG_OR_SANITIZER_BUILD
/// It throws LOGICAL_ERROR
EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write, "q2");});
#endif

View File

@ -54,16 +54,3 @@ TEST(ShellCommand, ExecuteWithInput)
EXPECT_EQ(res, "Hello, world!\n");
}
TEST(ShellCommand, AutoWait)
{
// <defunct> hunting:
for (int i = 0; i < 1000; ++i)
{
auto command = ShellCommand::execute("echo " + std::to_string(i));
//command->wait(); // now automatic
}
// std::cerr << "inspect me: ps auxwwf\n";
// std::this_thread::sleep_for(std::chrono::seconds(100));
}

View File

@ -195,7 +195,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n
snapshot_detached_tables.erase(table_name);
}
CurrentMetrics::add(CurrentMetrics::AttachedTable, 1);
CurrentMetrics::add(CurrentMetrics::AttachedTable);
}
StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name)
@ -221,7 +221,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta
.metadata_path = getObjectMetadataPath(table_name),
.is_permanently = false});
CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1);
CurrentMetrics::sub(CurrentMetrics::AttachedTable);
}
return res;
}

View File

@ -289,8 +289,8 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n
tables.erase(it);
table_storage->is_detached = true;
if (table_storage->isSystemStorage() == false)
CurrentMetrics::sub(getAttachedCounterForStorage(table_storage), 1);
if (!table_storage->isSystemStorage() && database_name != DatabaseCatalog::SYSTEM_DATABASE)
CurrentMetrics::sub(getAttachedCounterForStorage(table_storage));
auto table_id = table_storage->getStorageID();
if (table_id.hasUUID())
@ -334,8 +334,8 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c
/// non-Atomic database the is_detached is set to true before RENAME.
table->is_detached = false;
if (table->isSystemStorage() == false && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE)
CurrentMetrics::add(getAttachedCounterForStorage(table), 1);
if (!table->isSystemStorage() && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE)
CurrentMetrics::add(getAttachedCounterForStorage(table));
}
void DatabaseWithOwnTablesBase::shutdown()

View File

@ -59,7 +59,7 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile(
std::optional<size_t> read_until_position_,
std::shared_ptr<FilesystemCacheLog> cache_log_)
: ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0, file_size_)
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
, log(getLogger(fmt::format("CachedOnDiskReadBufferFromFile({})", cache_key_)))
#else
, log(getLogger("CachedOnDiskReadBufferFromFile"))
@ -452,7 +452,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
{
case ReadType::CACHED:
{
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
size_t file_size = getFileSizeFromReadBuffer(*read_buffer_for_file_segment);
if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end)
throw Exception(
@ -937,7 +937,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
if (!result)
{
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
if (read_type == ReadType::CACHED)
{
size_t cache_file_size = getFileSizeFromReadBuffer(*implementation_buffer);

View File

@ -1954,7 +1954,10 @@ struct ToRelativeSubsecondNumImpl
return t.value;
if (scale > scale_multiplier)
return t.value / (scale / scale_multiplier);
return t.value * (scale_multiplier / scale);
return static_cast<UInt128>(t.value) * static_cast<UInt128>((scale_multiplier / scale));
/// Casting ^^: All integers are Int64, yet if t.value is big enough the multiplication can still
/// overflow which is UB. This place is too low-level and generic to check if t.value is sane.
/// Therefore just let it overflow safely and don't bother further.
}
static Int64 execute(UInt32 t, const DateLUTImpl &)
{

View File

@ -47,54 +47,85 @@ bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args)
return true;
}
/// Replaces single low cardinality column in a function call by its dictionary
/// This can only happen after the arguments have been adapted in IFunctionOverloadResolver::getReturnType
/// as it's only possible if there is one low cardinality column and, optionally, const columns
ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count)
{
size_t num_rows = input_rows_count;
/// We return the LC indexes so the LC can be reconstructed with the function result
ColumnPtr indexes;
/// Find first LowCardinality column and replace it to nested dictionary.
for (auto & column : args)
size_t number_low_cardinality_columns = 0;
size_t last_low_cardinality = 0;
size_t number_const_columns = 0;
size_t number_full_columns = 0;
for (size_t i = 0; i < args.size(); i++)
{
if (const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
auto const & arg = args[i];
if (checkAndGetColumn<ColumnLowCardinality>(arg.column.get()))
{
/// Single LowCardinality column is supported now.
if (indexes)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single dictionary argument for function.");
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible type for LowCardinality column: {}",
column.type->getName());
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values' default.
column.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
else
{
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
column.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
num_rows = column.column->size();
column.type = low_cardinality_type->getDictionaryType();
number_low_cardinality_columns++;
last_low_cardinality = i;
}
else if (checkAndGetColumn<ColumnConst>(arg.column.get()))
number_const_columns++;
else
number_full_columns++;
}
/// Change size of constants.
if (!number_low_cardinality_columns && !number_const_columns)
return nullptr;
if (number_full_columns > 0 || number_low_cardinality_columns > 1)
{
/// This should not be possible but currently there are multiple tests in CI failing because of it
/// TODO: Fix those cases, then enable this exception
#if 0
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected low cardinality types found. Low cardinality: {}. Full {}. Const {}",
number_low_cardinality_columns, number_full_columns, number_const_columns);
#else
return nullptr;
#endif
}
else if (number_low_cardinality_columns == 1)
{
auto & lc_arg = args[last_low_cardinality];
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(lc_arg.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", lc_arg.type->getName());
const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(lc_arg.column.get());
chassert(low_cardinality_column);
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values' default.
lc_arg.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
else
{
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
lc_arg.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
/// The new column will have a different number of rows, normally less but occasionally it might be more (NULL)
input_rows_count = lc_arg.column->size();
lc_arg.type = low_cardinality_type->getDictionaryType();
}
/// Change size of constants
for (auto & column : args)
{
if (const auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get()))
{
column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), num_rows);
column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), input_rows_count);
column.type = recursiveRemoveLowCardinality(column.type);
}
}
@ -270,6 +301,8 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments();
const auto & dictionary_type = res_low_cardinality_type->getDictionaryType();
/// The arguments should have been adapted in IFunctionOverloadResolver::getReturnType
/// So there is only one low cardinality column (and optionally some const columns) and no full column
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);

View File

@ -26,8 +26,6 @@ namespace DB
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int BAD_ARGUMENTS;
}
@ -45,84 +43,82 @@ public:
template <typename Transform>
void dispatchForColumns(
const IColumn & x, const IColumn & y,
const IColumn & col_x, const IColumn & col_y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const
{
if (const auto * x_vec_16 = checkAndGetColumn<ColumnDate>(&x))
dispatchForSecondColumn<Transform>(*x_vec_16, y, timezone_x, timezone_y, result);
else if (const auto * x_vec_32 = checkAndGetColumn<ColumnDateTime>(&x))
dispatchForSecondColumn<Transform>(*x_vec_32, y, timezone_x, timezone_y, result);
else if (const auto * x_vec_32_s = checkAndGetColumn<ColumnDate32>(&x))
dispatchForSecondColumn<Transform>(*x_vec_32_s, y, timezone_x, timezone_y, result);
else if (const auto * x_vec_64 = checkAndGetColumn<ColumnDateTime64>(&x))
dispatchForSecondColumn<Transform>(*x_vec_64, y, timezone_x, timezone_y, result);
else if (const auto * x_const_16 = checkAndGetColumnConst<ColumnDate>(&x))
dispatchConstForSecondColumn<Transform>(x_const_16->getValue<UInt16>(), y, timezone_x, timezone_y, result);
else if (const auto * x_const_32 = checkAndGetColumnConst<ColumnDateTime>(&x))
dispatchConstForSecondColumn<Transform>(x_const_32->getValue<UInt32>(), y, timezone_x, timezone_y, result);
else if (const auto * x_const_32_s = checkAndGetColumnConst<ColumnDate32>(&x))
dispatchConstForSecondColumn<Transform>(x_const_32_s->getValue<Int32>(), y, timezone_x, timezone_y, result);
else if (const auto * x_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&x))
dispatchConstForSecondColumn<Transform>(x_const_64->getValue<DecimalField<DateTime64>>(), y, timezone_x, timezone_y, result);
if (const auto * x_vec_16 = checkAndGetColumn<ColumnDate>(&col_x))
dispatchForSecondColumn<Transform>(*x_vec_16, col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_vec_32 = checkAndGetColumn<ColumnDateTime>(&col_x))
dispatchForSecondColumn<Transform>(*x_vec_32, col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_vec_32_s = checkAndGetColumn<ColumnDate32>(&col_x))
dispatchForSecondColumn<Transform>(*x_vec_32_s, col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_vec_64 = checkAndGetColumn<ColumnDateTime64>(&col_x))
dispatchForSecondColumn<Transform>(*x_vec_64, col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_const_16 = checkAndGetColumnConst<ColumnDate>(&col_x))
dispatchConstForSecondColumn<Transform>(x_const_16->getValue<UInt16>(), col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_const_32 = checkAndGetColumnConst<ColumnDateTime>(&col_x))
dispatchConstForSecondColumn<Transform>(x_const_32->getValue<UInt32>(), col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_const_32_s = checkAndGetColumnConst<ColumnDate32>(&col_x))
dispatchConstForSecondColumn<Transform>(x_const_32_s->getValue<Int32>(), col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&col_x))
dispatchConstForSecondColumn<Transform>(x_const_64->getValue<DecimalField<DateTime64>>(), col_y, timezone_x, timezone_y, input_rows_count, result);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64",
name);
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", name);
}
template <typename Transform, typename LeftColumnType>
void dispatchForSecondColumn(
const LeftColumnType & x, const IColumn & y,
const LeftColumnType & x, const IColumn & col_y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const
{
if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&y))
vectorVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&y))
vectorVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, result);
else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&y))
vectorVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&y))
vectorVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, result);
else if (const auto * y_const_16 = checkAndGetColumnConst<ColumnDate>(&y))
vectorConstant<Transform>(x, y_const_16->getValue<UInt16>(), timezone_x, timezone_y, result);
else if (const auto * y_const_32 = checkAndGetColumnConst<ColumnDateTime>(&y))
vectorConstant<Transform>(x, y_const_32->getValue<UInt32>(), timezone_x, timezone_y, result);
else if (const auto * y_const_32_s = checkAndGetColumnConst<ColumnDate32>(&y))
vectorConstant<Transform>(x, y_const_32_s->getValue<Int32>(), timezone_x, timezone_y, result);
else if (const auto * y_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&y))
vectorConstant<Transform>(x, y_const_64->getValue<DecimalField<DateTime64>>(), timezone_x, timezone_y, result);
if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&col_y))
vectorVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&col_y))
vectorVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&col_y))
vectorVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&col_y))
vectorVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_const_16 = checkAndGetColumnConst<ColumnDate>(&col_y))
vectorConstant<Transform>(x, y_const_16->getValue<UInt16>(), timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_const_32 = checkAndGetColumnConst<ColumnDateTime>(&col_y))
vectorConstant<Transform>(x, y_const_32->getValue<UInt32>(), timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_const_32_s = checkAndGetColumnConst<ColumnDate32>(&col_y))
vectorConstant<Transform>(x, y_const_32_s->getValue<Int32>(), timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&col_y))
vectorConstant<Transform>(x, y_const_64->getValue<DecimalField<DateTime64>>(), timezone_x, timezone_y, input_rows_count, result);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64",
name);
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name);
}
template <typename Transform, typename T1>
void dispatchConstForSecondColumn(
T1 x, const IColumn & y,
T1 x, const IColumn & col_y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const
{
if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&y))
constantVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&y))
constantVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, result);
else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&y))
constantVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&y))
constantVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, result);
if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&col_y))
constantVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&col_y))
constantVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&col_y))
constantVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&col_y))
constantVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64",
name);
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name);
}
template <typename Transform, typename LeftColumnType, typename RightColumnType>
void vectorVector(
const LeftColumnType & x, const RightColumnType & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const
{
const auto & x_data = x.getData();
@ -130,14 +126,15 @@ public:
const auto transform_x = TransformDateTime64<Transform>(getScale(x));
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
for (size_t i = 0, size = x.size(); i < size; ++i)
result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y);
for (size_t i = 0; i < input_rows_count; ++i)
result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y);
}
template <typename Transform, typename LeftColumnType, typename T2>
void vectorConstant(
const LeftColumnType & x, T2 y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const
{
const auto & x_data = x.getData();
@ -145,7 +142,7 @@ public:
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
const auto y_value = stripDecimalFieldValue(y);
for (size_t i = 0, size = x.size(); i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
result[i] = calculate(transform_x, transform_y, x_data[i], y_value, timezone_x, timezone_y);
}
@ -153,6 +150,7 @@ public:
void constantVector(
T1 x, const RightColumnType & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const
{
const auto & y_data = y.getData();
@ -160,20 +158,22 @@ public:
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
const auto x_value = stripDecimalFieldValue(x);
for (size_t i = 0, size = y.size(); i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
result[i] = calculate(transform_x, transform_y, x_value, y_data[i], timezone_x, timezone_y);
}
template <typename TransformX, typename TransformY, typename T1, typename T2>
Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const
{
auto res = static_cast<Int64>(transform_y.execute(y, timezone_y)) - static_cast<Int64>(transform_x.execute(x, timezone_x));
if constexpr (is_diff)
return static_cast<Int64>(transform_y.execute(y, timezone_y))
- static_cast<Int64>(transform_x.execute(x, timezone_x));
{
return res;
}
else
{
auto res = static_cast<Int64>(transform_y.execute(y, timezone_y))
- static_cast<Int64>(transform_x.execute(x, timezone_x));
/// Adjust res:
DateTimeComponentsWithFractionalPart a_comp;
DateTimeComponentsWithFractionalPart b_comp;
Int64 adjust_value;
@ -332,95 +332,73 @@ public:
static constexpr auto name = is_relative ? "dateDiff" : "age";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionDateDiff>(); }
String getName() const override
{
return name;
}
String getName() const override { return name; }
bool isVariadic() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() != 3 && arguments.size() != 4)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 3 or 4",
getName(), arguments.size());
FunctionArgumentDescriptors mandatory_args{
{"unit", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
{"startdate", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"},
{"enddate", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"},
};
if (!isString(arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument for function {} (unit) must be String",
getName());
FunctionArgumentDescriptors optional_args{
{"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument for function {} must be Date, Date32, DateTime or DateTime64",
getName());
if (!isDate(arguments[2]) && !isDate32(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Third argument for function {} must be Date, Date32, DateTime or DateTime64",
getName()
);
if (arguments.size() == 4 && !isString(arguments[3]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Fourth argument for function {} (timezone) must be String",
getName());
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeInt64>();
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * unit_column = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
if (!unit_column)
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"First argument for function {} must be constant String",
getName());
const auto * col_unit = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
if (!col_unit)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument for function {} must be constant String", getName());
String unit = Poco::toLower(unit_column->getValue<String>());
String unit = Poco::toLower(col_unit->getValue<String>());
const IColumn & x = *arguments[1].column;
const IColumn & y = *arguments[2].column;
const IColumn & col_x = *arguments[1].column;
const IColumn & col_y = *arguments[2].column;
size_t rows = input_rows_count;
auto res = ColumnInt64::create(rows);
auto col_res = ColumnInt64::create(input_rows_count);
const auto & timezone_x = extractTimeZoneFromFunctionArguments(arguments, 3, 1);
const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2);
if (unit == "year" || unit == "years" || unit == "yy" || unit == "yyyy")
impl.template dispatchForColumns<ToRelativeYearNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.template dispatchForColumns<ToRelativeYearNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "quarter" || unit == "quarters" || unit == "qq" || unit == "q")
impl.template dispatchForColumns<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.template dispatchForColumns<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "month" || unit == "months" || unit == "mm" || unit == "m")
impl.template dispatchForColumns<ToRelativeMonthNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.template dispatchForColumns<ToRelativeMonthNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "week" || unit == "weeks" || unit == "wk" || unit == "ww")
impl.template dispatchForColumns<ToRelativeWeekNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.template dispatchForColumns<ToRelativeWeekNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "day" || unit == "days" || unit == "dd" || unit == "d")
impl.template dispatchForColumns<ToRelativeDayNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.template dispatchForColumns<ToRelativeDayNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "hour" || unit == "hours" || unit == "hh" || unit == "h")
impl.template dispatchForColumns<ToRelativeHourNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.template dispatchForColumns<ToRelativeHourNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "minute" || unit == "minutes" || unit == "mi" || unit == "n")
impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "second" || unit == "seconds" || unit == "ss" || unit == "s")
impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "millisecond" || unit == "milliseconds" || unit == "ms")
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<millisecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<millisecond_multiplier>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "microsecond" || unit == "microseconds" || unit == "us" || unit == "u")
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "nanosecond" || unit == "nanoseconds" || unit == "ns")
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<nanosecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<nanosecond_multiplier>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} does not support '{}' unit", getName(), unit);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit);
return res;
return col_res;
}
private:
DateDiffImpl<is_relative> impl{name};
@ -437,50 +415,35 @@ public:
static constexpr auto name = "timeDiff";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionTimeDiff>(); }
String getName() const override
{
return name;
}
String getName() const override { return name; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }
bool isVariadic() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() != 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 2",
getName(), arguments.size());
FunctionArgumentDescriptors args{
{"first_datetime", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"},
{"second_datetime", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"},
};
if (!isDate(arguments[0]) && !isDate32(arguments[0]) && !isDateTime(arguments[0]) && !isDateTime64(arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument for function {} must be Date, Date32, DateTime or DateTime64",
getName());
if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument for function {} must be Date, Date32, DateTime or DateTime64",
getName()
);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeInt64>();
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const IColumn & x = *arguments[0].column;
const IColumn & y = *arguments[1].column;
const IColumn & col_x = *arguments[0].column;
const IColumn & col_y = *arguments[1].column;
size_t rows = input_rows_count;
auto res = ColumnInt64::create(rows);
auto col_res = ColumnInt64::create(input_rows_count);
impl.dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, DateLUT::instance(), DateLUT::instance(), res->getData());
impl.dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(col_x, col_y, DateLUT::instance(), DateLUT::instance(), input_rows_count, col_res->getData());
return res;
return col_res;
}
private:
DateDiffImpl<true> impl{name};

View File

@ -134,7 +134,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding)
ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1);
ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD);
#ifndef ABORT_ON_LOGICAL_ERROR
#ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR);
ASSERT_TRUE(memory.m_data); // state is intact after exception
ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1);
@ -158,7 +158,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding)
ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD);
ASSERT_EQ(memory.m_size, 1);
#ifndef ABORT_ON_LOGICAL_ERROR
#ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR);
ASSERT_TRUE(memory.m_data); // state is intact after exception
ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD);
@ -197,7 +197,7 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding)
, ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
#ifndef ABORT_ON_LOGICAL_ERROR
#ifndef DEBUG_OR_SANITIZER_BUILD
{
EXPECT_THROW_ERROR_CODE(
{

View File

@ -917,8 +917,8 @@ TEST_P(SyncAsync, ExceptionOnUploadPart) {
TEST_F(WBS3Test, PrefinalizeCalledMultipleTimes) {
#ifdef ABORT_ON_LOGICAL_ERROR
GTEST_SKIP() << "this test trigger LOGICAL_ERROR, runs only if ABORT_ON_LOGICAL_ERROR is not defined";
#ifdef DEBUG_OR_SANITIZER_BUILD
GTEST_SKIP() << "this test trigger LOGICAL_ERROR, runs only if DEBUG_OR_SANITIZER_BUILD is not defined";
#else
EXPECT_THROW({
try {

View File

@ -1007,7 +1007,7 @@ void FileCache::freeSpaceRatioKeepingThreadFunc()
limits_satisfied = main_priority->collectCandidatesForEviction(
desired_size, desired_elements_num, keep_up_free_space_remove_batch, stat, eviction_candidates, lock);
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
/// Let's make sure that we correctly processed the limits.
if (limits_satisfied && eviction_candidates.size() < keep_up_free_space_remove_batch)
{
@ -1110,7 +1110,7 @@ void FileCache::removeAllReleasable(const UserID & user_id)
{
assertInitialized();
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
assertCacheCorrectness();
#endif
@ -1226,7 +1226,7 @@ void FileCache::loadMetadataImpl()
if (first_exception)
std::rethrow_exception(first_exception);
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
assertCacheCorrectness();
#endif
}
@ -1393,7 +1393,7 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir)
FileCache::~FileCache()
{
deactivateBackgroundOperations();
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
assertCacheCorrectness();
#endif
}

View File

@ -67,7 +67,7 @@ FileSegment::FileSegment(
, key_metadata(key_metadata_)
, queue_iterator(queue_iterator_)
, cache(cache_)
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
, log(getLogger(fmt::format("FileSegment({}) : {}", key_.toString(), range().toString())))
#else
, log(getLogger("FileSegment"))
@ -385,9 +385,9 @@ void FileSegment::write(char * from, size_t size, size_t offset_in_file)
try
{
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
/// This mutex is only needed to have a valid assertion in assertCacheCorrectness(),
/// which is only executed in debug/sanitizer builds (under ABORT_ON_LOGICAL_ERROR).
/// which is only executed in debug/sanitizer builds (under DEBUG_OR_SANITIZER_BUILD).
std::lock_guard lock(write_mutex);
#endif

View File

@ -963,7 +963,7 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl(
}
else if (!can_be_broken)
{
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path);
#else
LOG_WARNING(key_metadata->logger(), "Expected path {} to exist, while removing {}:{}",

View File

@ -310,7 +310,7 @@ IColumn::Selector ConcurrentHashJoin::selectDispatchBlock(const Strings & key_co
{
const auto & key_col = from_block.getByName(key_name).column->convertToFullColumnIfConst();
const auto & key_col_no_lc = recursiveRemoveLowCardinality(recursiveRemoveSparse(key_col));
key_col_no_lc->updateWeakHash32(hash);
hash.update(key_col_no_lc->getWeakHash32());
}
return hashToSelector(hash, num_shards);
}

View File

@ -16,7 +16,6 @@
#include <Common/logger_useful.h>
#include <Core/Settings.h>
#include <Parsers/ASTSetQuery.h>
#include <base/hex.h>
#include <Core/Defines.h>
#include <Core/SettingsEnums.h>
@ -82,13 +81,13 @@
#include <Interpreters/ApplyWithSubqueryVisitor.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h>
#include <Functions/UserDefined/UserDefinedSQLFunctionVisitor.h>
#include <Interpreters/ReplaceQueryParameterVisitor.h>
#include <Parsers/QueryParameterVisitor.h>
namespace CurrentMetrics
{
extern const Metric AttachedTable;
@ -147,27 +146,27 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
}
auto db_num_limit = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw;
if (db_num_limit > 0)
if (db_num_limit > 0 && !internal)
{
size_t db_count = DatabaseCatalog::instance().getDatabases().size();
std::vector<String> system_databases = {
std::initializer_list<std::string_view> system_databases =
{
DatabaseCatalog::TEMPORARY_DATABASE,
DatabaseCatalog::SYSTEM_DATABASE,
DatabaseCatalog::INFORMATION_SCHEMA,
DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE,
DatabaseCatalog::DEFAULT_DATABASE
};
for (const auto & system_database : system_databases)
{
if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(system_database))
db_count--;
if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(std::string(system_database)))
--db_count;
}
if (db_count >= db_num_limit)
throw Exception(ErrorCodes::TOO_MANY_DATABASES,
"Too many databases in the Clickhouse. "
"The limit (setting 'max_database_num_to_throw') is set to {}, current number of databases is {}",
"Too many databases. "
"The limit (server configuration parameter `max_database_num_to_throw`) is set to {}, the current number of databases is {}",
db_num_limit, db_count);
}
@ -1601,13 +1600,13 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
}
UInt64 table_num_limit = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw;
if (table_num_limit > 0 && create.getDatabase() != DatabaseCatalog::SYSTEM_DATABASE)
if (table_num_limit > 0 && !internal)
{
UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable);
if (table_count >= table_num_limit)
throw Exception(ErrorCodes::TOO_MANY_TABLES,
"Too many tables in the Clickhouse. "
"The limit (setting 'max_table_num_to_throw') is set to {}, current number of tables is {}",
"Too many tables. "
"The limit (server configuration parameter `max_table_num_to_throw`) is set to {}, the current number of tables is {}",
table_num_limit, table_count);
}

View File

@ -1726,7 +1726,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
const auto & join_clause = table_join.getOnlyClause();
auto join_kind = table_join.kind();
bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind);
auto join_strictness = table_join.strictness();
bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));
auto has_non_const = [](const Block & block, const auto & keys)
{
@ -1745,7 +1748,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
bool has_non_const_keys = has_non_const(query_plan.getCurrentDataStream().header, join_clause.key_names_left)
&& has_non_const(joined_plan->getCurrentDataStream().header, join_clause.key_names_right);
if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys)
if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
{
auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left);
auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right);

View File

@ -121,9 +121,18 @@ String InterpreterShowTablesQuery::getRewrittenQuery()
if (query.merges)
{
WriteBufferFromOwnString rewritten_query;
rewritten_query << "SELECT table, database, round((elapsed * (1 / merges.progress)) - merges.elapsed, 2) AS estimate_complete, round(elapsed,2) elapsed, "
"round(progress*100, 2) AS progress, is_mutation, formatReadableSize(total_size_bytes_compressed) AS size_compressed, "
"formatReadableSize(memory_usage) AS memory_usage FROM system.merges";
rewritten_query << R"(
SELECT
table,
database,
merges.progress > 0 ? round(merges.elapsed * (1 - merges.progress) / merges.progress, 2) : NULL AS estimate_complete,
round(elapsed, 2) AS elapsed,
round(progress * 100, 2) AS progress,
is_mutation,
formatReadableSize(total_size_bytes_compressed) AS size_compressed,
formatReadableSize(memory_usage) AS memory_usage
FROM system.merges
)";
if (!query.like.empty())
{

View File

@ -554,7 +554,7 @@ static Blocks scatterBlockByHashImpl(const Strings & key_columns_names, const Bl
for (const auto & key_name : key_columns_names)
{
ColumnPtr key_col = materializeColumn(block, key_name);
key_col->updateWeakHash32(hash);
hash.update(key_col->getWeakHash32());
}
auto selector = hashToSelector(hash, sharder);

View File

@ -538,7 +538,7 @@ Chunk DDLQueryStatusSource::generate()
ExecutionStatus status(-1, "Cannot obtain error message");
/// Replicated database retries in case of error, it should not write error status.
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
bool need_check_status = true;
#else
bool need_check_status = !is_replicated_database;

View File

@ -7,7 +7,6 @@
#include <Common/FieldVisitorToString.h>
#include <Common/KnownObjectNames.h>
#include <Common/SipHash.h>
#include <Common/typeid_cast.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
@ -19,9 +18,6 @@
#include <Parsers/queryToString.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/FunctionSecretArgumentsFinderAST.h>
#include <Core/QualifiedTableName.h>
#include <boost/algorithm/string.hpp>
using namespace std::literals;
@ -632,6 +628,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
}
settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : "");
@ -642,12 +639,14 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
{
settings.ostr << (settings.hilite ? hilite_operator : "") << ((frame.need_parens && !alias.empty()) ? "tuple" : "") << '('
<< (settings.hilite ? hilite_none : "");
for (size_t i = 0; i < arguments->children.size(); ++i)
{
if (i != 0)
settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
}
settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : "");
@ -663,6 +662,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
}
settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : "");

View File

@ -2743,7 +2743,7 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po
/// 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator
if (op.function_name == "and" && layers.back()->between_counter)
{
layers.back()->between_counter--;
--layers.back()->between_counter;
op = finish_between_operator;
}

View File

@ -745,7 +745,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan,
{
auto & interpolate_node_typed = interpolate_node->as<InterpolateNode &>();
PlannerActionsVisitor planner_actions_visitor(planner_context);
PlannerActionsVisitor planner_actions_visitor(
planner_context,
/* use_column_identifier_as_action_node_name_, (default value)*/ true,
/// Prefer the INPUT to CONSTANT nodes (actions must be non constant)
/* always_use_const_column_for_constant_nodes */ false);
auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag,
interpolate_node_typed.getExpression());
if (expression_to_interpolate_expression_nodes.size() != 1)

View File

@ -487,16 +487,33 @@ public:
return node;
}
const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column)
[[nodiscard]] String addConstantIfNecessary(
const std::string & node_name, const ColumnWithTypeAndName & column, bool always_use_const_column_for_constant_nodes)
{
chassert(column.column != nullptr);
auto it = node_name_to_node.find(node_name);
if (it != node_name_to_node.end() && (!always_use_const_column_for_constant_nodes || it->second->column))
return {node_name};
if (it != node_name_to_node.end())
return it->second;
{
/// There is a node with this name, but it doesn't have a column
/// This likely happens because we executed the query until WithMergeableState with a const node in the
/// WHERE clause and, as the results of headers are materialized, the column was removed
/// Let's add a new column and keep this
String dupped_name{node_name + "_dupped"};
if (node_name_to_node.find(dupped_name) != node_name_to_node.end())
return dupped_name;
const auto * node = &actions_dag.addColumn(column);
node_name_to_node[dupped_name] = node;
return dupped_name;
}
const auto * node = &actions_dag.addColumn(column);
node_name_to_node[node->result_name] = node;
return node;
return {node_name};
}
template <typename FunctionOrOverloadResolver>
@ -525,7 +542,7 @@ public:
}
private:
std::unordered_map<std::string_view, const ActionsDAG::Node *> node_name_to_node;
std::unordered_map<String, const ActionsDAG::Node *> node_name_to_node;
ActionsDAG & actions_dag;
QueryTreeNodePtr scope_node;
};
@ -533,9 +550,11 @@ private:
class PlannerActionsVisitorImpl
{
public:
PlannerActionsVisitorImpl(ActionsDAG & actions_dag,
PlannerActionsVisitorImpl(
ActionsDAG & actions_dag,
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_);
bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_);
ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node);
@ -595,14 +614,18 @@ private:
const PlannerContextPtr planner_context;
ActionNodeNameHelper action_node_name_helper;
bool use_column_identifier_as_action_node_name;
bool always_use_const_column_for_constant_nodes;
};
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAG & actions_dag,
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(
ActionsDAG & actions_dag,
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_)
bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_)
: planner_context(planner_context_)
, action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
, always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_)
{
actions_stack.emplace_back(actions_dag, nullptr);
}
@ -725,17 +748,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
column.type = constant_type;
column.column = column.type->createColumnConst(1, constant_literal);
actions_stack[0].addConstantIfNecessary(constant_node_name, column);
String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column, always_use_const_column_for_constant_nodes);
size_t actions_stack_size = actions_stack.size();
for (size_t i = 1; i < actions_stack_size; ++i)
{
auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column);
actions_stack_node.addInputConstantColumnIfNecessary(final_name, column);
}
return {constant_node_name, Levels(0)};
return {final_name, Levels(0)};
}
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node)
@ -864,16 +886,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma
else
column.column = std::move(column_set);
actions_stack[0].addConstantIfNecessary(column.name, column);
String final_name = actions_stack[0].addConstantIfNecessary(column.name, column, always_use_const_column_for_constant_nodes);
size_t actions_stack_size = actions_stack.size();
for (size_t i = 1; i < actions_stack_size; ++i)
{
auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputConstantColumnIfNecessary(column.name, column);
actions_stack_node.addInputConstantColumnIfNecessary(final_name, column);
}
return {column.name, Levels(0)};
return {final_name, Levels(0)};
}
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node)
@ -1010,14 +1032,19 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
}
PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_)
PlannerActionsVisitor::PlannerActionsVisitor(
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_)
: planner_context(planner_context_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
, always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_)
{}
ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node)
{
PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name);
PlannerActionsVisitorImpl actions_visitor_impl(
actions_dag, planner_context, use_column_identifier_as_action_node_name, always_use_const_column_for_constant_nodes);
return actions_visitor_impl.visit(expression_node);
}

View File

@ -27,11 +27,17 @@ using PlannerContextPtr = std::shared_ptr<PlannerContext>;
* During actions build, there is special handling for following functions:
* 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added.
* 2. For function `in` and its variants, already collected sets from planner context are used.
* 3. When building actions that use CONSTANT nodes, by default we ignore pre-existing INPUTs if those don't have
* a column (a const column always has a column). This is for compatibility with previous headers. We disable this
* behaviour when we explicitly want to override CONSTANT nodes with the input (resolving InterpolateNode for example)
*/
class PlannerActionsVisitor
{
public:
explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_ = true);
explicit PlannerActionsVisitor(
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_ = true,
bool always_use_const_column_for_constant_nodes_ = true);
/** Add actions necessary to calculate expression node into expression dag.
* Necessary actions are not added in actions dag output.
@ -42,6 +48,7 @@ public:
private:
const PlannerContextPtr planner_context;
bool use_column_identifier_as_action_node_name = true;
bool always_use_const_column_for_constant_nodes = true;
};
/** Calculate query tree expression node action dag name and add them into node to name map.

View File

@ -77,7 +77,6 @@ namespace ErrorCodes
extern const int INVALID_JOIN_ON_EXPRESSION;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int SYNTAX_ERROR;
extern const int ACCESS_DENIED;
extern const int PARAMETER_OUT_OF_BOUND;
extern const int TOO_MANY_COLUMNS;
@ -1397,12 +1396,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
{
if (!join_clause.hasASOF())
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"JOIN {} no inequality in ASOF JOIN ON section.",
join_node.formatASTForErrorMessage());
if (table_join_clause.key_names_left.size() <= 1)
throw Exception(ErrorCodes::SYNTAX_ERROR,
"JOIN {} ASOF join needs at least one equi-join column",
"JOIN {} no inequality in ASOF JOIN ON section",
join_node.formatASTForErrorMessage());
}
@ -1524,7 +1518,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
{
const auto & join_clause = table_join->getOnlyClause();
bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind);
bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));
auto has_non_const = [](const Block & block, const auto & keys)
{
@ -1544,7 +1540,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left)
&& has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right);
if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys)
if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
{
auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left);
auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right);

View File

@ -34,13 +34,20 @@ namespace ErrorCodes
namespace
{
FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns)
FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, JoinStrictness strictness)
{
SortDescription desc;
desc.reserve(columns.size());
for (const auto & name : columns)
desc.emplace_back(name);
return std::make_unique<FullMergeJoinCursor>(block, desc);
return std::make_unique<FullMergeJoinCursor>(block, desc, strictness == JoinStrictness::Asof);
}
bool ALWAYS_INLINE isNullAt(const IColumn & column, size_t row)
{
if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(&column))
return nullable_column->isNullAt(row);
return false;
}
template <bool has_left_nulls, bool has_right_nulls>
@ -54,7 +61,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
if (left_nullable && right_nullable)
{
int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint);
if (res)
if (res != 0)
return res;
/// NULL != NULL case
@ -90,9 +97,10 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
const SortCursorImpl & rhs, size_t rpos,
size_t key_length,
int null_direction_hint)
{
for (size_t i = 0; i < lhs.sort_columns_size; ++i)
for (size_t i = 0; i < key_length; ++i)
{
/// TODO(@vdimir): use nullableCompareAt only if there's nullable columns
int cmp = nullableCompareAt<true, true>(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint);
@ -104,13 +112,18 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint)
{
return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), null_direction_hint);
return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
}
int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs, int null_direction_hint)
{
return nullableCompareAt<true, true>(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow(), null_direction_hint);
}
bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint)
{
/// The last row of left cursor is less than the current row of the right cursor.
int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), null_direction_hint);
int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
return cmp < 0;
}
@ -222,25 +235,136 @@ Chunk getRowFromChunk(const Chunk & chunk, size_t pos)
return result;
}
void inline addRange(PaddedPODArray<UInt64> & left_map, size_t start, size_t end)
void inline addRange(PaddedPODArray<UInt64> & values, UInt64 start, UInt64 end)
{
assert(end > start);
for (size_t i = start; i < end; ++i)
left_map.push_back(i);
for (UInt64 i = start; i < end; ++i)
values.push_back(i);
}
void inline addMany(PaddedPODArray<UInt64> & left_or_right_map, size_t idx, size_t num)
void inline addMany(PaddedPODArray<UInt64> & values, UInt64 value, size_t num)
{
for (size_t i = 0; i < num; ++i)
left_or_right_map.push_back(idx);
values.resize_fill(values.size() + num, value);
}
}
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_)
: sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_)
JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos)
{
row.reserve(cursor->sort_columns.size());
for (const auto & col : cursor->sort_columns)
{
auto new_col = col->cloneEmpty();
new_col->insertFrom(*col, pos);
row.push_back(std::move(new_col));
}
if (const IColumn * asof_column = cursor.getAsofColumn())
{
if (const auto * nullable_asof_column = checkAndGetColumn<ColumnNullable>(asof_column))
{
/// We save matched column, and since NULL do not match anything, we can't use it as a key
chassert(!nullable_asof_column->isNullAt(pos));
asof_column = nullable_asof_column->getNestedColumnPtr().get();
}
auto new_col = asof_column->cloneEmpty();
new_col->insertFrom(*asof_column, pos);
row.push_back(std::move(new_col));
}
}
void JoinKeyRow::reset()
{
row.clear();
}
bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const
{
if (row.empty())
return false;
for (size_t i = 0; i < cursor->sort_columns_size; ++i)
{
// int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction);
int cmp = nullableCompareAt<true, true>(*this->row[i], *cursor->sort_columns[i], 0, cursor->getRow(), cursor->desc[i].nulls_direction);
if (cmp != 0)
return false;
}
return true;
}
bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
{
chassert(this->row.size() == cursor->sort_columns_size + 1);
if (!equals(cursor))
return false;
const auto & asof_row = row.back();
if (isNullAt(*asof_row, 0) || isNullAt(*cursor.getAsofColumn(), cursor->getRow()))
return false;
int cmp = 0;
if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(cursor.getAsofColumn()))
cmp = nullable_column->getNestedColumn().compareAt(cursor->getRow(), 0, *asof_row, 1);
else
cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1);
return (asof_inequality == ASOFJoinInequality::Less && cmp < 0)
|| (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0)
|| (asof_inequality == ASOFJoinInequality::Greater && cmp > 0)
|| (asof_inequality == ASOFJoinInequality::GreaterOrEquals && cmp >= 0);
}
void AnyJoinState::set(size_t source_num, const FullMergeJoinCursor & cursor)
{
assert(cursor->rows);
keys[source_num] = JoinKeyRow(cursor, cursor->rows - 1);
}
void AnyJoinState::reset(size_t source_num)
{
keys[source_num].reset();
value.clear();
}
void AnyJoinState::setValue(Chunk value_)
{
value = std::move(value_);
}
bool AnyJoinState::empty() const { return keys[0].row.empty() && keys[1].row.empty(); }
void AsofJoinState::set(const FullMergeJoinCursor & rcursor, size_t rpos)
{
key = JoinKeyRow(rcursor, rpos);
value = rcursor.getCurrent().clone();
value_row = rpos;
}
void AsofJoinState::reset()
{
key.reset();
value.clear();
}
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof)
: sample_block(materializeBlock(sample_block_).cloneEmpty())
, desc(description_)
{
if (desc.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor");
if (is_asof)
{
/// For ASOF join prefix of sort description is used for equality comparison
/// and the last column is used for inequality comparison and is handled separately
auto asof_column_description = desc.back();
desc.pop_back();
chassert(asof_column_description.direction == 1 && asof_column_description.nulls_direction == 1);
asof_column_position = sample_block.getPositionByName(asof_column_description.column_name);
}
}
const Chunk & FullMergeJoinCursor::getCurrent() const
{
@ -278,48 +402,103 @@ bool FullMergeJoinCursor::fullyCompleted() const
return !cursor.isValid() && recieved_all_blocks;
}
String FullMergeJoinCursor::dump() const
{
Strings row_dump;
if (cursor.isValid())
{
Field val;
for (size_t i = 0; i < cursor.sort_columns_size; ++i)
{
cursor.sort_columns[i]->get(cursor.getRow(), val);
row_dump.push_back(val.dump());
}
if (const auto * asof_column = getAsofColumn())
{
asof_column->get(cursor.getRow(), val);
row_dump.push_back(val.dump());
}
}
return fmt::format("<{}/{}{}>[{}]",
cursor.getRow(), cursor.rows,
recieved_all_blocks ? "(finished)" : "",
fmt::join(row_dump, ", "));
}
MergeJoinAlgorithm::MergeJoinAlgorithm(
JoinPtr table_join_,
JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
size_t max_block_size_)
: table_join(table_join_)
: kind(kind_)
, strictness(strictness_)
, max_block_size(max_block_size_)
, log(getLogger("MergeJoinAlgorithm"))
{
if (input_headers.size() != 2)
throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs");
auto strictness = table_join->getTableJoin().strictness();
if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All)
if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All && strictness != JoinStrictness::Asof)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness);
auto kind = table_join->getTableJoin().kind();
if (strictness == JoinStrictness::Asof)
{
if (kind != JoinKind::Left && kind != JoinKind::Inner)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not implement ASOF {} join", kind);
}
if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind);
const auto & join_on = table_join->getTableJoin().getOnlyClause();
if (join_on.on_filter_condition_left || join_on.on_filter_condition_right)
if (on_clause_.on_filter_condition_left || on_clause_.on_filter_condition_right)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions");
cursors = {
createCursor(input_headers[0], join_on.key_names_left),
createCursor(input_headers[1], join_on.key_names_right)
createCursor(input_headers[0], on_clause_.key_names_left, strictness),
createCursor(input_headers[1], on_clause_.key_names_right, strictness),
};
}
for (const auto & [left_key, right_key] : table_join->getTableJoin().leftToRightKeyRemap())
MergeJoinAlgorithm::MergeJoinAlgorithm(
JoinPtr join_ptr,
const Blocks & input_headers,
size_t max_block_size_)
: MergeJoinAlgorithm(
join_ptr->getTableJoin().kind(),
join_ptr->getTableJoin().strictness(),
join_ptr->getTableJoin().getOnlyClause(),
input_headers,
max_block_size_)
{
for (const auto & [left_key, right_key] : join_ptr->getTableJoin().leftToRightKeyRemap())
{
size_t left_idx = input_headers[0].getPositionByName(left_key);
size_t right_idx = input_headers[1].getPositionByName(right_key);
left_to_right_key_remap[left_idx] = right_idx;
}
const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(table_join.get());
const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(join_ptr.get());
if (smjPtr)
{
null_direction_hint = smjPtr->getNullDirection();
}
if (strictness == JoinStrictness::Asof)
setAsofInequality(join_ptr->getTableJoin().getAsofInequality());
}
void MergeJoinAlgorithm::setAsofInequality(ASOFJoinInequality asof_inequality_)
{
if (strictness != JoinStrictness::Asof)
throw Exception(ErrorCodes::LOGICAL_ERROR, "setAsofInequality is only supported for ASOF joins");
if (asof_inequality_ == ASOFJoinInequality::None)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ASOF inequality cannot be None");
asof_inequality = asof_inequality_;
}
void MergeJoinAlgorithm::logElapsed(double seconds)
@ -407,7 +586,7 @@ struct AllJoinImpl
size_t lnum = nextDistinct(left_cursor.cursor);
size_t rnum = nextDistinct(right_cursor.cursor);
bool all_fit_in_block = std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size;
bool all_fit_in_block = !max_block_size || std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size;
bool have_all_ranges = left_cursor.cursor.isValid() && right_cursor.cursor.isValid();
if (all_fit_in_block && have_all_ranges)
{
@ -421,7 +600,7 @@ struct AllJoinImpl
else
{
assert(state == nullptr);
state = std::make_unique<AllJoinState>(left_cursor.cursor, lpos, right_cursor.cursor, rpos);
state = std::make_unique<AllJoinState>(left_cursor, lpos, right_cursor, rpos);
state->addRange(0, left_cursor.getCurrent().clone(), lpos, lnum);
state->addRange(1, right_cursor.getCurrent().clone(), rpos, rnum);
return;
@ -466,6 +645,17 @@ void dispatchKind(JoinKind kind, Args && ... args)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind);
}
MutableColumns MergeJoinAlgorithm::getEmptyResultColumns() const
{
MutableColumns result_cols;
for (size_t i = 0; i < 2; ++i)
{
for (const auto & col : cursors[i]->sampleColumns())
result_cols.push_back(col->cloneEmpty());
}
return result_cols;
}
std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState()
{
if (all_join_state && all_join_state->finished())
@ -479,7 +669,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
/// Accumulate blocks with same key in all_join_state
for (size_t i = 0; i < 2; ++i)
{
if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(cursors[i]->cursor))
if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(*cursors[i]))
{
size_t pos = cursors[i]->cursor.getRow();
size_t num = nextDistinct(cursors[i]->cursor);
@ -499,15 +689,10 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
stat.max_blocks_loaded = std::max(stat.max_blocks_loaded, all_join_state->blocksStored());
/// join all rows with current key
MutableColumns result_cols;
for (size_t i = 0; i < 2; ++i)
{
for (const auto & col : cursors[i]->sampleColumns())
result_cols.push_back(col->cloneEmpty());
}
MutableColumns result_cols = getEmptyResultColumns();
size_t total_rows = 0;
while (total_rows < max_block_size)
while (!max_block_size || total_rows < max_block_size)
{
const auto & left_range = all_join_state->getLeft();
const auto & right_range = all_join_state->getRight();
@ -532,7 +717,52 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
return {};
}
MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin(JoinKind kind)
std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAsofJoinState()
{
if (strictness != JoinStrictness::Asof)
return {};
if (!cursors[1]->fullyCompleted())
return {};
auto & left_cursor = *cursors[0];
const auto & left_columns = left_cursor.getCurrent().getColumns();
MutableColumns result_cols = getEmptyResultColumns();
while (left_cursor->isValid() && asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
left_cursor->next();
}
while (isLeft(kind) && left_cursor->isValid())
{
/// return row with default values at right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
for (; i < result_cols.size(); ++i)
result_cols[i]->insertDefault();
chassert(i == result_cols.size());
left_cursor->next();
}
size_t result_rows = result_cols.empty() ? 0 : result_cols.front()->size();
if (result_rows)
return Status(Chunk(std::move(result_cols), result_rows));
return {};
}
MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin()
{
PaddedPODArray<UInt64> idx_map[2];
@ -595,7 +825,7 @@ struct AnyJoinImpl
FullMergeJoinCursor & right_cursor,
PaddedPODArray<UInt64> & left_map,
PaddedPODArray<UInt64> & right_map,
AnyJoinState & state,
AnyJoinState & any_join_state,
int null_direction_hint)
{
assert(enabled);
@ -656,21 +886,21 @@ struct AnyJoinImpl
}
}
/// Remember index of last joined row to propagate it to next block
/// Remember last joined row to propagate it to next block
state.setValue({});
any_join_state.setValue({});
if (!left_cursor->isValid())
{
state.set(0, left_cursor.cursor);
any_join_state.set(0, left_cursor);
if (cmp == 0 && isLeft(kind))
state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos));
any_join_state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos));
}
if (!right_cursor->isValid())
{
state.set(1, right_cursor.cursor);
any_join_state.set(1, right_cursor);
if (cmp == 0 && isRight(kind))
state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos));
any_join_state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos));
}
}
};
@ -680,40 +910,34 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
if (any_join_state.empty())
return {};
auto kind = table_join->getTableJoin().kind();
Chunk result;
for (size_t source_num = 0; source_num < 2; ++source_num)
{
auto & current = *cursors[source_num];
auto & state = any_join_state;
if (any_join_state.keys[source_num].equals(current.cursor))
if (any_join_state.keys[source_num].equals(current))
{
size_t start_pos = current->getRow();
size_t length = nextDistinct(current.cursor);
if (length && isLeft(kind) && source_num == 0)
{
if (state.value)
result = copyChunkResized(current.getCurrent(), state.value, start_pos, length);
if (any_join_state.value)
result = copyChunkResized(current.getCurrent(), any_join_state.value, start_pos, length);
else
result = createBlockWithDefaults(source_num, start_pos, length);
}
if (length && isRight(kind) && source_num == 1)
{
if (state.value)
result = copyChunkResized(state.value, current.getCurrent(), start_pos, length);
if (any_join_state.value)
result = copyChunkResized(any_join_state.value, current.getCurrent(), start_pos, length);
else
result = createBlockWithDefaults(source_num, start_pos, length);
}
/// We've found row with other key, no need to skip more rows with current key
if (current->isValid())
{
state.keys[source_num].reset();
}
any_join_state.keys[source_num].reset();
}
else
{
@ -726,7 +950,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
return {};
}
MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind)
MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin()
{
if (auto result = handleAnyJoinState())
return std::move(*result);
@ -771,10 +995,151 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind)
return Status(std::move(result));
}
MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin()
{
auto & left_cursor = *cursors[0];
if (!left_cursor->isValid())
return Status(0);
auto & right_cursor = *cursors[1];
if (!right_cursor->isValid())
return Status(1);
const auto & left_columns = left_cursor.getCurrent().getColumns();
const auto & right_columns = right_cursor.getCurrent().getColumns();
MutableColumns result_cols = getEmptyResultColumns();
while (left_cursor->isValid() && right_cursor->isValid())
{
auto lpos = left_cursor->getRow();
auto rpos = right_cursor->getRow();
auto cmp = compareCursors(*left_cursor, *right_cursor, null_direction_hint);
if (cmp == 0)
{
if (isNullAt(*left_cursor.getAsofColumn(), lpos))
cmp = -1;
if (isNullAt(*right_cursor.getAsofColumn(), rpos))
cmp = 1;
}
if (cmp == 0)
{
auto asof_cmp = compareAsofCursors(left_cursor, right_cursor, null_direction_hint);
if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1)
|| (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0))
{
/// First row in right table that is greater (or equal) than current row in left table
/// matches asof join condition the best
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : right_columns)
result_cols[i++]->insertFrom(*col, rpos);
chassert(i == result_cols.size());
left_cursor->next();
continue;
}
if (asof_inequality == ASOFJoinInequality::Less || asof_inequality == ASOFJoinInequality::LessOrEquals)
{
/// Asof condition is not (yet) satisfied, skip row in right table
right_cursor->next();
continue;
}
if ((asof_inequality == ASOFJoinInequality::Greater && asof_cmp >= 1)
|| (asof_inequality == ASOFJoinInequality::GreaterOrEquals && asof_cmp >= 0))
{
/// condition is satisfied, remember this row and move next to try to find better match
asof_join_state.set(right_cursor, rpos);
right_cursor->next();
continue;
}
if (asof_inequality == ASOFJoinInequality::Greater || asof_inequality == ASOFJoinInequality::GreaterOrEquals)
{
/// Asof condition is not satisfied anymore, use last matched row from right table
if (asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
}
else
{
asof_join_state.reset();
if (isLeft(kind))
{
/// return row with default values at right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (; i < result_cols.size(); ++i)
result_cols[i]->insertDefault();
chassert(i == result_cols.size());
}
}
left_cursor->next();
continue;
}
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO: implement ASOF equality join");
}
else if (cmp < 0)
{
if (asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
left_cursor->next();
continue;
}
else
{
asof_join_state.reset();
}
/// no matches for rows in left table, just pass them through
size_t num = nextDistinct(*left_cursor);
if (isLeft(kind) && num)
{
/// return them with default values at right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertRangeFrom(*col, lpos, num);
for (; i < result_cols.size(); ++i)
result_cols[i]->insertManyDefaults(num);
chassert(i == result_cols.size());
}
}
else
{
/// skip rows in right table until we find match for current row in left table
nextDistinct(*right_cursor);
}
}
size_t num_rows = result_cols.empty() ? 0 : result_cols.front()->size();
return Status(Chunk(std::move(result_cols), num_rows));
}
/// if `source_num == 0` get data from left cursor and fill defaults at right
/// otherwise - vice versa
Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const
{
ColumnRawPtrs cols;
{
const auto & columns_left = source_num == 0 ? cursors[0]->getCurrent().getColumns() : cursors[0]->sampleColumns();
@ -797,7 +1162,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t star
cols.push_back(col.get());
}
}
Chunk result_chunk;
copyColumnsResized(cols, start, num_rows, result_chunk);
return result_chunk;
@ -813,7 +1177,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num)
IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
{
auto kind = table_join->getTableJoin().kind();
if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted())
return Status(0);
@ -821,11 +1184,11 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted())
return Status(1);
if (auto result = handleAllJoinState())
{
return std::move(*result);
}
if (auto result = handleAsofJoinState())
return std::move(*result);
if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted())
{
@ -839,7 +1202,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
}
/// check if blocks are not intersecting at all
if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0)
if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0 && strictness != JoinStrictness::Asof)
{
if (cmp < 0)
{
@ -858,13 +1221,14 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
}
}
auto strictness = table_join->getTableJoin().strictness();
if (strictness == JoinStrictness::Any)
return anyJoin(kind);
return anyJoin();
if (strictness == JoinStrictness::All)
return allJoin(kind);
return allJoin();
if (strictness == JoinStrictness::Asof)
return asofJoin();
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness);
}
@ -883,9 +1247,26 @@ MergeJoinTransform::MergeJoinTransform(
/* always_read_till_end_= */ false,
/* empty_chunk_on_finish_= */ true,
table_join, input_headers, max_block_size)
, log(getLogger("MergeJoinTransform"))
{
LOG_TRACE(log, "Use MergeJoinTransform");
}
MergeJoinTransform::MergeJoinTransform(
JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
const Block & output_header,
size_t max_block_size,
UInt64 limit_hint_)
: IMergingTransform<MergeJoinAlgorithm>(
input_headers,
output_header,
/* have_all_inputs_= */ true,
limit_hint_,
/* always_read_till_end_= */ false,
/* empty_chunk_on_finish_= */ true,
kind_, strictness_, on_clause_, input_headers, max_block_size)
{
}
void MergeJoinTransform::onFinish()

View File

@ -8,6 +8,7 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include <array>
#include <boost/core/noncopyable.hpp>
@ -19,6 +20,7 @@
#include <Processors/Chunk.h>
#include <Processors/Merges/Algorithms/IMergingAlgorithm.h>
#include <Processors/Merges/IMergingTransform.h>
#include <Interpreters/TableJoin.h>
namespace Poco { class Logger; }
@ -35,57 +37,28 @@ using FullMergeJoinCursorPtr = std::unique_ptr<FullMergeJoinCursor>;
/// Used instead of storing previous block
struct JoinKeyRow
{
std::vector<ColumnPtr> row;
JoinKeyRow() = default;
explicit JoinKeyRow(const SortCursorImpl & impl_, size_t pos)
{
row.reserve(impl_.sort_columns.size());
for (const auto & col : impl_.sort_columns)
{
auto new_col = col->cloneEmpty();
new_col->insertFrom(*col, pos);
row.push_back(std::move(new_col));
}
}
JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos);
void reset()
{
row.clear();
}
bool equals(const FullMergeJoinCursor & cursor) const;
bool asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const;
bool equals(const SortCursorImpl & impl) const
{
if (row.empty())
return false;
void reset();
assert(this->row.size() == impl.sort_columns_size);
for (size_t i = 0; i < impl.sort_columns_size; ++i)
{
int cmp = this->row[i]->compareAt(0, impl.getRow(), *impl.sort_columns[i], impl.desc[i].nulls_direction);
if (cmp != 0)
return false;
}
return true;
}
std::vector<ColumnPtr> row;
};
/// Remembers previous key if it was joined in previous block
class AnyJoinState : boost::noncopyable
{
public:
AnyJoinState() = default;
void set(size_t source_num, const FullMergeJoinCursor & cursor);
void setValue(Chunk value_);
void set(size_t source_num, const SortCursorImpl & cursor)
{
assert(cursor.rows);
keys[source_num] = JoinKeyRow(cursor, cursor.rows - 1);
}
void reset(size_t source_num);
void setValue(Chunk value_) { value = std::move(value_); }
bool empty() const { return keys[0].row.empty() && keys[1].row.empty(); }
bool empty() const;
/// current keys
JoinKeyRow keys[2];
@ -118,8 +91,8 @@ public:
Chunk chunk;
};
AllJoinState(const SortCursorImpl & lcursor, size_t lpos,
const SortCursorImpl & rcursor, size_t rpos)
AllJoinState(const FullMergeJoinCursor & lcursor, size_t lpos,
const FullMergeJoinCursor & rcursor, size_t rpos)
: keys{JoinKeyRow(lcursor, lpos), JoinKeyRow(rcursor, rpos)}
{
}
@ -187,13 +160,32 @@ private:
size_t ridx = 0;
};
class AsofJoinState : boost::noncopyable
{
public:
void set(const FullMergeJoinCursor & rcursor, size_t rpos);
void reset();
bool hasMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
{
if (value.empty())
return false;
return key.asofMatch(cursor, asof_inequality);
}
JoinKeyRow key;
Chunk value;
size_t value_row = 0;
};
/*
* Wrapper for SortCursorImpl
*/
class FullMergeJoinCursor : boost::noncopyable
{
public:
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_);
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof = false);
bool fullyCompleted() const;
void setChunk(Chunk && chunk);
@ -203,17 +195,31 @@ public:
SortCursorImpl * operator-> () { return &cursor; }
const SortCursorImpl * operator-> () const { return &cursor; }
SortCursorImpl & operator* () { return cursor; }
const SortCursorImpl & operator* () const { return cursor; }
SortCursorImpl cursor;
const Block & sampleBlock() const { return sample_block; }
Columns sampleColumns() const { return sample_block.getColumns(); }
const IColumn * getAsofColumn() const
{
if (!asof_column_position)
return nullptr;
return cursor.all_columns[*asof_column_position];
}
String dump() const;
private:
Block sample_block;
SortDescription desc;
Chunk current_chunk;
bool recieved_all_blocks = false;
std::optional<size_t> asof_column_position;
};
/*
@ -223,22 +229,33 @@ private:
class MergeJoinAlgorithm final : public IMergingAlgorithm
{
public:
explicit MergeJoinAlgorithm(JoinPtr table_join, const Blocks & input_headers, size_t max_block_size_);
MergeJoinAlgorithm(JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
size_t max_block_size_);
MergeJoinAlgorithm(JoinPtr join_ptr, const Blocks & input_headers, size_t max_block_size_);
const char * getName() const override { return "MergeJoinAlgorithm"; }
void initialize(Inputs inputs) override;
void consume(Input & input, size_t source_num) override;
Status merge() override;
void logElapsed(double seconds);
void setAsofInequality(ASOFJoinInequality asof_inequality_);
void logElapsed(double seconds);
private:
std::optional<Status> handleAnyJoinState();
Status anyJoin(JoinKind kind);
Status anyJoin();
std::optional<Status> handleAllJoinState();
Status allJoin(JoinKind kind);
Status allJoin();
std::optional<Status> handleAsofJoinState();
Status asofJoin();
MutableColumns getEmptyResultColumns() const;
Chunk createBlockWithDefaults(size_t source_num);
Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const;
@ -246,12 +263,15 @@ private:
std::unordered_map<size_t, size_t> left_to_right_key_remap;
std::array<FullMergeJoinCursorPtr, 2> cursors;
ASOFJoinInequality asof_inequality = ASOFJoinInequality::None;
/// Keep some state to make connection between data in different blocks
/// Keep some state to make handle data from different blocks
AnyJoinState any_join_state;
std::unique_ptr<AllJoinState> all_join_state;
AsofJoinState asof_join_state;
JoinPtr table_join;
JoinKind kind;
JoinStrictness strictness;
size_t max_block_size;
int null_direction_hint = 1;
@ -281,12 +301,21 @@ public:
size_t max_block_size,
UInt64 limit_hint = 0);
MergeJoinTransform(
JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
const Block & output_header,
size_t max_block_size,
UInt64 limit_hint_ = 0);
String getName() const override { return "MergeJoinTransform"; }
void setAsofInequality(ASOFJoinInequality asof_inequality_) { algorithm.setAsofInequality(asof_inequality_); }
protected:
void onFinish() override;
LoggerPtr log;
};
}

View File

@ -109,7 +109,7 @@ void ScatterByPartitionTransform::generateOutputChunks()
hash.reset(num_rows);
for (const auto & column_number : key_columns)
columns[column_number]->updateWeakHash32(hash);
hash.update(columns[column_number]->getWeakHash32());
const auto & hash_data = hash.getData();
IColumn::Selector selector(num_rows);

View File

@ -50,7 +50,7 @@ TEST(Processors, PortsNotConnected)
processors->emplace_back(std::move(source));
processors->emplace_back(std::move(sink));
#ifndef ABORT_ON_LOGICAL_ERROR
#ifndef DEBUG_OR_SANITIZER_BUILD
try
{
QueryStatusPtr element;

View File

@ -0,0 +1,768 @@
#include <gtest/gtest.h>
#include <pcg_random.hpp>
#include <random>
#include <Poco/ConsoleChannel.h>
#include <Poco/Logger.h>
#include <Poco/AutoPtr.h>
#include <Columns/ColumnsNumber.h>
#include <Common/getRandomASCIIString.h>
#include <Common/randomSeed.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/TableJoin.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Sinks/NullSink.h>
#include <Processors/Sources/SourceFromChunks.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Transforms/MergeJoinTransform.h>
#include <Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <QueryPipeline/QueryPipeline.h>
using namespace DB;
namespace
{
QueryPipeline buildJoinPipeline(
std::shared_ptr<ISource> left_source,
std::shared_ptr<ISource> right_source,
size_t key_length = 1,
JoinKind kind = JoinKind::Inner,
JoinStrictness strictness = JoinStrictness::All,
ASOFJoinInequality asof_inequality = ASOFJoinInequality::None)
{
Blocks inputs;
inputs.emplace_back(left_source->getPort().getHeader());
inputs.emplace_back(right_source->getPort().getHeader());
Block out_header;
for (const auto & input : inputs)
{
for (ColumnWithTypeAndName column : input)
{
if (&input == &inputs.front())
column.name = "t1." + column.name;
else
column.name = "t2." + column.name;
out_header.insert(column);
}
}
TableJoin::JoinOnClause on_clause;
for (size_t i = 0; i < key_length; ++i)
{
on_clause.key_names_left.emplace_back(inputs[0].getByPosition(i).name);
on_clause.key_names_right.emplace_back(inputs[1].getByPosition(i).name);
}
auto joining = std::make_shared<MergeJoinTransform>(
kind,
strictness,
on_clause,
inputs, out_header, /* max_block_size = */ 0);
if (asof_inequality != ASOFJoinInequality::None)
joining->setAsofInequality(asof_inequality);
chassert(joining->getInputs().size() == 2);
connect(left_source->getPort(), joining->getInputs().front());
connect(right_source->getPort(), joining->getInputs().back());
auto * output_port = &joining->getOutputPort();
auto processors = std::make_shared<Processors>();
processors->emplace_back(std::move(left_source));
processors->emplace_back(std::move(right_source));
processors->emplace_back(std::move(joining));
QueryPipeline pipeline(QueryPlanResourceHolder{}, processors, output_port);
return pipeline;
}
std::shared_ptr<ISource> oneColumnSource(const std::vector<std::vector<UInt64>> & values)
{
Block header = {
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "key"),
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "idx"),
};
UInt64 idx = 0;
Chunks chunks;
for (const auto & chunk_values : values)
{
auto key_column = ColumnUInt64::create();
auto idx_column = ColumnUInt64::create();
for (auto n : chunk_values)
{
key_column->insertValue(n);
idx_column->insertValue(idx);
++idx;
}
chunks.emplace_back(Chunk(Columns{std::move(key_column), std::move(idx_column)}, chunk_values.size()));
}
return std::make_shared<SourceFromChunks>(header, std::move(chunks));
}
class SourceChunksBuilder
{
public:
explicit SourceChunksBuilder(const Block & header_)
: header(header_)
{
current_chunk = header.cloneEmptyColumns();
chassert(!current_chunk.empty());
}
void setBreakProbability(pcg64 & rng_)
{
/// random probability with possibility to have exact 0.0 and 1.0 values
break_prob = std::uniform_int_distribution<size_t>(0, 5)(rng_) / static_cast<double>(5);
rng = &rng_;
}
void addRow(const std::vector<Field> & row)
{
chassert(row.size() == current_chunk.size());
for (size_t i = 0; i < current_chunk.size(); ++i)
current_chunk[i]->insert(row[i]);
if (rng && std::uniform_real_distribution<>(0.0, 1.0)(*rng) < break_prob)
addChunk();
}
void addChunk()
{
if (current_chunk.front()->empty())
return;
size_t rows = current_chunk.front()->size();
chunks.emplace_back(std::move(current_chunk), rows);
current_chunk = header.cloneEmptyColumns();
}
std::shared_ptr<ISource> getSource()
{
addChunk();
/// copy chunk to allow reusing same builder
Chunks chunks_copy;
chunks_copy.reserve(chunks.size());
for (const auto & chunk : chunks)
chunks_copy.emplace_back(chunk.clone());
return std::make_shared<SourceFromChunks>(header, std::move(chunks_copy));
}
private:
Block header;
Chunks chunks;
MutableColumns current_chunk;
pcg64 * rng = nullptr;
double break_prob = 0.0;
};
std::vector<std::vector<Field>> getValuesFromBlock(const Block & block, const Names & names)
{
std::vector<std::vector<Field>> result;
for (size_t i = 0; i < block.rows(); ++i)
{
auto & row = result.emplace_back();
for (const auto & name : names)
block.getByName(name).column->get(i, row.emplace_back());
}
return result;
}
Block executePipeline(QueryPipeline && pipeline)
{
PullingPipelineExecutor executor(pipeline);
Blocks result_blocks;
while (true)
{
Block block;
bool is_ok = executor.pull(block);
if (!is_ok)
break;
result_blocks.emplace_back(std::move(block));
}
return concatenateBlocks(result_blocks);
}
template <typename T>
void assertColumnVectorEq(const typename ColumnVector<T>::Container & expected, const Block & block, const std::string & name)
{
const auto * actual = typeid_cast<const ColumnVector<T> *>(block.getByName(name).column.get());
ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector<T>).name();
auto get_first_diff = [&]() -> String
{
const auto & actual_data = actual->getData();
size_t num_rows = std::min(expected.size(), actual_data.size());
for (size_t i = 0; i < num_rows; ++i)
{
if (expected[i] != actual_data[i])
return fmt::format(", expected: {}, actual: {} at row {}", expected[i], actual_data[i], i);
}
return "";
};
EXPECT_EQ(actual->getData().size(), expected.size());
ASSERT_EQ(actual->getData(), expected) << "column name: " << name << get_first_diff();
}
template <typename T>
void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name)
{
const ColumnPtr & actual = block.getByName(name).column;
ASSERT_TRUE(checkColumn<T>(*actual));
ASSERT_TRUE(checkColumn<T>(expected));
EXPECT_EQ(actual->size(), expected.size());
auto dump_val = [](const IColumn & col, size_t i) -> String
{
Field value;
col.get(i, value);
return value.dump();
};
size_t num_rows = std::min(actual->size(), expected.size());
for (size_t i = 0; i < num_rows; ++i)
ASSERT_EQ(actual->compareAt(i, i, expected, 1), 0) << dump_val(*actual, i) << " != " << dump_val(expected, i) << " at row " << i;
}
template <typename T>
T getRandomFrom(pcg64 & rng, const std::initializer_list<T> & opts)
{
std::vector<T> options(opts.begin(), opts.end());
size_t idx = std::uniform_int_distribution<size_t>(0, options.size() - 1)(rng);
return options[idx];
}
void generateNextKey(pcg64 & rng, UInt64 & k1, String & k2)
{
size_t str_len = std::uniform_int_distribution<>(1, 10)(rng);
String new_k2 = getRandomASCIIString(str_len, rng);
if (new_k2.compare(k2) <= 0)
++k1;
k2 = new_k2;
}
bool isStrict(ASOFJoinInequality inequality)
{
return inequality == ASOFJoinInequality::Less || inequality == ASOFJoinInequality::Greater;
}
}
class FullSortingJoinTest : public ::testing::Test
{
public:
FullSortingJoinTest() = default;
void SetUp() override
{
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
Poco::Logger::root().setChannel(channel);
if (const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe)
Poco::Logger::root().setLevel(test_log_level);
else
Poco::Logger::root().setLevel("none");
UInt64 seed = randomSeed();
if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe)
seed = std::stoull(random_seed);
std::cout << "TEST_RANDOM_SEED=" << seed << std::endl;
rng = pcg64(seed);
}
void TearDown() override
{
}
pcg64 rng;
};
TEST_F(FullSortingJoinTest, AllAnyOneKey)
try
{
{
SCOPED_TRACE("Inner All");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {1, 2, 3, 4, 5} }),
oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
1, JoinKind::Inner, JoinStrictness::All));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {1, 2, 3, 4, 5} }),
oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
1, JoinKind::Inner, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner All");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
1, JoinKind::Inner, JoinStrictness::All));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 0, 1, 2, 3, 3, 4, 5}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 4, 4, 4, 3, 4, 5, 5}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
1, JoinKind::Inner, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
}
{
SCOPED_TRACE("Inner Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
1, JoinKind::Inner, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
}
{
SCOPED_TRACE("Left Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
1, JoinKind::Left, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
}
{
SCOPED_TRACE("Left Any");
Block result = executePipeline(buildJoinPipeline(
oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
1, JoinKind::Left, JoinStrictness::Any));
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
}
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AnySimple)
try
{
JoinKind kind = getRandomFrom(rng, {JoinKind::Inner, JoinKind::Left, JoinKind::Right});
SourceChunksBuilder left_source({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeString>(), "attr"},
});
SourceChunksBuilder right_source({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeString>(), "attr"},
});
left_source.setBreakProbability(rng);
right_source.setBreakProbability(rng);
size_t num_keys = std::uniform_int_distribution<>(100, 1000)(rng);
auto expected_left = ColumnString::create();
auto expected_right = ColumnString::create();
UInt64 k1 = 1;
String k2;
auto get_attr = [&](const String & side, size_t idx) -> String
{
return toString(k1) + "_" + k2 + "_" + side + "_" + toString(idx);
};
for (size_t i = 0; i < num_keys; ++i)
{
generateNextKey(rng, k1, k2);
/// Key is present in left, right or both tables. Both tables is more probable.
size_t key_presence = std::uniform_int_distribution<>(0, 10)(rng);
size_t num_rows_left = key_presence == 0 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
for (size_t j = 0; j < num_rows_left; ++j)
left_source.addRow({k1, k2, get_attr("left", j)});
size_t num_rows_right = key_presence == 1 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
for (size_t j = 0; j < num_rows_right; ++j)
right_source.addRow({k1, k2, get_attr("right", j)});
String left_attr = num_rows_left ? get_attr("left", 0) : "";
String right_attr = num_rows_right ? get_attr("right", 0) : "";
if (kind == JoinKind::Inner && num_rows_left && num_rows_right)
{
expected_left->insert(left_attr);
expected_right->insert(right_attr);
}
else if (kind == JoinKind::Left)
{
for (size_t j = 0; j < num_rows_left; ++j)
{
expected_left->insert(get_attr("left", j));
expected_right->insert(right_attr);
}
}
else if (kind == JoinKind::Right)
{
for (size_t j = 0; j < num_rows_right; ++j)
{
expected_left->insert(left_attr);
expected_right->insert(get_attr("right", j));
}
}
}
Block result_block = executePipeline(buildJoinPipeline(
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
kind, JoinStrictness::Any));
assertColumnEq<ColumnString>(*expected_left, result_block, "t1.attr");
assertColumnEq<ColumnString>(*expected_right, result_block, "t2.attr");
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AsofSimple)
try
{
SourceChunksBuilder left_source({
{std::make_shared<DataTypeString>(), "key"},
{std::make_shared<DataTypeUInt64>(), "t"},
});
left_source.addRow({"AMZN", 3});
left_source.addRow({"AMZN", 4});
left_source.addRow({"AMZN", 6});
left_source.addRow({"SBUX", 10});
SourceChunksBuilder right_source({
{std::make_shared<DataTypeString>(), "key"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeUInt64>(), "value"},
});
right_source.addRow({"AAPL", 1, 97});
right_source.addChunk();
right_source.addRow({"AAPL", 2, 98});
right_source.addRow({"AAPL", 3, 99});
right_source.addRow({"AMZN", 1, 100});
right_source.addRow({"AMZN", 2, 110});
right_source.addChunk();
right_source.addRow({"AMZN", 2, 110});
right_source.addChunk();
right_source.addRow({"AMZN", 4, 130});
right_source.addRow({"AMZN", 5, 140});
right_source.addRow({"SBUX", 8, 180});
right_source.addChunk();
right_source.addRow({"SBUX", 9, 190});
{
Block result_block = executePipeline(buildJoinPipeline(
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals));
auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});
ASSERT_EQ(values, (std::vector<std::vector<Field>>{
{"AMZN", 3u, 4u, 130u},
{"AMZN", 4u, 4u, 130u},
}));
}
{
Block result_block = executePipeline(buildJoinPipeline(
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals));
auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});
ASSERT_EQ(values, (std::vector<std::vector<Field>>{
{"AMZN", 3u, 2u, 110u},
{"AMZN", 4u, 4u, 130u},
{"AMZN", 6u, 5u, 140u},
{"SBUX", 10u, 9u, 190u},
}));
}
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AsofOnlyColumn)
try
{
auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} });
SourceChunksBuilder right_source_builder({
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeUInt64>(), "value"},
});
right_source_builder.setBreakProbability(rng);
for (const auto & row : std::vector<std::vector<Field>>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} })
right_source_builder.addRow(row);
auto right_source = right_source_builder.getSource();
auto pipeline = buildJoinPipeline(
left_source, right_source, /* key_length = */ 1,
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals);
Block result_block = executePipeline(std::move(pipeline));
ASSERT_EQ(
assert_cast<const ColumnUInt64 *>(result_block.getByName("t1.key").column.get())->getData(),
(ColumnUInt64::Container{3, 3, 3, 3, 3, 5, 5, 6, 9, 9, 10})
);
ASSERT_EQ(
assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.t").column.get())->getData(),
(ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 11})
);
ASSERT_EQ(
assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.value").column.get())->getData(),
(ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 111})
);
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AsofLessGeneratedTestData)
try
{
/// Generate data random and build expected result at the same time.
/// Test specific combinations of join kind and inequality per each run
auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals });
SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));
/// Key is complex, `k1, k2` for equality and `t` for asof
SourceChunksBuilder left_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
SourceChunksBuilder right_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
/// How small generated block should be
left_source_builder.setBreakProbability(rng);
right_source_builder.setBreakProbability(rng);
/// We are going to generate sorted data and remember expected result
ColumnInt64::Container expected;
UInt64 k1 = 1;
String k2;
auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
for (size_t key_num = 0; key_num < key_num_total; ++key_num)
{
/// Generate new key greater than previous
generateNextKey(rng, k1, k2);
Int64 left_t = 0;
/// Generate several rows for the key
size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng);
for (size_t i = 0; i < num_left_rows; ++i)
{
/// t is strictly greater than previous
left_t += std::uniform_int_distribution<>(1, 10)(rng);
auto left_arrtibute_value = 10 * left_t;
left_source_builder.addRow({k1, k2, left_t, left_arrtibute_value});
expected.push_back(left_arrtibute_value);
auto num_matches = 1 + std::poisson_distribution<>(4)(rng);
/// Generate several matches in the right table
auto right_t = left_t;
for (size_t j = 0; j < num_matches; ++j)
{
int min_step = isStrict(asof_inequality) ? 1 : 0;
right_t += std::uniform_int_distribution<>(min_step, 3)(rng);
/// First row should match
bool is_match = j == 0;
right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * left_arrtibute_value : -1});
}
/// Next left_t should be greater than right_t not to match with previous rows
left_t = right_t;
}
/// generate some rows with greater left_t to check that they are not matched
num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
for (size_t i = 0; i < num_left_rows; ++i)
{
left_t += std::uniform_int_distribution<>(1, 10)(rng);
left_source_builder.addRow({k1, k2, left_t, -10 * left_t});
if (join_kind == JoinKind::Left)
expected.push_back(-10 * left_t);
}
}
Block result_block = executePipeline(buildJoinPipeline(
left_source_builder.getSource(), right_source_builder.getSource(),
/* key_length = */ 3,
join_kind, JoinStrictness::Asof, asof_inequality));
assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");
for (auto & e : expected)
/// Non matched rows from left table have negative attr
/// Value if attribute in right table is 10 times greater than in left table
e = e < 0 ? 0 : 10 * e;
assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}
TEST_F(FullSortingJoinTest, AsofGreaterGeneratedTestData)
try
{
/// Generate data random and build expected result at the same time.
/// Test specific combinations of join kind and inequality per each run
auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals });
SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));
SourceChunksBuilder left_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
SourceChunksBuilder right_source_builder({
{std::make_shared<DataTypeUInt64>(), "k1"},
{std::make_shared<DataTypeString>(), "k2"},
{std::make_shared<DataTypeUInt64>(), "t"},
{std::make_shared<DataTypeInt64>(), "attr"},
});
left_source_builder.setBreakProbability(rng);
right_source_builder.setBreakProbability(rng);
ColumnInt64::Container expected;
UInt64 k1 = 1;
String k2;
UInt64 left_t = 0;
auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
for (size_t key_num = 0; key_num < key_num_total; ++key_num)
{
/// Generate new key greater than previous
generateNextKey(rng, k1, k2);
/// Generate some rows with smaller left_t to check that they are not matched
size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
for (size_t i = 0; i < num_left_rows; ++i)
{
left_t += std::uniform_int_distribution<>(1, 10)(rng);
left_source_builder.addRow({k1, k2, left_t, -10 * left_t});
if (join_kind == JoinKind::Left)
expected.push_back(-10 * left_t);
}
if (std::bernoulli_distribution(0.1)(rng))
continue;
size_t num_right_matches = std::uniform_int_distribution<>(1, 10)(rng);
auto right_t = left_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 0 : 1, 10)(rng);
auto attribute_value = 10 * right_t;
for (size_t j = 0; j < num_right_matches; ++j)
{
right_t += std::uniform_int_distribution<>(0, 3)(rng);
bool is_match = j == num_right_matches - 1;
right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * attribute_value : -1});
}
/// Next left_t should be greater than (or equals) right_t to match with previous rows
left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 100)(rng);
size_t num_left_matches = std::uniform_int_distribution<>(1, 100)(rng);
for (size_t j = 0; j < num_left_matches; ++j)
{
left_t += std::uniform_int_distribution<>(0, 3)(rng);
left_source_builder.addRow({k1, k2, left_t, attribute_value});
expected.push_back(attribute_value);
}
}
Block result_block = executePipeline(buildJoinPipeline(
left_source_builder.getSource(), right_source_builder.getSource(),
/* key_length = */ 3,
join_kind, JoinStrictness::Asof, asof_inequality));
assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");
for (auto & e : expected)
/// Non matched rows from left table have negative attr
/// Value if attribute in right table is 10 times greater than in left table
e = e < 0 ? 0 : 10 * e;
assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
}
catch (Exception & e)
{
std::cout << e.getStackTraceString() << std::endl;
throw;
}

View File

@ -133,7 +133,7 @@ TEST(CheckSortedTransform, CheckBadLastRow)
EXPECT_NO_THROW(executor.pull(chunk));
EXPECT_NO_THROW(executor.pull(chunk));
#ifndef ABORT_ON_LOGICAL_ERROR
#ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW(executor.pull(chunk), DB::Exception);
#endif
}
@ -158,7 +158,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock1)
Chunk chunk;
#ifndef ABORT_ON_LOGICAL_ERROR
#ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW(executor.pull(chunk), DB::Exception);
#endif
}
@ -181,7 +181,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock2)
PullingPipelineExecutor executor(pipeline);
Chunk chunk;
#ifndef ABORT_ON_LOGICAL_ERROR
#ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW(executor.pull(chunk), DB::Exception);
#endif
}
@ -204,7 +204,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock3)
PullingPipelineExecutor executor(pipeline);
Chunk chunk;
#ifndef ABORT_ON_LOGICAL_ERROR
#ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW(executor.pull(chunk), DB::Exception);
#endif
}

View File

@ -666,7 +666,7 @@ void TCPHandler::runImpl()
// Server should die on std logic errors in debug, like with assert()
// or ErrorCodes::LOGICAL_ERROR. This helps catch these errors in
// tests.
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
catch (const std::logic_error & e)
{
state.io.onException();

View File

@ -357,7 +357,7 @@ void RefreshTask::refreshTask()
stop_requested = true;
tryLogCurrentException(log,
"Unexpected exception in refresh scheduling, please investigate. The view will be stopped.");
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
abortOnFailedAssertion("Unexpected exception in refresh scheduling");
#endif
}

View File

@ -155,6 +155,10 @@ void printExceptionWithRespectToAbort(LoggerPtr log, const String & query_id)
{
std::rethrow_exception(ex);
}
catch (const TestException &) // NOLINT
{
/// Exception from a unit test, ignore it.
}
catch (const Exception & e)
{
NOEXCEPT_SCOPE({

View File

@ -34,7 +34,7 @@ public:
auto choice = distribution(generator);
if (choice == 0)
throw std::runtime_error("Unlucky...");
throw TestException();
return false;
}
@ -48,7 +48,7 @@ public:
{
auto choice = distribution(generator);
if (choice == 0)
throw std::runtime_error("Unlucky...");
throw TestException();
}
Priority getPriority() const override { return {}; }

View File

@ -1516,7 +1516,7 @@ static time_t tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin
void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const
{
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
constexpr bool paranoid_check_for_covered_parts_default = true;
#else
constexpr bool paranoid_check_for_covered_parts_default = false;
@ -2383,7 +2383,7 @@ static void paranoidCheckForCoveredPartsInZooKeeper(
const String & covering_part_name,
const StorageReplicatedMergeTree & storage)
{
#ifdef ABORT_ON_LOGICAL_ERROR
#ifdef DEBUG_OR_SANITIZER_BUILD
constexpr bool paranoid_check_for_covered_parts_default = true;
#else
constexpr bool paranoid_check_for_covered_parts_default = false;

Some files were not shown because too many files have changed in this diff Show More