Merge branch 'master' of github.com:ClickHouse/ClickHouse into json-data-type

This commit is contained in:
avogar 2024-07-22 15:35:57 +00:00
commit 300073f51d
183 changed files with 4599 additions and 1524 deletions

168
.github/actions/release/action.yml vendored Normal file
View File

@ -0,0 +1,168 @@
name: Release
description: Makes patch releases and creates new release branch

# NOTE(review): composite-action inputs are always *strings* at runtime.
# The `type:` / `options:` keys below are documentation only (GitHub ignores
# them in action metadata — they are valid only for workflow_dispatch inputs).
# Boolean-ish inputs therefore MUST be compared as strings: a bare
# `inputs.dry-run` or `! inputs.dry-run` is a bug, because the non-empty
# string 'false' is truthy in GitHub expressions. All guards below use
# explicit `== 'true'` / `!= 'true'` comparisons for this reason.
inputs:
  ref:
    description: 'Git reference (branch or commit sha) from which to create the release'
    required: true
    type: string
  type:
    description: 'The type of release: "new" for a new release or "patch" for a patch release'
    required: true
    type: choice
    options:
      - patch
      - new
  dry-run:
    description: 'Dry run'
    required: false
    default: true
    type: boolean
  token:
    required: true
    type: string

runs:
  using: "composite"
  steps:
    # Builds /tmp/release_info.json and exports RELEASE_TAG / COMMIT_SHA
    # into GITHUB_ENV for all subsequent steps.
    - name: Prepare Release Info
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --prepare-release-info \
          --ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
          ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
        echo "::group::Release Info"
        python3 -m json.tool /tmp/release_info.json
        echo "::endgroup::"
        release_tag=$(jq -r '.release_tag' /tmp/release_info.json)
        commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json)
        echo "Release Tag: $release_tag"
        echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
        echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
    - name: Download All Release Artifacts
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
    - name: Push Git Tag for the Release
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
    - name: Push New Release Branch
      if: ${{ inputs.type == 'new' }}
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
    - name: Bump CH Version and Update Contributors' List
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
    # Runs on master: regenerates version_date.tsv, docker versions, the
    # changelog for RELEASE_TAG (inside the style-test container) and
    # SECURITY.md. The working tree is restored later by the
    # "Restore git state" step.
    - name: Bump Docker versions, Changelog, Security
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        git checkout master
        python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security"
        echo "List versions"
        ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
        echo "Update docker version"
        ./utils/list-versions/update-docker-version.sh
        echo "Generate ChangeLog"
        export CI=1
        docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
          --volume=".:/ClickHouse" clickhouse/style-test \
          /ClickHouse/tests/ci/changelog.py -v --debug-helpers \
          --gh-user-or-token=${{ inputs.token }} --jobs=5 \
          --output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
        git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
        echo "Generate Security"
        python3 ./utils/security-generator/generate_security.py > SECURITY.md
        git diff HEAD
    - name: Create ChangeLog PR
      # string comparison on purpose — `! inputs.dry-run` would never be true
      # for the string 'false' (non-empty strings are truthy)
      if: ${{ inputs.type == 'patch' && inputs.dry-run != 'true' }}
      uses: peter-evans/create-pull-request@v6
      with:
        author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
        token: ${{ inputs.token }}
        committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
        commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
        branch: auto/${{ env.RELEASE_TAG }}
        assignees: ${{ github.event.sender.login }}  # assign the PR to the tag pusher
        delete-branch: true
        title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
        labels: do not test
        body: |
          Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
          ### Changelog category (leave one):
          - Not for changelog (changelog entry is not required)
    - name: Complete previous steps and Restore git state
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --set-progress-completed
        git reset --hard HEAD
        git checkout "$GITHUB_REF_NAME"
    - name: Create GH Release
      shell: bash
      if: ${{ inputs.type == 'patch' }}
      run: |
        python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
    - name: Export TGZ Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
    - name: Test TGZ Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
    - name: Export RPM Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
    - name: Test RPM Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
    - name: Export Debian Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
    - name: Test Debian Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
    - name: Docker clickhouse/clickhouse-server building
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        cd "./tests/ci"
        python3 ./create_release.py --set-progress-started --progress "docker server release"
        export CHECK_NAME="Docker server image"
        python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ inputs.dry-run != 'true' && '--push' || '' }}
        python3 ./create_release.py --set-progress-completed
    - name: Docker clickhouse/clickhouse-keeper building
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        cd "./tests/ci"
        python3 ./create_release.py --set-progress-started --progress "docker keeper release"
        export CHECK_NAME="Docker keeper image"
        python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ inputs.dry-run != 'true' && '--push' || '' }}
        python3 ./create_release.py --set-progress-completed
    - name: Set current Release progress to Completed with OK
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --set-progress-started --progress "completed"
        python3 ./tests/ci/create_release.py --set-progress-completed
    # Runs even after failures (but not on cancellation) so the release
    # status is always reported.
    - name: Post Slack Message
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}

View File

@ -1,44 +1,110 @@
name: AutoRelease name: AutoRelease
env: env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1 PYTHONUNBUFFERED: 1
DRY_RUN: true
concurrency: concurrency:
group: auto-release group: release
on: # yamllint disable-line rule:truthy on: # yamllint disable-line rule:truthy
# schedule: # Workflow uses a test bucket for packages and dry run mode (no real releases)
# - cron: '0 10-16 * * 1-5' schedule:
- cron: '0 9 * * *'
- cron: '0 15 * * *'
workflow_dispatch: workflow_dispatch:
inputs:
dry-run:
description: 'Dry run'
required: false
default: true
type: boolean
jobs: jobs:
CherryPick: AutoRelease:
runs-on: [self-hosted, style-checker-aarch64] runs-on: [self-hosted, release-maker]
steps: steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Set envs - name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: | run: |
cat >> "$GITHUB_ENV" << 'EOF' cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/cherry_pick
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}} ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK RCSK
REPO_OWNER=ClickHouse
REPO_NAME=ClickHouse
REPO_TEAM=core
EOF EOF
- name: Set DRY_RUN for schedule
if: ${{ github.event_name == 'schedule' }}
run: echo "DRY_RUN=true" >> "$GITHUB_ENV"
- name: Set DRY_RUN for dispatch
if: ${{ github.event_name == 'workflow_dispatch' }}
run: echo "DRY_RUN=${{ github.event.inputs.dry-run }}" >> "$GITHUB_ENV"
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0 fetch-depth: 0
- name: Auto-release - name: Auto Release Prepare
run: | run: |
cd "$GITHUB_WORKSPACE/tests/ci" cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --release-after-days=3 python3 auto_release.py --prepare
- name: Cleanup echo "::group::Auto Release Info"
if: always() python3 -m json.tool /tmp/autorelease_info.json
echo "::endgroup::"
{
echo 'AUTO_RELEASE_PARAMS<<EOF'
cat /tmp/autorelease_info.json
echo 'EOF'
} >> "$GITHUB_ENV"
- name: Post Release Branch statuses
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-status
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[0].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[1].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2] && fromJson(env.AUTO_RELEASE_PARAMS).releases[2].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3] && fromJson(env.AUTO_RELEASE_PARAMS).releases[3].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4] && fromJson(env.AUTO_RELEASE_PARAMS).releases[4].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Post Slack Message
if: ${{ !cancelled() }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }}
- name: Clean up
run: | run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:

View File

@ -2,7 +2,6 @@ name: CreateRelease
concurrency: concurrency:
group: release group: release
'on': 'on':
workflow_dispatch: workflow_dispatch:
inputs: inputs:
@ -31,136 +30,15 @@ jobs:
steps: steps:
- name: DebugInfo - name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: |
cat >> "$GITHUB_ENV" << 'EOF'
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
RELEASE_INFO_FILE=${{ runner.temp }}/release_info.json
EOF
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0 fetch-depth: 0
- name: Prepare Release Info - name: Call Release Action
run: | uses: ./.github/actions/release
python3 ./tests/ci/create_release.py --prepare-release-info \
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
--outfile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
echo "::group::Release Info"
python3 -m json.tool "$RELEASE_INFO_FILE"
echo "::endgroup::"
release_tag=$(jq -r '.release_tag' "$RELEASE_INFO_FILE")
commit_sha=$(jq -r '.commit_sha' "$RELEASE_INFO_FILE")
echo "Release Tag: $release_tag"
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
- name: Download All Release Artifacts
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push Git Tag for the Release
run: |
python3 ./tests/ci/create_release.py --push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push New Release Branch
if: ${{ inputs.type == 'new' }}
run: |
python3 ./tests/ci/create_release.py --push-new-release-branch --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump CH Version and Update Contributors' List
run: |
python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Checkout master
run: |
git checkout master
- name: Bump Docker versions, Changelog, Security
if: ${{ inputs.type == 'patch' }}
run: |
[ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1
echo "List versions"
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
echo "Update docker version"
./utils/list-versions/update-docker-version.sh
echo "Generate ChangeLog"
export CI=1
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
--volume=".:/ClickHouse" clickhouse/style-test \
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token="$GH_TOKEN" --jobs=5 \
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
echo "Generate Security"
python3 ./utils/security-generator/generate_security.py > SECURITY.md
git diff HEAD
- name: Create ChangeLog PR
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
uses: peter-evans/create-pull-request@v6
with: with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>" ref: ${{ inputs.ref }}
token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} type: ${{ inputs.type }}
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>" dry-run: ${{ inputs.dry-run }}
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
branch: auto/${{ env.RELEASE_TAG }}
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
delete-branch: true
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
labels: do not test
body: |
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)
- name: Reset changes if Dry-run
if: ${{ inputs.dry-run }}
run: |
git reset --hard HEAD
- name: Checkout back to GITHUB_REF
run: |
git checkout "$GITHUB_REF_NAME"
- name: Create GH Release
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --create-gh-release \
--infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export TGZ Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test TGZ Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export RPM Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test RPM Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export Debian Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test Debian Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Docker clickhouse/clickhouse-server building
if: ${{ inputs.type == 'patch' }}
run: |
cd "./tests/ci"
export CHECK_NAME="Docker server image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
- name: Docker clickhouse/clickhouse-keeper building
if: ${{ inputs.type == 'patch' }}
run: |
cd "./tests/ci"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
- name: Post Slack Message
if: always()
run: |
echo Slack Message

View File

@ -87,10 +87,13 @@
# define ASAN_POISON_MEMORY_REGION(a, b) # define ASAN_POISON_MEMORY_REGION(a, b)
#endif #endif
#if !defined(ABORT_ON_LOGICAL_ERROR) /// We used to have only ABORT_ON_LOGICAL_ERROR macro, but most of its uses were actually in places where we didn't care about logical errors
#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER) /// but wanted to check exactly if the current build type is debug or with sanitizer. This new macro is introduced to fix those places.
#define ABORT_ON_LOGICAL_ERROR #if !defined(DEBUG_OR_SANITIZER_BUILD)
#endif # if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) \
|| defined(UNDEFINED_BEHAVIOR_SANITIZER)
# define DEBUG_OR_SANITIZER_BUILD
# endif
#endif #endif
/// chassert(x) is similar to assert(x), but: /// chassert(x) is similar to assert(x), but:
@ -101,7 +104,7 @@
/// Also it makes sense to call abort() instead of __builtin_unreachable() in debug builds, /// Also it makes sense to call abort() instead of __builtin_unreachable() in debug builds,
/// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy) /// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy)
#if !defined(chassert) #if !defined(chassert)
#if defined(ABORT_ON_LOGICAL_ERROR) # if defined(DEBUG_OR_SANITIZER_BUILD)
// clang-format off // clang-format off
#include <base/types.h> #include <base/types.h>
namespace DB namespace DB

View File

@ -22,6 +22,21 @@ Structure of the `users` section:
<!-- Or --> <!-- Or -->
<password_sha256_hex></password_sha256_hex> <password_sha256_hex></password_sha256_hex>
<ssh_keys>
<ssh_key>
<type>ssh-ed25519</type>
<base64_key>AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj</base64_key>
</ssh_key>
<ssh_key>
<type>ecdsa-sha2-nistp256</type>
<base64_key>AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBNxeV2uN5UY6CUbCzTA1rXfYimKQA5ivNIqxdax4bcMXz4D0nSk2l5E1TkR5mG8EBWtmExSPbcEPJ8V7lyWWbA8=</base64_key>
</ssh_key>
<ssh_key>
<type>ssh-rsa</type>
<base64_key>AAAAB3NzaC1yc2EAAAADAQABAAABgQCpgqL1SHhPVBOTFlOm0pu+cYBbADzC2jL41sPMawYCJHDyHuq7t+htaVVh2fRgpAPmSEnLEC2d4BEIKMtPK3bfR8plJqVXlLt6Q8t4b1oUlnjb3VPA9P6iGcW7CV1FBkZQEVx8ckOfJ3F+kI5VsrRlEDgiecm/C1VPl0/9M2llW/mPUMaD65cM9nlZgM/hUeBrfxOEqM11gDYxEZm1aRSbZoY4dfdm3vzvpSQ6lrCrkjn3X2aSmaCLcOWJhfBWMovNDB8uiPuw54g3ioZ++qEQMlfxVsqXDGYhXCrsArOVuW/5RbReO79BvXqdssiYShfwo+GhQ0+aLWMIW/jgBkkqx/n7uKLzCMX7b2F+aebRYFh+/QXEj7SnihdVfr9ud6NN3MWzZ1ltfIczlEcFLrLJ1Yq57wW6wXtviWh59WvTWFiPejGjeSjjJyqqB49tKdFVFuBnIU5u/bch2DXVgiAEdQwUrIp1ACoYPq22HFFAYUJrL32y7RxX3PGzuAv3LOc=</base64_key>
</ssh_key>
</ssh_keys>
<access_management>0|1</access_management> <access_management>0|1</access_management>
<networks incl="networks" replace="replace"> <networks incl="networks" replace="replace">
@ -79,6 +94,24 @@ Password can be specified in plaintext or in SHA256 (hex format).
The first line of the result is the password. The second line is the corresponding double SHA1 hash. The first line of the result is the password. The second line is the corresponding double SHA1 hash.
### username/ssh-key {#user-sshkey}
This setting allows authenticating with SSH keys.
Given a SSH key (as generated by `ssh-keygen`) like
```
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj john@example.com
```
The `ssh_key` element is expected to be
```
<ssh_key>
<type>ssh-ed25519</type>
<base64_key>AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj</base64_key>
</ssh_key>
```
Substitute `ssh-ed25519` with `ssh-rsa` or `ecdsa-sha2-nistp256` for the other supported algorithms.
### access_management {#access_management-user-setting} ### access_management {#access_management-user-setting}
This setting enables or disables using of SQL-driven [access control and account management](../../guides/sre/user-management/index.md#access-control) for the user. This setting enables or disables using of SQL-driven [access control and account management](../../guides/sre/user-management/index.md#access-control) for the user.

View File

@ -297,7 +297,7 @@ Algorithm requires the special column in tables. This column:
- Must contain an ordered sequence. - Must contain an ordered sequence.
- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md). - Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md).
- Can't be the only column in the `JOIN` clause. - For `hash` join algorithm it can't be the only column in the `JOIN` clause.
Syntax `ASOF JOIN ... ON`: Syntax `ASOF JOIN ... ON`:
@ -337,7 +337,8 @@ For example, consider the following tables:
`ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can't be joined. `ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can't be joined.
:::note :::note
`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. `ASOF JOIN` is supported only by `hash` and `full_sorting_merge` join algorithms.
It's **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
::: :::
## PASTE JOIN Usage ## PASTE JOIN Usage

View File

@ -6,38 +6,38 @@ sidebar_label: Playground
# ClickHouse Playground {#clickhouse-playground} # ClickHouse Playground {#clickhouse-playground}
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. [ClickHouse Playground](https://play.clickhouse.com/play?user=play) позволяет пользователям экспериментировать с ClickHouse, выполняя запросы мгновенно, без необходимости настройки сервера или кластера.
Several example datasets are available in Playground. В Playground доступны несколько примеров наборов данных.
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). Вы можете выполнять запросы к Playground, используя любой HTTP-клиент, например [curl](https://curl.haxx.se) или [wget](https://www.gnu.org/software/wget/), или настроить соединение, используя драйверы [JDBC](../interfaces/jdbc.md) или [ODBC](../interfaces/odbc.md). Дополнительную информацию о программных продуктах, поддерживающих ClickHouse, можно найти [здесь](../interfaces/index.md).
## Credentials {#credentials} ## Учетные данные {#credentials}
| Parameter | Value | | Параметр | Значение |
|:--------------------|:-----------------------------------| |:--------------------|:-----------------------------------|
| HTTPS endpoint | `https://play.clickhouse.com:443/` | | HTTPS-адрес | `https://play.clickhouse.com:443/` |
| Native TCP endpoint | `play.clickhouse.com:9440` | | TCP-адрес | `play.clickhouse.com:9440` |
| User | `explorer` or `play` | | Пользователь | `explorer` или `play` |
| Password | (empty) | | Пароль | (пусто) |
## Limitations {#limitations} ## Ограничения {#limitations}
The queries are executed as a read-only user. It implies some limitations: Запросы выполняются от имени пользователя с правами только на чтение. Это предполагает некоторые ограничения:
- DDL queries are not allowed - DDL-запросы не разрешены
- INSERT queries are not allowed - INSERT-запросы не разрешены
The service also have quotas on its usage. Сервис также имеет квоты на использование.
## Examples {#examples} ## Примеры {#examples}
HTTPS endpoint example with `curl`: Пример использования HTTPS-адреса с `curl`:
``` bash ```bash
curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'"
``` ```
TCP endpoint example with [CLI](../interfaces/cli.md): Пример использования TCP-адреса с [CLI](../interfaces/cli.md):
``` bash ``` bash
clickhouse client --secure --host play.clickhouse.com --user explorer clickhouse client --secure --host play.clickhouse.com --user explorer

View File

@ -68,6 +68,41 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes)
return nullptr; return nullptr;
} }
/// Checks if the node is combination of isNull and notEquals functions of two the same arguments
bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs)
{
QueryTreeNodePtrWithHashSet all_arguments;
for (const auto & node : nodes)
{
const auto * func_node = node->as<FunctionNode>();
if (!func_node)
return false;
const auto & arguments = func_node->getArguments().getNodes();
if (func_node->getFunctionName() == "isNull" && arguments.size() == 1)
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2)
{
if (arguments[0]->isEqual(*arguments[1]))
return false;
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[1]));
}
else
return false;
if (all_arguments.size() > 2)
return false;
}
if (all_arguments.size() != 2)
return false;
lhs = all_arguments.begin()->node;
rhs = std::next(all_arguments.begin())->node;
return true;
}
bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value) bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value)
{ {
const auto * constant_node = node->as<ConstantNode>(); const auto * constant_node = node->as<ConstantNode>();
@ -213,11 +248,14 @@ private:
else if (func_name == "and") else if (func_name == "and")
{ {
const auto & and_arguments = argument_function->getArguments().getNodes(); const auto & and_arguments = argument_function->getArguments().getNodes();
bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull");
if (all_are_is_null) QueryTreeNodePtr is_null_lhs_arg;
QueryTreeNodePtr is_null_rhs_arg;
if (matchIsNullOfTwoArgs(and_arguments, is_null_lhs_arg, is_null_rhs_arg))
{ {
is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1); is_null_argument_to_indices[is_null_lhs_arg].push_back(or_operands.size() - 1);
is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1); is_null_argument_to_indices[is_null_rhs_arg].push_back(or_operands.size() - 1);
continue;
} }
/// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b` /// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b`

View File

@ -62,7 +62,7 @@ namespace ErrorCodes
namespace namespace
{ {
#if defined(ABORT_ON_LOGICAL_ERROR) #if defined(DEBUG_OR_SANITIZER_BUILD)
/** This visitor checks if Query Tree structure is valid after each pass /** This visitor checks if Query Tree structure is valid after each pass
* in debug build. * in debug build.
@ -183,7 +183,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node)
for (size_t i = 0; i < passes_size; ++i) for (size_t i = 0; i < passes_size; ++i)
{ {
passes[i]->run(query_tree_node, current_context); passes[i]->run(query_tree_node, current_context);
#if defined(ABORT_ON_LOGICAL_ERROR) #if defined(DEBUG_OR_SANITIZER_BUILD)
ValidationChecker(passes[i]->getName()).visit(query_tree_node); ValidationChecker(passes[i]->getName()).visit(query_tree_node);
#endif #endif
} }
@ -208,7 +208,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pa
for (size_t i = 0; i < up_to_pass_index; ++i) for (size_t i = 0; i < up_to_pass_index; ++i)
{ {
passes[i]->run(query_tree_node, current_context); passes[i]->run(query_tree_node, current_context);
#if defined(ABORT_ON_LOGICAL_ERROR) #if defined(DEBUG_OR_SANITIZER_BUILD)
ValidationChecker(passes[i]->getName()).visit(query_tree_node); ValidationChecker(passes[i]->getName()).visit(query_tree_node);
#endif #endif
} }

View File

@ -4124,7 +4124,9 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo
auto * column_to_interpolate = interpolate_node_typed.getExpression()->as<IdentifierNode>(); auto * column_to_interpolate = interpolate_node_typed.getExpression()->as<IdentifierNode>();
if (!column_to_interpolate) if (!column_to_interpolate)
throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found", throw Exception(
ErrorCodes::LOGICAL_ERROR,
"INTERPOLATE can work only for identifiers, but {} is found",
interpolate_node_typed.getExpression()->formatASTForErrorMessage()); interpolate_node_typed.getExpression()->formatASTForErrorMessage());
auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName(); auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName();

View File

@ -267,7 +267,7 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length) void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length)
#else #else
void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length) void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length)
@ -366,13 +366,10 @@ void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) cons
hash.update(wbuf.str().c_str(), wbuf.str().size()); hash.update(wbuf.str().c_str(), wbuf.str().size());
} }
void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnAggregateFunction::getWeakHash32() const
{ {
auto s = data.size(); auto s = data.size();
if (hash.getData().size() != data.size()) WeakHash32 hash(s);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), hash.getData().size());
auto & hash_data = hash.getData(); auto & hash_data = hash.getData();
std::vector<UInt8> v; std::vector<UInt8> v;
@ -383,6 +380,8 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
wbuf.finalize(); wbuf.finalize();
hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]); hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]);
} }
return hash;
} }
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const void ColumnAggregateFunction::updateHashFast(SipHash & hash) const
@ -466,7 +465,7 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size
insertMergeFrom(from, n); insertMergeFrom(from, n);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n) void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
#else #else
void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n) void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n)

View File

@ -145,7 +145,7 @@ public:
void insertData(const char * pos, size_t length) override; void insertData(const char * pos, size_t length) override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & from, size_t n) override; void insertFrom(const IColumn & from, size_t n) override;
#else #else
using IColumn::insertFrom; using IColumn::insertFrom;
@ -177,7 +177,7 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
@ -189,7 +189,7 @@ public:
void protect() override; void protect() override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & from, size_t start, size_t length) override; void insertRangeFrom(const IColumn & from, size_t start, size_t length) override;
#else #else
void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override; void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override;
@ -212,7 +212,7 @@ public:
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override int compareAt(size_t, size_t, const IColumn &, int) const override
#else #else
int doCompareAt(size_t, size_t, const IColumn &, int) const override int doCompareAt(size_t, size_t, const IColumn &, int) const override

View File

@ -271,15 +271,12 @@ void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const
getData().updateHashWithValue(offset + i, hash); getData().updateHashWithValue(offset + i, hash);
} }
void ColumnArray::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnArray::getWeakHash32() const
{ {
auto s = offsets->size(); auto s = offsets->size();
if (hash.getData().size() != s) WeakHash32 hash(s);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", s, hash.getData().size());
WeakHash32 internal_hash(data->size()); WeakHash32 internal_hash = data->getWeakHash32();
data->updateWeakHash32(internal_hash);
Offset prev_offset = 0; Offset prev_offset = 0;
const auto & offsets_data = getOffsets(); const auto & offsets_data = getOffsets();
@ -300,6 +297,8 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
prev_offset = offsets_data[i]; prev_offset = offsets_data[i];
} }
return hash;
} }
void ColumnArray::updateHashFast(SipHash & hash) const void ColumnArray::updateHashFast(SipHash & hash) const
@ -337,7 +336,7 @@ bool ColumnArray::tryInsert(const Field & x)
return true; return true;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnArray::insertFrom(const IColumn & src_, size_t n) void ColumnArray::insertFrom(const IColumn & src_, size_t n)
#else #else
void ColumnArray::doInsertFrom(const IColumn & src_, size_t n) void ColumnArray::doInsertFrom(const IColumn & src_, size_t n)
@ -396,7 +395,7 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan
: 1); : 1);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
#else #else
int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
@ -543,7 +542,7 @@ void ColumnArray::getExtremes(Field & min, Field & max) const
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -82,16 +82,16 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override; const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else #else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif #endif
void insert(const Field & x) override; void insert(const Field & x) override;
bool tryInsert(const Field & x) override; bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override; void insertFrom(const IColumn & src_, size_t n) override;
#else #else
void doInsertFrom(const IColumn & src_, size_t n) override; void doInsertFrom(const IColumn & src_, size_t n) override;
@ -103,7 +103,7 @@ public:
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const; template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;

View File

@ -3,6 +3,7 @@
#include <optional> #include <optional>
#include <Core/Field.h> #include <Core/Field.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/WeakHash.h>
#include <IO/BufferWithOwnMemory.h> #include <IO/BufferWithOwnMemory.h>
@ -85,7 +86,7 @@ public:
bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); }
void insert(const Field &) override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); }
bool tryInsert(const Field &) override { throwMustBeDecompressed(); } bool tryInsert(const Field &) override { throwMustBeDecompressed(); }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
#else #else
void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
@ -98,13 +99,13 @@ public:
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); } const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); } const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); } WeakHash32 getWeakHash32() const override { throwMustBeDecompressed(); }
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); } void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); } ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
void expand(const Filter &, bool) override { throwMustBeDecompressed(); } void expand(const Filter &, bool) override { throwMustBeDecompressed(); }
ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); }
ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }
#else #else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }

View File

@ -137,18 +137,10 @@ void ColumnConst::updatePermutation(PermutationSortDirection /*direction*/, Perm
{ {
} }
void ColumnConst::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnConst::getWeakHash32() const
{ {
if (hash.getData().size() != s) WeakHash32 element_hash = data->getWeakHash32();
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " return WeakHash32(s, element_hash.getData()[0]);
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 element_hash(1);
data->updateWeakHash32(element_hash);
size_t data_hash = element_hash.getData()[0];
for (auto & value : hash.getData())
value = static_cast<UInt32>(intHashCRC32(data_hash, value));
} }
void ColumnConst::compareColumn( void ColumnConst::compareColumn(

View File

@ -123,7 +123,7 @@ public:
return data->isNullAt(0); return data->isNullAt(0);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override
#else #else
void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override
@ -151,7 +151,7 @@ public:
++s; ++s;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn &, size_t) override void insertFrom(const IColumn &, size_t) override
#else #else
void doInsertFrom(const IColumn &, size_t) override void doInsertFrom(const IColumn &, size_t) override
@ -160,7 +160,7 @@ public:
++s; ++s;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; }
#else #else
void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; }
@ -204,7 +204,7 @@ public:
data->updateHashWithValue(0, hash); data->updateHashWithValue(0, hash);
} }
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override void updateHashFast(SipHash & hash) const override
{ {
@ -237,7 +237,7 @@ public:
return data->allocatedBytes() + sizeof(s); return data->allocatedBytes() + sizeof(s);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override
#else #else
int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override

View File

@ -28,11 +28,10 @@ namespace ErrorCodes
extern const int PARAMETER_OUT_OF_BOUND; extern const int PARAMETER_OUT_OF_BOUND;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
} }
template <is_decimal T> template <is_decimal T>
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const
#else #else
int ColumnDecimal<T>::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const int ColumnDecimal<T>::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const
@ -76,13 +75,10 @@ void ColumnDecimal<T>::updateHashWithValue(size_t n, SipHash & hash) const
} }
template <is_decimal T> template <is_decimal T>
void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnDecimal<T>::getWeakHash32() const
{ {
auto s = data.size(); auto s = data.size();
WeakHash32 hash(s);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const T * begin = data.data(); const T * begin = data.data();
const T * end = begin + s; const T * end = begin + s;
@ -94,6 +90,8 @@ void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
++begin; ++begin;
++hash_data; ++hash_data;
} }
return hash;
} }
template <is_decimal T> template <is_decimal T>
@ -335,7 +333,7 @@ void ColumnDecimal<T>::insertData(const char * src, size_t /*length*/)
} }
template <is_decimal T> template <is_decimal T>
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnDecimal<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnDecimal<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -55,13 +55,13 @@ public:
void reserve(size_t n) override { data.reserve_exact(n); } void reserve(size_t n) override { data.reserve_exact(n); }
void shrinkToFit() override { data.shrink_to_fit(); } void shrinkToFit() override { data.shrink_to_fit(); }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); } void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
#else #else
void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); } void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
#endif #endif
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override void insertManyFrom(const IColumn & src, size_t position, size_t length) override
#else #else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override
@ -76,7 +76,7 @@ public:
void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); }
void insert(const Field & x) override { data.push_back(x.get<T>()); } void insert(const Field & x) override { data.push_back(x.get<T>()); }
bool tryInsert(const Field & x) override; bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else #else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -102,9 +102,9 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override; const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;

View File

@ -215,7 +215,7 @@ bool ColumnDynamic::tryInsert(const DB::Field & x)
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n)
#else #else
void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n)
@ -269,7 +269,7 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n)
variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
#else #else
void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
@ -439,7 +439,7 @@ void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, si
} }
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#else #else
void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
@ -603,7 +603,7 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const
variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const
#else #else
int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const

View File

@ -4,6 +4,7 @@
#include <Columns/ColumnVector.h> #include <Columns/ColumnVector.h>
#include <Columns/ColumnVariant.h> #include <Columns/ColumnVariant.h>
#include <DataTypes/IDataType.h> #include <DataTypes/IDataType.h>
#include <Common/WeakHash.h>
namespace DB namespace DB
@ -142,7 +143,7 @@ public:
void insert(const Field & x) override; void insert(const Field & x) override;
bool tryInsert(const Field & x) override; bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override; void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
@ -173,9 +174,9 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override WeakHash32 getWeakHash32() const override
{ {
variant_column->updateWeakHash32(hash); return variant_column->getWeakHash32();
} }
void updateHashFast(SipHash & hash) const override void updateHashFast(SipHash & hash) const override
@ -219,7 +220,7 @@ public:
return scattered_columns; return scattered_columns;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;

View File

@ -74,7 +74,7 @@ bool ColumnFixedString::tryInsert(const Field & x)
return true; return true;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
#else #else
void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index)
@ -90,7 +90,7 @@ void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index)
memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n); memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length) void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else #else
void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@ -137,14 +137,10 @@ void ColumnFixedString::updateHashWithValue(size_t index, SipHash & hash) const
hash.update(reinterpret_cast<const char *>(&chars[n * index]), n); hash.update(reinterpret_cast<const char *>(&chars[n * index]), n);
} }
void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnFixedString::getWeakHash32() const
{ {
auto s = size(); auto s = size();
WeakHash32 hash(s);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, "
"hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const UInt8 * pos = chars.data(); const UInt8 * pos = chars.data();
UInt32 * hash_data = hash.getData().data(); UInt32 * hash_data = hash.getData().data();
@ -156,6 +152,8 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
pos += n; pos += n;
++hash_data; ++hash_data;
} }
return hash;
} }
void ColumnFixedString::updateHashFast(SipHash & hash) const void ColumnFixedString::updateHashFast(SipHash & hash) const
@ -227,7 +225,7 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation &
return elements.size(); return elements.size();
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -98,13 +98,13 @@ public:
bool tryInsert(const Field & x) override; bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t index) override; void insertFrom(const IColumn & src_, size_t index) override;
#else #else
void doInsertFrom(const IColumn & src_, size_t index) override; void doInsertFrom(const IColumn & src_, size_t index) override;
#endif #endif
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else #else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
@ -133,11 +133,11 @@ public:
void updateHashWithValue(size_t index, SipHash & hash) const override; void updateHashWithValue(size_t index, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#else #else
int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
@ -156,7 +156,7 @@ public:
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override; size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else #else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;

View File

@ -72,7 +72,7 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const
return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled); return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnFunction::insertFrom(const IColumn & src, size_t n) void ColumnFunction::insertFrom(const IColumn & src, size_t n)
#else #else
void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) void ColumnFunction::doInsertFrom(const IColumn & src, size_t n)
@ -93,7 +93,7 @@ void ColumnFunction::doInsertFrom(const IColumn & src, size_t n)
++elements_size; ++elements_size;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -4,6 +4,7 @@
#include <Core/NamesAndTypes.h> #include <Core/NamesAndTypes.h>
#include <Core/ColumnsWithTypeAndName.h> #include <Core/ColumnsWithTypeAndName.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB namespace DB
@ -94,12 +95,12 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName()); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName());
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override; void insertFrom(const IColumn & src, size_t n) override;
#else #else
void doInsertFrom(const IColumn & src, size_t n) override; void doInsertFrom(const IColumn & src, size_t n) override;
#endif #endif
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn &, size_t start, size_t length) override; void insertRangeFrom(const IColumn &, size_t start, size_t length) override;
#else #else
void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override; void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override;
@ -130,9 +131,9 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName()); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName());
} }
void updateWeakHash32(WeakHash32 &) const override WeakHash32 getWeakHash32() const override
{ {
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateWeakHash32 is not implemented for {}", getName()); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getWeakHash32 is not implemented for {}", getName());
} }
void updateHashFast(SipHash &) const override void updateHashFast(SipHash &) const override
@ -145,7 +146,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName()); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName());
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override int compareAt(size_t, size_t, const IColumn &, int) const override
#else #else
int doCompareAt(size_t, size_t, const IColumn &, int) const override int doCompareAt(size_t, size_t, const IColumn &, int) const override

View File

@ -7,8 +7,7 @@
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
#include <Common/WeakHash.h> #include <Common/WeakHash.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include "Storages/IndicesDescription.h" #include <base/types.h>
#include "base/types.h"
#include <base/sort.h> #include <base/sort.h>
#include <base/scope_guard.h> #include <base/scope_guard.h>
@ -159,7 +158,7 @@ void ColumnLowCardinality::insertDefault()
idx.insertPosition(getDictionary().getDefaultValueIndex()); idx.insertPosition(getDictionary().getDefaultValueIndex());
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n)
#else #else
void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n) void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n)
@ -191,7 +190,7 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n)
idx.insertPosition(getDictionary().uniqueInsertFrom(src, n)); idx.insertPosition(getDictionary().uniqueInsertFrom(src, n));
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -320,19 +319,10 @@ const char * ColumnLowCardinality::skipSerializedInArena(const char * pos) const
return getDictionary().skipSerializedInArena(pos); return getDictionary().skipSerializedInArena(pos);
} }
void ColumnLowCardinality::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnLowCardinality::getWeakHash32() const
{ {
auto s = size(); WeakHash32 dict_hash = getDictionary().getNestedColumn()->getWeakHash32();
return idx.getWeakHash(dict_hash);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const auto & dict = getDictionary().getNestedColumn();
WeakHash32 dict_hash(dict->size());
dict->updateWeakHash32(dict_hash);
idx.updateWeakHash(hash, dict_hash);
} }
void ColumnLowCardinality::updateHashFast(SipHash & hash) const void ColumnLowCardinality::updateHashFast(SipHash & hash) const
@ -372,7 +362,7 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs,
return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else #else
int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
@ -832,10 +822,11 @@ bool ColumnLowCardinality::Index::containsDefault() const
return contains; return contains;
} }
void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const WeakHash32 ColumnLowCardinality::Index::getWeakHash(const WeakHash32 & dict_hash) const
{ {
WeakHash32 hash(positions->size());
auto & hash_data = hash.getData(); auto & hash_data = hash.getData();
auto & dict_hash_data = dict_hash.getData(); const auto & dict_hash_data = dict_hash.getData();
auto update_weak_hash = [&](auto x) auto update_weak_hash = [&](auto x)
{ {
@ -844,10 +835,11 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 &
auto size = data.size(); auto size = data.size();
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
hash_data[i] = static_cast<UInt32>(intHashCRC32(dict_hash_data[data[i]], hash_data[i])); hash_data[i] = dict_hash_data[data[i]];
}; };
callForType(std::move(update_weak_hash), size_of_type); callForType(std::move(update_weak_hash), size_of_type);
return hash;
} }
void ColumnLowCardinality::Index::collectSerializedValueSizes( void ColumnLowCardinality::Index::collectSerializedValueSizes(

View File

@ -78,14 +78,14 @@ public:
bool tryInsert(const Field & x) override; bool tryInsert(const Field & x) override;
void insertDefault() override; void insertDefault() override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override; void insertFrom(const IColumn & src, size_t n) override;
#else #else
void doInsertFrom(const IColumn & src, size_t n) override; void doInsertFrom(const IColumn & src, size_t n) override;
#endif #endif
void insertFromFullColumn(const IColumn & src, size_t n); void insertFromFullColumn(const IColumn & src, size_t n);
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else #else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -111,7 +111,7 @@ public:
getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash); getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash);
} }
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash &) const override; void updateHashFast(SipHash &) const override;
@ -135,7 +135,7 @@ public:
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit)); return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
@ -325,7 +325,7 @@ public:
bool containsDefault() const; bool containsDefault() const;
void updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const; WeakHash32 getWeakHash(const WeakHash32 & dict_hash) const;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const; void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const;

View File

@ -143,9 +143,9 @@ void ColumnMap::updateHashWithValue(size_t n, SipHash & hash) const
nested->updateHashWithValue(n, hash); nested->updateHashWithValue(n, hash);
} }
void ColumnMap::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnMap::getWeakHash32() const
{ {
nested->updateWeakHash32(hash); return nested->getWeakHash32();
} }
void ColumnMap::updateHashFast(SipHash & hash) const void ColumnMap::updateHashFast(SipHash & hash) const
@ -153,7 +153,7 @@ void ColumnMap::updateHashFast(SipHash & hash) const
nested->updateHashFast(hash); nested->updateHashFast(hash);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnMap::insertFrom(const IColumn & src, size_t n) void ColumnMap::insertFrom(const IColumn & src, size_t n)
#else #else
void ColumnMap::doInsertFrom(const IColumn & src, size_t n) void ColumnMap::doInsertFrom(const IColumn & src, size_t n)
@ -162,7 +162,7 @@ void ColumnMap::doInsertFrom(const IColumn & src, size_t n)
nested->insertFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), n); nested->insertFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), n);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length) void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else #else
void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length) void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@ -171,7 +171,7 @@ void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t le
assert_cast<ColumnArray &>(*nested).insertManyFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), position, length); assert_cast<ColumnArray &>(*nested).insertManyFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), position, length);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -222,7 +222,7 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele
return res; return res;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else #else
int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const

View File

@ -64,10 +64,10 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override; const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override; void insertFrom(const IColumn & src_, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -83,7 +83,7 @@ public:
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override; ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;

View File

@ -56,25 +56,21 @@ void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
getNestedColumn().updateHashWithValue(n, hash); getNestedColumn().updateHashWithValue(n, hash);
} }
void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnNullable::getWeakHash32() const
{ {
auto s = size(); auto s = size();
if (hash.getData().size() != s) WeakHash32 hash = nested_column->getWeakHash32();
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
WeakHash32 old_hash = hash;
nested_column->updateWeakHash32(hash);
const auto & null_map_data = getNullMapData(); const auto & null_map_data = getNullMapData();
auto & hash_data = hash.getData(); auto & hash_data = hash.getData();
auto & old_hash_data = old_hash.getData();
/// Use old data for nulls. /// Use default for nulls.
for (size_t row = 0; row < s; ++row) for (size_t row = 0; row < s; ++row)
if (null_map_data[row]) if (null_map_data[row])
hash_data[row] = old_hash_data[row]; hash_data[row] = WeakHash32::kDefaultInitialValue;
return hash;
} }
void ColumnNullable::updateHashFast(SipHash & hash) const void ColumnNullable::updateHashFast(SipHash & hash) const
@ -221,7 +217,7 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const
return pos; return pos;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -262,7 +258,7 @@ bool ColumnNullable::tryInsert(const Field & x)
return true; return true;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnNullable::insertFrom(const IColumn & src, size_t n) void ColumnNullable::insertFrom(const IColumn & src, size_t n)
#else #else
void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) void ColumnNullable::doInsertFrom(const IColumn & src, size_t n)
@ -274,7 +270,7 @@ void ColumnNullable::doInsertFrom(const IColumn & src, size_t n)
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length) void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else #else
void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length) void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@ -414,7 +410,7 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#else #else
int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const

View File

@ -69,7 +69,7 @@ public:
char * serializeValueIntoMemory(size_t n, char * memory) const override; char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override; const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else #else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -77,7 +77,7 @@ public:
void insert(const Field & x) override; void insert(const Field & x) override;
bool tryInsert(const Field & x) override; bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override; void insertFrom(const IColumn & src, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else #else
@ -100,7 +100,7 @@ public:
void expand(const Filter & mask, bool inverted) override; void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
@ -133,7 +133,7 @@ public:
void protect() override; void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override; void getExtremes(Field & min, Field & max) const override;
// Special function for nullable minmax index // Special function for nullable minmax index

View File

@ -385,7 +385,7 @@ bool ColumnObject::tryInsert(const Field & x)
return true; return true;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnObject::insertFrom(const IColumn & src, size_t n) void ColumnObject::insertFrom(const IColumn & src, size_t n)
#else #else
void ColumnObject::doInsertFrom(const IColumn & src, size_t n) void ColumnObject::doInsertFrom(const IColumn & src, size_t n)
@ -418,7 +418,7 @@ void ColumnObject::doInsertFrom(const IColumn & src, size_t n)
insertFromSharedDataAndFillRemainingDynamicPaths(src_object_column, src_dynamic_paths_for_shared_data, n, 1); insertFromSharedDataAndFillRemainingDynamicPaths(src_object_column, src_dynamic_paths_for_shared_data, n, 1);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -747,13 +747,15 @@ void ColumnObject::updateHashWithValue(size_t n, SipHash & hash) const
shared_data->updateHashWithValue(n, hash); shared_data->updateHashWithValue(n, hash);
} }
void ColumnObject::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnObject::getWeakHash32() const
{ {
WeakHash32 hash(size());
for (const auto & [_, column] : typed_paths) for (const auto & [_, column] : typed_paths)
column->updateWeakHash32(hash); hash.update(column->getWeakHash32());
for (const auto & [_, column] : dynamic_paths) for (const auto & [_, column] : dynamic_paths)
column->updateWeakHash32(hash); hash.update(column->getWeakHash32());
shared_data->updateWeakHash32(hash); hash.update(shared_data->getWeakHash32());
return hash;
} }
void ColumnObject::updateHashFast(SipHash & hash) const void ColumnObject::updateHashFast(SipHash & hash) const

View File

@ -5,10 +5,13 @@
#include <Columns/ColumnArray.h> #include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h> #include <Columns/ColumnTuple.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <DataTypes/IDataType.h> #include <DataTypes/IDataType.h>
#include <DataTypes/Serializations/SerializationDynamic.h> #include <DataTypes/Serializations/SerializationDynamic.h>
#include <Formats/FormatSettings.h> #include <Formats/FormatSettings.h>
#include <Common/WeakHash.h>
namespace DB namespace DB
{ {
@ -73,7 +76,7 @@ public:
void insert(const Field & x) override; void insert(const Field & x) override;
bool tryInsert(const Field & x) override; bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override; void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else #else
@ -91,7 +94,7 @@ public:
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
@ -105,7 +108,7 @@ public:
void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
/// Values of ColumnObject are not comparable. /// Values of ColumnObject are not comparable.
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#else #else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; }

View File

@ -5,6 +5,7 @@
#include <Core/Names.h> #include <Core/Names.h>
#include <DataTypes/Serializations/SubcolumnsTree.h> #include <DataTypes/Serializations/SubcolumnsTree.h>
#include <Common/PODArray.h> #include <Common/PODArray.h>
#include <Common/WeakHash.h>
#include <DataTypes/IDataType.h> #include <DataTypes/IDataType.h>
@ -209,7 +210,7 @@ public:
void insert(const Field & field) override; void insert(const Field & field) override;
bool tryInsert(const Field & field) override; bool tryInsert(const Field & field) override;
void insertDefault() override; void insertDefault() override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override; void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else #else
@ -233,7 +234,7 @@ public:
/// Order of rows in ColumnObjectDeprecated is undefined. /// Order of rows in ColumnObjectDeprecated is undefined.
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override; void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#else #else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
@ -250,7 +251,7 @@ public:
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); } const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } WeakHash32 getWeakHash32() const override { throwMustBeConcrete(); }
void updateHashFast(SipHash &) const override; void updateHashFast(SipHash &) const override;
void expand(const Filter &, bool) override { throwMustBeConcrete(); } void expand(const Filter &, bool) override { throwMustBeConcrete(); }
bool hasEqualValues() const override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); }

View File

@ -174,7 +174,7 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const
return values->skipSerializedInArena(pos); return values->skipSerializedInArena(pos);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -252,7 +252,7 @@ bool ColumnSparse::tryInsert(const Field & x)
return true; return true;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnSparse::insertFrom(const IColumn & src, size_t n) void ColumnSparse::insertFrom(const IColumn & src, size_t n)
#else #else
void ColumnSparse::doInsertFrom(const IColumn & src, size_t n) void ColumnSparse::doInsertFrom(const IColumn & src, size_t n)
@ -454,7 +454,7 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray<Type> & indexes, size_t l
return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit); return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#else #else
int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
@ -678,20 +678,22 @@ void ColumnSparse::updateHashWithValue(size_t n, SipHash & hash) const
values->updateHashWithValue(getValueIndex(n), hash); values->updateHashWithValue(getValueIndex(n), hash);
} }
void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnSparse::getWeakHash32() const
{ {
if (hash.getData().size() != _size) WeakHash32 values_hash = values->getWeakHash32();
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " WeakHash32 hash(size());
"column size is {}, hash size is {}", _size, hash.getData().size());
auto & hash_data = hash.getData();
auto & values_hash_data = values_hash.getData();
auto offset_it = begin(); auto offset_it = begin();
auto & hash_data = hash.getData();
for (size_t i = 0; i < _size; ++i, ++offset_it) for (size_t i = 0; i < _size; ++i, ++offset_it)
{ {
size_t value_index = offset_it.getValueIndex(); size_t value_index = offset_it.getValueIndex();
auto data_ref = values->getDataAt(value_index); hash_data[i] = values_hash_data[value_index];
hash_data[i] = ::updateWeakHash32(reinterpret_cast<const UInt8 *>(data_ref.data), data_ref.size, hash_data[i]);
} }
return hash;
} }
void ColumnSparse::updateHashFast(SipHash & hash) const void ColumnSparse::updateHashFast(SipHash & hash) const

View File

@ -81,14 +81,14 @@ public:
char * serializeValueIntoMemory(size_t n, char * memory) const override; char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override; const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char *) const override; const char * skipSerializedInArena(const char *) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else #else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif #endif
void insert(const Field & x) override; void insert(const Field & x) override;
bool tryInsert(const Field & x) override; bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override; void insertFrom(const IColumn & src, size_t n) override;
#else #else
void doInsertFrom(const IColumn & src, size_t n) override; void doInsertFrom(const IColumn & src, size_t n) override;
@ -106,7 +106,7 @@ public:
template <typename Type> template <typename Type>
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const; ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
@ -139,7 +139,7 @@ public:
void protect() override; void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override; void getExtremes(Field & min, Field & max) const override;

View File

@ -39,7 +39,7 @@ ColumnString::ColumnString(const ColumnString & src)
last_offset, chars.size()); last_offset, chars.size());
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length) void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else #else
void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@ -108,13 +108,10 @@ MutableColumnPtr ColumnString::cloneResized(size_t to_size) const
return res; return res;
} }
void ColumnString::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnString::getWeakHash32() const
{ {
auto s = offsets.size(); auto s = offsets.size();
WeakHash32 hash(s);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const UInt8 * pos = chars.data(); const UInt8 * pos = chars.data();
UInt32 * hash_data = hash.getData().data(); UInt32 * hash_data = hash.getData().data();
@ -130,10 +127,12 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const
prev_offset = offset; prev_offset = offset;
++hash_data; ++hash_data;
} }
return hash;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -142,7 +142,7 @@ public:
return true; return true;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override void insertFrom(const IColumn & src_, size_t n) override
#else #else
void doInsertFrom(const IColumn & src_, size_t n) override void doInsertFrom(const IColumn & src_, size_t n) override
@ -169,7 +169,7 @@ public:
} }
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else #else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
@ -212,7 +212,7 @@ public:
hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size); hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size);
} }
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override void updateHashFast(SipHash & hash) const override
{ {
@ -220,7 +220,7 @@ public:
hash.update(reinterpret_cast<const char *>(chars.data()), chars.size() * sizeof(chars[0])); hash.update(reinterpret_cast<const char *>(chars.data()), chars.size() * sizeof(chars[0]));
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else #else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -250,7 +250,7 @@ public:
offsets.push_back(offsets.back() + 1); offsets.push_back(offsets.back() + 1);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override

View File

@ -206,7 +206,7 @@ bool ColumnTuple::tryInsert(const Field & x)
return true; return true;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnTuple::insertFrom(const IColumn & src_, size_t n) void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
#else #else
void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n)
@ -223,7 +223,7 @@ void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n)
columns[i]->insertFrom(*src.columns[i], n); columns[i]->insertFrom(*src.columns[i], n);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length) void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else #else
void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length) void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@ -310,16 +310,15 @@ void ColumnTuple::updateHashWithValue(size_t n, SipHash & hash) const
column->updateHashWithValue(n, hash); column->updateHashWithValue(n, hash);
} }
void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnTuple::getWeakHash32() const
{ {
auto s = size(); auto s = size();
WeakHash32 hash(s);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
for (const auto & column : columns) for (const auto & column : columns)
column->updateWeakHash32(hash); hash.update(column->getWeakHash32());
return hash;
} }
void ColumnTuple::updateHashFast(SipHash & hash) const void ColumnTuple::updateHashFast(SipHash & hash) const
@ -328,7 +327,7 @@ void ColumnTuple::updateHashFast(SipHash & hash) const
column->updateHashFast(hash); column->updateHashFast(hash);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -484,7 +483,7 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_
return 0; return 0;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else #else
int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const

View File

@ -66,7 +66,7 @@ public:
void insert(const Field & x) override; void insert(const Field & x) override;
bool tryInsert(const Field & x) override; bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override; void insertFrom(const IColumn & src_, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else #else
@ -81,9 +81,9 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override; const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else #else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -94,7 +94,7 @@ public:
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override; ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;

View File

@ -90,7 +90,7 @@ public:
return getNestedColumn()->updateHashWithValue(n, hash_func); return getNestedColumn()->updateHashWithValue(n, hash_func);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
@ -492,7 +492,7 @@ const char * ColumnUnique<ColumnType>::skipSerializedInArena(const char *) const
} }
template <typename ColumnType> template <typename ColumnType>
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnUnique<ColumnType>::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnUnique<ColumnType>::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else #else
int ColumnUnique<ColumnType>::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnUnique<ColumnType>::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const

View File

@ -595,7 +595,7 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position
} }
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnVariant::insertFrom(const IColumn & src_, size_t n) void ColumnVariant::insertFrom(const IColumn & src_, size_t n)
#else #else
void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n)
@ -604,7 +604,7 @@ void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n)
insertFromImpl(src_, n, nullptr); insertFromImpl(src_, n, nullptr);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length)
#else #else
void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length)
@ -613,7 +613,7 @@ void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t
insertRangeFromImpl(src_, start, length, nullptr); insertRangeFromImpl(src_, start, length, nullptr);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#else #else
void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
@ -789,36 +789,26 @@ void ColumnVariant::updateHashWithValue(size_t n, SipHash & hash) const
variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash); variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash);
} }
void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnVariant::getWeakHash32() const
{ {
auto s = size(); auto s = size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
/// If we have only NULLs, keep hash unchanged. /// If we have only NULLs, keep hash unchanged.
if (hasOnlyNulls()) if (hasOnlyNulls())
return; return WeakHash32(s);
/// Optimization for case when there is only 1 non-empty variant and no NULLs. /// Optimization for case when there is only 1 non-empty variant and no NULLs.
/// In this case we can just calculate weak hash for this variant. /// In this case we can just calculate weak hash for this variant.
if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
{ return variants[*non_empty_local_discr]->getWeakHash32();
variants[*non_empty_local_discr]->updateWeakHash32(hash);
return;
}
/// Calculate weak hash for all variants. /// Calculate weak hash for all variants.
std::vector<WeakHash32> nested_hashes; std::vector<WeakHash32> nested_hashes;
for (const auto & variant : variants) for (const auto & variant : variants)
{ nested_hashes.emplace_back(variant->getWeakHash32());
WeakHash32 nested_hash(variant->size());
variant->updateWeakHash32(nested_hash);
nested_hashes.emplace_back(std::move(nested_hash));
}
/// For each row hash is a hash of corresponding row from corresponding variant. /// For each row hash is a hash of corresponding row from corresponding variant.
WeakHash32 hash(s);
auto & hash_data = hash.getData(); auto & hash_data = hash.getData();
const auto & local_discriminators_data = getLocalDiscriminators(); const auto & local_discriminators_data = getLocalDiscriminators();
const auto & offsets_data = getOffsets(); const auto & offsets_data = getOffsets();
@ -827,11 +817,10 @@ void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
Discriminator discr = local_discriminators_data[i]; Discriminator discr = local_discriminators_data[i];
/// Update hash only for non-NULL values /// Update hash only for non-NULL values
if (discr != NULL_DISCRIMINATOR) if (discr != NULL_DISCRIMINATOR)
{ hash_data[i] = nested_hashes[discr].getData()[offsets_data[i]];
auto nested_hash = nested_hashes[local_discriminators_data[i]].getData()[offsets_data[i]];
hash_data[i] = static_cast<UInt32>(hashCRC32(nested_hash, hash_data[i]));
}
} }
return hash;
} }
void ColumnVariant::updateHashFast(SipHash & hash) const void ColumnVariant::updateHashFast(SipHash & hash) const
@ -1186,7 +1175,7 @@ bool ColumnVariant::hasEqualValues() const
return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues(); return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues();
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else #else
int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const

View File

@ -180,7 +180,7 @@ public:
void insert(const Field & x) override; void insert(const Field & x) override;
bool tryInsert(const Field & x) override; bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src_, size_t n) override; void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override;
void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; void insertManyFrom(const IColumn & src_, size_t position, size_t length) override;
@ -213,7 +213,7 @@ public:
const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos); const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos);
const char * skipSerializedInArena(const char * pos) const override; const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override; void expand(const Filter & mask, bool inverted) override;
@ -223,7 +223,7 @@ public:
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const; ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
ColumnPtr replicate(const Offsets & replicate_offsets) const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;

View File

@ -73,13 +73,10 @@ void ColumnVector<T>::updateHashWithValue(size_t n, SipHash & hash) const
} }
template <typename T> template <typename T>
void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const WeakHash32 ColumnVector<T>::getWeakHash32() const
{ {
auto s = data.size(); auto s = data.size();
WeakHash32 hash(s);
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
const T * begin = data.data(); const T * begin = data.data();
const T * end = begin + s; const T * end = begin + s;
@ -91,6 +88,8 @@ void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
++begin; ++begin;
++hash_data; ++hash_data;
} }
return hash;
} }
template <typename T> template <typename T>
@ -503,7 +502,7 @@ bool ColumnVector<T>::tryInsert(const DB::Field & x)
} }
template <typename T> template <typename T>
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else #else
void ColumnVector<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnVector<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

View File

@ -64,7 +64,7 @@ public:
return data.size(); return data.size();
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & src, size_t n) override void insertFrom(const IColumn & src, size_t n) override
#else #else
void doInsertFrom(const IColumn & src, size_t n) override void doInsertFrom(const IColumn & src, size_t n) override
@ -73,7 +73,7 @@ public:
data.push_back(assert_cast<const Self &>(src).getData()[n]); data.push_back(assert_cast<const Self &>(src).getData()[n]);
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override void insertManyFrom(const IColumn & src, size_t position, size_t length) override
#else #else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override
@ -114,7 +114,7 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
@ -150,7 +150,7 @@ public:
} }
/// This method implemented in header because it could be possibly devirtualized. /// This method implemented in header because it could be possibly devirtualized.
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
#else #else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
@ -240,7 +240,7 @@ public:
bool tryInsert(const DB::Field & x) override; bool tryInsert(const DB::Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else #else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;

View File

@ -47,7 +47,7 @@ String IColumn::dumpStructure() const
return res.str(); return res.str();
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void IColumn::insertFrom(const IColumn & src, size_t n) void IColumn::insertFrom(const IColumn & src, size_t n)
#else #else
void IColumn::doInsertFrom(const IColumn & src, size_t n) void IColumn::doInsertFrom(const IColumn & src, size_t n)

View File

@ -179,7 +179,7 @@ public:
/// Appends n-th element from other column with the same type. /// Appends n-th element from other column with the same type.
/// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation. /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation.
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
virtual void insertFrom(const IColumn & src, size_t n); virtual void insertFrom(const IColumn & src, size_t n);
#else #else
void insertFrom(const IColumn & src, size_t n) void insertFrom(const IColumn & src, size_t n)
@ -191,7 +191,7 @@ public:
/// Appends range of elements from other column with the same type. /// Appends range of elements from other column with the same type.
/// Could be used to concatenate columns. /// Could be used to concatenate columns.
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;
#else #else
void insertRangeFrom(const IColumn & src, size_t start, size_t length) void insertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -202,7 +202,7 @@ public:
#endif #endif
/// Appends one element from other column with the same type multiple times. /// Appends one element from other column with the same type multiple times.
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
virtual void insertManyFrom(const IColumn & src, size_t position, size_t length) virtual void insertManyFrom(const IColumn & src, size_t position, size_t length)
{ {
for (size_t i = 0; i < length; ++i) for (size_t i = 0; i < length; ++i)
@ -300,10 +300,10 @@ public:
/// passed bytes to hash must identify sequence of values unambiguously. /// passed bytes to hash must identify sequence of values unambiguously.
virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0; virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0;
/// Update hash function value. Hash is calculated for each element. /// Get hash function value. Hash is calculated for each element.
/// It's a fast weak hash function. Mainly need to scatter data between threads. /// It's a fast weak hash function. Mainly need to scatter data between threads.
/// WeakHash32 must have the same size as column. /// WeakHash32 must have the same size as column.
virtual void updateWeakHash32(WeakHash32 & hash) const = 0; virtual WeakHash32 getWeakHash32() const = 0;
/// Update state of hash with all column. /// Update state of hash with all column.
virtual void updateHashFast(SipHash & hash) const = 0; virtual void updateHashFast(SipHash & hash) const = 0;
@ -345,7 +345,7 @@ public:
* *
* For non Nullable and non floating point types, nan_direction_hint is ignored. * For non Nullable and non floating point types, nan_direction_hint is ignored.
*/ */
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
[[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; [[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
#else #else
[[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const [[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
@ -667,7 +667,7 @@ protected:
Sort full_sort, Sort full_sort,
PartialSort partial_sort) const; PartialSort partial_sort) const;
#if defined(ABORT_ON_LOGICAL_ERROR) #if defined(DEBUG_OR_SANITIZER_BUILD)
virtual void doInsertFrom(const IColumn & src, size_t n); virtual void doInsertFrom(const IColumn & src, size_t n);
virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB namespace DB
@ -26,7 +27,7 @@ public:
size_t byteSize() const override { return 0; } size_t byteSize() const override { return 0; }
size_t byteSizeAt(size_t) const override { return 0; } size_t byteSizeAt(size_t) const override { return 0; }
size_t allocatedBytes() const override { return 0; } size_t allocatedBytes() const override { return 0; }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#else #else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
@ -63,15 +64,16 @@ public:
{ {
} }
void updateWeakHash32(WeakHash32 & /*hash*/) const override WeakHash32 getWeakHash32() const override
{ {
return WeakHash32(s);
} }
void updateHashFast(SipHash & /*hash*/) const override void updateHashFast(SipHash & /*hash*/) const override
{ {
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn &, size_t) override void insertFrom(const IColumn &, size_t) override
#else #else
void doInsertFrom(const IColumn &, size_t) override void doInsertFrom(const IColumn &, size_t) override
@ -80,7 +82,7 @@ public:
++s; ++s;
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override
#else #else
void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <optional> #include <optional>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/WeakHash.h>
namespace DB namespace DB
{ {
@ -85,7 +86,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique."); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique.");
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn &, size_t, size_t) override void insertRangeFrom(const IColumn &, size_t, size_t) override
#else #else
void doInsertRangeFrom(const IColumn &, size_t, size_t) override void doInsertRangeFrom(const IColumn &, size_t, size_t) override
@ -166,9 +167,9 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique."); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique.");
} }
void updateWeakHash32(WeakHash32 &) const override WeakHash32 getWeakHash32() const override
{ {
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method updateWeakHash32 is not supported for ColumnUnique."); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getWeakHash32 is not supported for ColumnUnique.");
} }
void updateHashFast(SipHash &) const override void updateHashFast(SipHash &) const override

View File

@ -52,7 +52,7 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows)
} }
#if !defined(ABORT_ON_LOGICAL_ERROR) #if !defined(DEBUG_OR_SANITIZER_BUILD)
static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src) static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src)
#else #else
static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src) static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src)

View File

@ -60,8 +60,7 @@ TEST(WeakHash32, ColumnVectorU8)
data.push_back(i); data.push_back(i);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -77,8 +76,7 @@ TEST(WeakHash32, ColumnVectorI8)
data.push_back(i); data.push_back(i);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -94,8 +92,7 @@ TEST(WeakHash32, ColumnVectorU16)
data.push_back(i); data.push_back(i);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -111,8 +108,7 @@ TEST(WeakHash32, ColumnVectorI16)
data.push_back(i); data.push_back(i);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -128,8 +124,7 @@ TEST(WeakHash32, ColumnVectorU32)
data.push_back(i << 16u); data.push_back(i << 16u);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -145,8 +140,7 @@ TEST(WeakHash32, ColumnVectorI32)
data.push_back(i << 16); data.push_back(i << 16);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -162,8 +156,7 @@ TEST(WeakHash32, ColumnVectorU64)
data.push_back(i << 32u); data.push_back(i << 32u);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -179,8 +172,7 @@ TEST(WeakHash32, ColumnVectorI64)
data.push_back(i << 32); data.push_back(i << 32);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -204,8 +196,7 @@ TEST(WeakHash32, ColumnVectorU128)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), eq_data); checkColumn(hash.getData(), eq_data);
} }
@ -221,8 +212,7 @@ TEST(WeakHash32, ColumnVectorI128)
data.push_back(i << 32); data.push_back(i << 32);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -238,8 +228,7 @@ TEST(WeakHash32, ColumnDecimal32)
data.push_back(i << 16); data.push_back(i << 16);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -255,8 +244,7 @@ TEST(WeakHash32, ColumnDecimal64)
data.push_back(i << 32); data.push_back(i << 32);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -272,8 +260,7 @@ TEST(WeakHash32, ColumnDecimal128)
data.push_back(i << 32); data.push_back(i << 32);
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), col->getData()); checkColumn(hash.getData(), col->getData());
} }
@ -294,8 +281,7 @@ TEST(WeakHash32, ColumnString1)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -331,8 +317,7 @@ TEST(WeakHash32, ColumnString2)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -369,8 +354,7 @@ TEST(WeakHash32, ColumnString3)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -397,8 +381,7 @@ TEST(WeakHash32, ColumnFixedString)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -444,8 +427,7 @@ TEST(WeakHash32, ColumnArray)
auto col_arr = ColumnArray::create(std::move(val), std::move(off)); auto col_arr = ColumnArray::create(std::move(val), std::move(off));
WeakHash32 hash(col_arr->size()); WeakHash32 hash = col_arr->getWeakHash32();
col_arr->updateWeakHash32(hash);
checkColumn(hash.getData(), eq_data); checkColumn(hash.getData(), eq_data);
} }
@ -479,8 +461,7 @@ TEST(WeakHash32, ColumnArray2)
auto col_arr = ColumnArray::create(std::move(val), std::move(off)); auto col_arr = ColumnArray::create(std::move(val), std::move(off));
WeakHash32 hash(col_arr->size()); WeakHash32 hash = col_arr->getWeakHash32();
col_arr->updateWeakHash32(hash);
checkColumn(hash.getData(), eq_data); checkColumn(hash.getData(), eq_data);
} }
@ -536,8 +517,7 @@ TEST(WeakHash32, ColumnArrayArray)
auto col_arr = ColumnArray::create(std::move(val), std::move(off)); auto col_arr = ColumnArray::create(std::move(val), std::move(off));
auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2)); auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2));
WeakHash32 hash(col_arr_arr->size()); WeakHash32 hash = col_arr_arr->getWeakHash32();
col_arr_arr->updateWeakHash32(hash);
checkColumn(hash.getData(), eq_data); checkColumn(hash.getData(), eq_data);
} }
@ -555,8 +535,7 @@ TEST(WeakHash32, ColumnConst)
auto col_const = ColumnConst::create(std::move(inner_col), 256); auto col_const = ColumnConst::create(std::move(inner_col), 256);
WeakHash32 hash(col_const->size()); WeakHash32 hash = col_const->getWeakHash32();
col_const->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -576,8 +555,7 @@ TEST(WeakHash32, ColumnLowcardinality)
} }
} }
WeakHash32 hash(col->size()); WeakHash32 hash = col->getWeakHash32();
col->updateWeakHash32(hash);
checkColumn(hash.getData(), data); checkColumn(hash.getData(), data);
} }
@ -602,8 +580,7 @@ TEST(WeakHash32, ColumnNullable)
auto col_null = ColumnNullable::create(std::move(col), std::move(mask)); auto col_null = ColumnNullable::create(std::move(col), std::move(mask));
WeakHash32 hash(col_null->size()); WeakHash32 hash = col_null->getWeakHash32();
col_null->updateWeakHash32(hash);
checkColumn(hash.getData(), eq); checkColumn(hash.getData(), eq);
} }
@ -633,8 +610,7 @@ TEST(WeakHash32, ColumnTupleUInt64UInt64)
columns.emplace_back(std::move(col2)); columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns)); auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size()); WeakHash32 hash = col_tuple->getWeakHash32();
col_tuple->updateWeakHash32(hash);
checkColumn(hash.getData(), eq); checkColumn(hash.getData(), eq);
} }
@ -671,8 +647,7 @@ TEST(WeakHash32, ColumnTupleUInt64String)
columns.emplace_back(std::move(col2)); columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns)); auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size()); WeakHash32 hash = col_tuple->getWeakHash32();
col_tuple->updateWeakHash32(hash);
checkColumn(hash.getData(), eq); checkColumn(hash.getData(), eq);
} }
@ -709,8 +684,7 @@ TEST(WeakHash32, ColumnTupleUInt64FixedString)
columns.emplace_back(std::move(col2)); columns.emplace_back(std::move(col2));
auto col_tuple = ColumnTuple::create(std::move(columns)); auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size()); WeakHash32 hash = col_tuple->getWeakHash32();
col_tuple->updateWeakHash32(hash);
checkColumn(hash.getData(), eq); checkColumn(hash.getData(), eq);
} }
@ -756,8 +730,7 @@ TEST(WeakHash32, ColumnTupleUInt64Array)
columns.emplace_back(ColumnArray::create(std::move(val), std::move(off))); columns.emplace_back(ColumnArray::create(std::move(val), std::move(off)));
auto col_tuple = ColumnTuple::create(std::move(columns)); auto col_tuple = ColumnTuple::create(std::move(columns));
WeakHash32 hash(col_tuple->size()); WeakHash32 hash = col_tuple->getWeakHash32();
col_tuple->updateWeakHash32(hash);
checkColumn(hash.getData(), eq_data); checkColumn(hash.getData(), eq_data);
} }

View File

@ -206,7 +206,7 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
} }
else else
{ {
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(old_size); [[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size);
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed); ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
} }
} }
@ -239,7 +239,7 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
void * new_buf = ::realloc(buf, new_size); void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf) if (nullptr == new_buf)
{ {
[[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(old_size); [[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size);
throw DB::ErrnoException( throw DB::ErrnoException(
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY,
"Allocator: Cannot realloc from {} to {}", "Allocator: Cannot realloc from {} to {}",

View File

@ -38,7 +38,7 @@ namespace
std::erase_if(left_subkeys, [&](const String & key) { return ignore_keys->contains(key); }); std::erase_if(left_subkeys, [&](const String & key) { return ignore_keys->contains(key); });
std::erase_if(right_subkeys, [&](const String & key) { return ignore_keys->contains(key); }); std::erase_if(right_subkeys, [&](const String & key) { return ignore_keys->contains(key); });
#if defined(ABORT_ON_LOGICAL_ERROR) #if defined(DEBUG_OR_SANITIZER_BUILD)
/// Compound `ignore_keys` are not yet implemented. /// Compound `ignore_keys` are not yet implemented.
for (const auto & ignore_key : *ignore_keys) for (const auto & ignore_key : *ignore_keys)
chassert(ignore_key.find('.') == std::string_view::npos); chassert(ignore_key.find('.') == std::string_view::npos);

View File

@ -234,10 +234,10 @@
M(PartsCommitted, "Deprecated. See PartsActive.") \ M(PartsCommitted, "Deprecated. See PartsActive.") \
M(PartsPreActive, "The part is in data_parts, but not used for SELECTs.") \ M(PartsPreActive, "The part is in data_parts, but not used for SELECTs.") \
M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \ M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \
M(AttachedDatabase, "Active database, used by current and upcoming SELECTs.") \ M(AttachedDatabase, "Active databases.") \
M(AttachedTable, "Active table, used by current and upcoming SELECTs.") \ M(AttachedTable, "Active tables.") \
M(AttachedView, "Active view, used by current and upcoming SELECTs.") \ M(AttachedView, "Active views.") \
M(AttachedDictionary, "Active dictionary, used by current and upcoming SELECTs.") \ M(AttachedDictionary, "Active dictionaries.") \
M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \ M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \
M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \ M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \
M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \ M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \

View File

@ -64,7 +64,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc
{ {
// In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure. // In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure.
// Log the message before we fail. // Log the message before we fail.
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
if (code == ErrorCodes::LOGICAL_ERROR) if (code == ErrorCodes::LOGICAL_ERROR)
{ {
abortOnFailedAssertion(msg, trace.data(), 0, trace.size()); abortOnFailedAssertion(msg, trace.data(), 0, trace.size());
@ -443,7 +443,7 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b
} }
catch (...) {} // NOLINT(bugprone-empty-catch) catch (...) {} // NOLINT(bugprone-empty-catch)
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
try try
{ {
throw; throw;

View File

@ -244,6 +244,15 @@ private:
const char * className() const noexcept override { return "DB::ErrnoException"; } const char * className() const noexcept override { return "DB::ErrnoException"; }
}; };
/// An exception to use in unit tests to test interfaces.
/// It is distinguished from others, so it does not have to be logged.
class TestException : public Exception
{
public:
using Exception::Exception;
};
using Exceptions = std::vector<std::exception_ptr>; using Exceptions = std::vector<std::exception_ptr>;
/** Try to write an exception to the log (and forget about it). /** Try to write an exception to the log (and forget about it).

View File

@ -192,7 +192,7 @@ void MemoryTracker::debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused]
{ {
/// Big allocations through allocNoThrow (without checking memory limits) may easily lead to OOM (and it's hard to debug). /// Big allocations through allocNoThrow (without checking memory limits) may easily lead to OOM (and it's hard to debug).
/// Let's find them. /// Let's find them.
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
if (size < 0) if (size < 0)
return; return;

View File

@ -424,7 +424,7 @@ static void logUnexpectedSyscallError(std::string name)
{ {
std::string message = fmt::format("{} failed: {}", name, errnoToString()); std::string message = fmt::format("{} failed: {}", name, errnoToString());
LOG_WARNING(&Poco::Logger::get("PageCache"), "{}", message); LOG_WARNING(&Poco::Logger::get("PageCache"), "{}", message);
#if defined(ABORT_ON_LOGICAL_ERROR) #if defined(DEBUG_OR_SANITIZER_BUILD)
volatile bool true_ = true; volatile bool true_ = true;
if (true_) // suppress warning about missing [[noreturn]] if (true_) // suppress warning about missing [[noreturn]]
abortOnFailedAssertion(message); abortOnFailedAssertion(message);

View File

@ -23,8 +23,20 @@ namespace DB
LazyPipeFDs TraceSender::pipe; LazyPipeFDs TraceSender::pipe;
static thread_local bool inside_send = false;
void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Extras extras) void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Extras extras)
{ {
/** The method shouldn't be called recursively or throw exceptions.
* There are several reasons:
* - avoid infinite recursion when some of subsequent functions invoke tracing;
* - avoid inconsistent writes if the method was interrupted by a signal handler in the middle of writing,
* and then another tracing is invoked (e.g., from query profiler).
*/
if (unlikely(inside_send))
return;
inside_send = true;
DENY_ALLOCATIONS_IN_SCOPE;
constexpr size_t buf_size = sizeof(char) /// TraceCollector stop flag constexpr size_t buf_size = sizeof(char) /// TraceCollector stop flag
+ sizeof(UInt8) /// String size + sizeof(UInt8) /// String size
+ QUERY_ID_MAX_LEN /// Maximum query_id length + QUERY_ID_MAX_LEN /// Maximum query_id length
@ -80,6 +92,8 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext
writePODBinary(extras.increment, out); writePODBinary(extras.increment, out);
out.next(); out.next();
inside_send = false;
} }
} }

View File

@ -1,2 +1,24 @@
#include <Common/WeakHash.h> #include <Common/WeakHash.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
void WeakHash32::update(const WeakHash32 & other)
{
size_t size = data.size();
if (size != other.data.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match:"
"left size is {}, right size is {}", size, other.data.size());
for (size_t i = 0; i < size; ++i)
data[i] = static_cast<UInt32>(intHashCRC32(other.data[i], data[i]));
}
}

View File

@ -11,9 +11,8 @@ namespace DB
/// The main purpose why this class needed is to support data initialization. Initially, every bit is 1. /// The main purpose why this class needed is to support data initialization. Initially, every bit is 1.
class WeakHash32 class WeakHash32
{ {
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
public: public:
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
using Container = PaddedPODArray<UInt32>; using Container = PaddedPODArray<UInt32>;
@ -22,6 +21,8 @@ public:
void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); } void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); }
void update(const WeakHash32 & other);
const Container & getData() const { return data; } const Container & getData() const { return data; }
Container & getData() { return data; } Container & getData() { return data; }

View File

@ -25,7 +25,7 @@ namespace DB
template <typename To, typename From> template <typename To, typename From>
inline To assert_cast(From && from) inline To assert_cast(From && from)
{ {
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
try try
{ {
if constexpr (std::is_pointer_v<To>) if constexpr (std::is_pointer_v<To>)

View File

@ -6,12 +6,17 @@ namespace DB
{ {
String getRandomASCIIString(size_t length) String getRandomASCIIString(size_t length)
{
return getRandomASCIIString(length, thread_local_rng);
}
String getRandomASCIIString(size_t length, pcg64 & rng)
{ {
std::uniform_int_distribution<int> distribution('a', 'z'); std::uniform_int_distribution<int> distribution('a', 'z');
String res; String res;
res.resize(length); res.resize(length);
for (auto & c : res) for (auto & c : res)
c = distribution(thread_local_rng); c = distribution(rng);
return res; return res;
} }

View File

@ -2,11 +2,14 @@
#include <Core/Types.h> #include <Core/Types.h>
#include <pcg_random.hpp>
namespace DB namespace DB
{ {
/// Slow random string. Useful for random names and things like this. Not for generating data. /// Slow random string. Useful for random names and things like this. Not for generating data.
String getRandomASCIIString(size_t length); String getRandomASCIIString(size_t length);
String getRandomASCIIString(size_t length, pcg64 & rng);
} }

View File

@ -166,7 +166,7 @@ TEST(Common, RWLockRecursive)
auto lock2 = fifo_lock->getLock(RWLockImpl::Read, "q2"); auto lock2 = fifo_lock->getLock(RWLockImpl::Read, "q2");
#ifndef ABORT_ON_LOGICAL_ERROR #ifndef DEBUG_OR_SANITIZER_BUILD
/// It throws LOGICAL_ERROR /// It throws LOGICAL_ERROR
EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write, "q2");}); EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write, "q2");});
#endif #endif

View File

@ -54,16 +54,3 @@ TEST(ShellCommand, ExecuteWithInput)
EXPECT_EQ(res, "Hello, world!\n"); EXPECT_EQ(res, "Hello, world!\n");
} }
TEST(ShellCommand, AutoWait)
{
// <defunct> hunting:
for (int i = 0; i < 1000; ++i)
{
auto command = ShellCommand::execute("echo " + std::to_string(i));
//command->wait(); // now automatic
}
// std::cerr << "inspect me: ps auxwwf\n";
// std::this_thread::sleep_for(std::chrono::seconds(100));
}

View File

@ -195,7 +195,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n
snapshot_detached_tables.erase(table_name); snapshot_detached_tables.erase(table_name);
} }
CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); CurrentMetrics::add(CurrentMetrics::AttachedTable);
} }
StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name)
@ -221,7 +221,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta
.metadata_path = getObjectMetadataPath(table_name), .metadata_path = getObjectMetadataPath(table_name),
.is_permanently = false}); .is_permanently = false});
CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); CurrentMetrics::sub(CurrentMetrics::AttachedTable);
} }
return res; return res;
} }

View File

@ -289,8 +289,8 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n
tables.erase(it); tables.erase(it);
table_storage->is_detached = true; table_storage->is_detached = true;
if (table_storage->isSystemStorage() == false) if (!table_storage->isSystemStorage() && database_name != DatabaseCatalog::SYSTEM_DATABASE)
CurrentMetrics::sub(getAttachedCounterForStorage(table_storage), 1); CurrentMetrics::sub(getAttachedCounterForStorage(table_storage));
auto table_id = table_storage->getStorageID(); auto table_id = table_storage->getStorageID();
if (table_id.hasUUID()) if (table_id.hasUUID())
@ -334,8 +334,8 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c
/// non-Atomic database the is_detached is set to true before RENAME. /// non-Atomic database the is_detached is set to true before RENAME.
table->is_detached = false; table->is_detached = false;
if (table->isSystemStorage() == false && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) if (!table->isSystemStorage() && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE)
CurrentMetrics::add(getAttachedCounterForStorage(table), 1); CurrentMetrics::add(getAttachedCounterForStorage(table));
} }
void DatabaseWithOwnTablesBase::shutdown() void DatabaseWithOwnTablesBase::shutdown()

View File

@ -59,7 +59,7 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile(
std::optional<size_t> read_until_position_, std::optional<size_t> read_until_position_,
std::shared_ptr<FilesystemCacheLog> cache_log_) std::shared_ptr<FilesystemCacheLog> cache_log_)
: ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0, file_size_) : ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0, file_size_)
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
, log(getLogger(fmt::format("CachedOnDiskReadBufferFromFile({})", cache_key_))) , log(getLogger(fmt::format("CachedOnDiskReadBufferFromFile({})", cache_key_)))
#else #else
, log(getLogger("CachedOnDiskReadBufferFromFile")) , log(getLogger("CachedOnDiskReadBufferFromFile"))
@ -452,7 +452,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
{ {
case ReadType::CACHED: case ReadType::CACHED:
{ {
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
size_t file_size = getFileSizeFromReadBuffer(*read_buffer_for_file_segment); size_t file_size = getFileSizeFromReadBuffer(*read_buffer_for_file_segment);
if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end) if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end)
throw Exception( throw Exception(
@ -937,7 +937,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
if (!result) if (!result)
{ {
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
if (read_type == ReadType::CACHED) if (read_type == ReadType::CACHED)
{ {
size_t cache_file_size = getFileSizeFromReadBuffer(*implementation_buffer); size_t cache_file_size = getFileSizeFromReadBuffer(*implementation_buffer);

View File

@ -1954,7 +1954,10 @@ struct ToRelativeSubsecondNumImpl
return t.value; return t.value;
if (scale > scale_multiplier) if (scale > scale_multiplier)
return t.value / (scale / scale_multiplier); return t.value / (scale / scale_multiplier);
return t.value * (scale_multiplier / scale); return static_cast<UInt128>(t.value) * static_cast<UInt128>((scale_multiplier / scale));
/// Casting ^^: All integers are Int64, yet if t.value is big enough the multiplication can still
/// overflow which is UB. This place is too low-level and generic to check if t.value is sane.
/// Therefore just let it overflow safely and don't bother further.
} }
static Int64 execute(UInt32 t, const DateLUTImpl &) static Int64 execute(UInt32 t, const DateLUTImpl &)
{ {

View File

@ -47,54 +47,85 @@ bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args)
return true; return true;
} }
/// Replaces single low cardinality column in a function call by its dictionary
/// This can only happen after the arguments have been adapted in IFunctionOverloadResolver::getReturnType
/// as it's only possible if there is one low cardinality column and, optionally, const columns
ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count) ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count)
{ {
size_t num_rows = input_rows_count; /// We return the LC indexes so the LC can be reconstructed with the function result
ColumnPtr indexes; ColumnPtr indexes;
/// Find first LowCardinality column and replace it to nested dictionary. size_t number_low_cardinality_columns = 0;
for (auto & column : args) size_t last_low_cardinality = 0;
size_t number_const_columns = 0;
size_t number_full_columns = 0;
for (size_t i = 0; i < args.size(); i++)
{ {
if (const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get())) auto const & arg = args[i];
if (checkAndGetColumn<ColumnLowCardinality>(arg.column.get()))
{ {
/// Single LowCardinality column is supported now. number_low_cardinality_columns++;
if (indexes) last_low_cardinality = i;
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single dictionary argument for function.");
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible type for LowCardinality column: {}",
column.type->getName());
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values' default.
column.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
else
{
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
column.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
num_rows = column.column->size();
column.type = low_cardinality_type->getDictionaryType();
} }
else if (checkAndGetColumn<ColumnConst>(arg.column.get()))
number_const_columns++;
else
number_full_columns++;
} }
/// Change size of constants. if (!number_low_cardinality_columns && !number_const_columns)
return nullptr;
if (number_full_columns > 0 || number_low_cardinality_columns > 1)
{
/// This should not be possible but currently there are multiple tests in CI failing because of it
/// TODO: Fix those cases, then enable this exception
#if 0
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected low cardinality types found. Low cardinality: {}. Full {}. Const {}",
number_low_cardinality_columns, number_full_columns, number_const_columns);
#else
return nullptr;
#endif
}
else if (number_low_cardinality_columns == 1)
{
auto & lc_arg = args[last_low_cardinality];
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(lc_arg.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", lc_arg.type->getName());
const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(lc_arg.column.get());
chassert(low_cardinality_column);
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values' default.
lc_arg.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
else
{
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
lc_arg.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
/// The new column will have a different number of rows, normally less but occasionally it might be more (NULL)
input_rows_count = lc_arg.column->size();
lc_arg.type = low_cardinality_type->getDictionaryType();
}
/// Change size of constants
for (auto & column : args) for (auto & column : args)
{ {
if (const auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get())) if (const auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get()))
{ {
column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), num_rows); column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), input_rows_count);
column.type = recursiveRemoveLowCardinality(column.type); column.type = recursiveRemoveLowCardinality(column.type);
} }
} }
@ -270,6 +301,8 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments(); bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments();
const auto & dictionary_type = res_low_cardinality_type->getDictionaryType(); const auto & dictionary_type = res_low_cardinality_type->getDictionaryType();
/// The arguments should have been adapted in IFunctionOverloadResolver::getReturnType
/// So there is only one low cardinality column (and optionally some const columns) and no full column
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count); columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);

View File

@ -26,8 +26,6 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
} }
@ -45,84 +43,82 @@ public:
template <typename Transform> template <typename Transform>
void dispatchForColumns( void dispatchForColumns(
const IColumn & x, const IColumn & y, const IColumn & col_x, const IColumn & col_y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const ColumnInt64::Container & result) const
{ {
if (const auto * x_vec_16 = checkAndGetColumn<ColumnDate>(&x)) if (const auto * x_vec_16 = checkAndGetColumn<ColumnDate>(&col_x))
dispatchForSecondColumn<Transform>(*x_vec_16, y, timezone_x, timezone_y, result); dispatchForSecondColumn<Transform>(*x_vec_16, col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_vec_32 = checkAndGetColumn<ColumnDateTime>(&x)) else if (const auto * x_vec_32 = checkAndGetColumn<ColumnDateTime>(&col_x))
dispatchForSecondColumn<Transform>(*x_vec_32, y, timezone_x, timezone_y, result); dispatchForSecondColumn<Transform>(*x_vec_32, col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_vec_32_s = checkAndGetColumn<ColumnDate32>(&x)) else if (const auto * x_vec_32_s = checkAndGetColumn<ColumnDate32>(&col_x))
dispatchForSecondColumn<Transform>(*x_vec_32_s, y, timezone_x, timezone_y, result); dispatchForSecondColumn<Transform>(*x_vec_32_s, col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_vec_64 = checkAndGetColumn<ColumnDateTime64>(&x)) else if (const auto * x_vec_64 = checkAndGetColumn<ColumnDateTime64>(&col_x))
dispatchForSecondColumn<Transform>(*x_vec_64, y, timezone_x, timezone_y, result); dispatchForSecondColumn<Transform>(*x_vec_64, col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_const_16 = checkAndGetColumnConst<ColumnDate>(&x)) else if (const auto * x_const_16 = checkAndGetColumnConst<ColumnDate>(&col_x))
dispatchConstForSecondColumn<Transform>(x_const_16->getValue<UInt16>(), y, timezone_x, timezone_y, result); dispatchConstForSecondColumn<Transform>(x_const_16->getValue<UInt16>(), col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_const_32 = checkAndGetColumnConst<ColumnDateTime>(&x)) else if (const auto * x_const_32 = checkAndGetColumnConst<ColumnDateTime>(&col_x))
dispatchConstForSecondColumn<Transform>(x_const_32->getValue<UInt32>(), y, timezone_x, timezone_y, result); dispatchConstForSecondColumn<Transform>(x_const_32->getValue<UInt32>(), col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_const_32_s = checkAndGetColumnConst<ColumnDate32>(&x)) else if (const auto * x_const_32_s = checkAndGetColumnConst<ColumnDate32>(&col_x))
dispatchConstForSecondColumn<Transform>(x_const_32_s->getValue<Int32>(), y, timezone_x, timezone_y, result); dispatchConstForSecondColumn<Transform>(x_const_32_s->getValue<Int32>(), col_y, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * x_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&x)) else if (const auto * x_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&col_x))
dispatchConstForSecondColumn<Transform>(x_const_64->getValue<DecimalField<DateTime64>>(), y, timezone_x, timezone_y, result); dispatchConstForSecondColumn<Transform>(x_const_64->getValue<DecimalField<DateTime64>>(), col_y, timezone_x, timezone_y, input_rows_count, result);
else else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", name);
"Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64",
name);
} }
template <typename Transform, typename LeftColumnType> template <typename Transform, typename LeftColumnType>
void dispatchForSecondColumn( void dispatchForSecondColumn(
const LeftColumnType & x, const IColumn & y, const LeftColumnType & x, const IColumn & col_y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const ColumnInt64::Container & result) const
{ {
if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&y)) if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&col_y))
vectorVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, result); vectorVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&y)) else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&col_y))
vectorVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, result); vectorVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&y)) else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&col_y))
vectorVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, result); vectorVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&y)) else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&col_y))
vectorVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, result); vectorVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_const_16 = checkAndGetColumnConst<ColumnDate>(&y)) else if (const auto * y_const_16 = checkAndGetColumnConst<ColumnDate>(&col_y))
vectorConstant<Transform>(x, y_const_16->getValue<UInt16>(), timezone_x, timezone_y, result); vectorConstant<Transform>(x, y_const_16->getValue<UInt16>(), timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_const_32 = checkAndGetColumnConst<ColumnDateTime>(&y)) else if (const auto * y_const_32 = checkAndGetColumnConst<ColumnDateTime>(&col_y))
vectorConstant<Transform>(x, y_const_32->getValue<UInt32>(), timezone_x, timezone_y, result); vectorConstant<Transform>(x, y_const_32->getValue<UInt32>(), timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_const_32_s = checkAndGetColumnConst<ColumnDate32>(&y)) else if (const auto * y_const_32_s = checkAndGetColumnConst<ColumnDate32>(&col_y))
vectorConstant<Transform>(x, y_const_32_s->getValue<Int32>(), timezone_x, timezone_y, result); vectorConstant<Transform>(x, y_const_32_s->getValue<Int32>(), timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&y)) else if (const auto * y_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&col_y))
vectorConstant<Transform>(x, y_const_64->getValue<DecimalField<DateTime64>>(), timezone_x, timezone_y, result); vectorConstant<Transform>(x, y_const_64->getValue<DecimalField<DateTime64>>(), timezone_x, timezone_y, input_rows_count, result);
else else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name);
"Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64",
name);
} }
template <typename Transform, typename T1> template <typename Transform, typename T1>
void dispatchConstForSecondColumn( void dispatchConstForSecondColumn(
T1 x, const IColumn & y, T1 x, const IColumn & col_y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const ColumnInt64::Container & result) const
{ {
if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&y)) if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&col_y))
constantVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, result); constantVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&y)) else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&col_y))
constantVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, result); constantVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&y)) else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&col_y))
constantVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, result); constantVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&y)) else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&col_y))
constantVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, result); constantVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result);
else else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name);
"Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64",
name);
} }
template <typename Transform, typename LeftColumnType, typename RightColumnType> template <typename Transform, typename LeftColumnType, typename RightColumnType>
void vectorVector( void vectorVector(
const LeftColumnType & x, const RightColumnType & y, const LeftColumnType & x, const RightColumnType & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const ColumnInt64::Container & result) const
{ {
const auto & x_data = x.getData(); const auto & x_data = x.getData();
@ -130,14 +126,15 @@ public:
const auto transform_x = TransformDateTime64<Transform>(getScale(x)); const auto transform_x = TransformDateTime64<Transform>(getScale(x));
const auto transform_y = TransformDateTime64<Transform>(getScale(y)); const auto transform_y = TransformDateTime64<Transform>(getScale(y));
for (size_t i = 0, size = x.size(); i < size; ++i) for (size_t i = 0; i < input_rows_count; ++i)
result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y); result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y);
} }
template <typename Transform, typename LeftColumnType, typename T2> template <typename Transform, typename LeftColumnType, typename T2>
void vectorConstant( void vectorConstant(
const LeftColumnType & x, T2 y, const LeftColumnType & x, T2 y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const ColumnInt64::Container & result) const
{ {
const auto & x_data = x.getData(); const auto & x_data = x.getData();
@ -145,7 +142,7 @@ public:
const auto transform_y = TransformDateTime64<Transform>(getScale(y)); const auto transform_y = TransformDateTime64<Transform>(getScale(y));
const auto y_value = stripDecimalFieldValue(y); const auto y_value = stripDecimalFieldValue(y);
for (size_t i = 0, size = x.size(); i < size; ++i) for (size_t i = 0; i < input_rows_count; ++i)
result[i] = calculate(transform_x, transform_y, x_data[i], y_value, timezone_x, timezone_y); result[i] = calculate(transform_x, transform_y, x_data[i], y_value, timezone_x, timezone_y);
} }
@ -153,6 +150,7 @@ public:
void constantVector( void constantVector(
T1 x, const RightColumnType & y, T1 x, const RightColumnType & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
size_t input_rows_count,
ColumnInt64::Container & result) const ColumnInt64::Container & result) const
{ {
const auto & y_data = y.getData(); const auto & y_data = y.getData();
@ -160,20 +158,22 @@ public:
const auto transform_y = TransformDateTime64<Transform>(getScale(y)); const auto transform_y = TransformDateTime64<Transform>(getScale(y));
const auto x_value = stripDecimalFieldValue(x); const auto x_value = stripDecimalFieldValue(x);
for (size_t i = 0, size = y.size(); i < size; ++i) for (size_t i = 0; i < input_rows_count; ++i)
result[i] = calculate(transform_x, transform_y, x_value, y_data[i], timezone_x, timezone_y); result[i] = calculate(transform_x, transform_y, x_value, y_data[i], timezone_x, timezone_y);
} }
template <typename TransformX, typename TransformY, typename T1, typename T2> template <typename TransformX, typename TransformY, typename T1, typename T2>
Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const
{ {
auto res = static_cast<Int64>(transform_y.execute(y, timezone_y)) - static_cast<Int64>(transform_x.execute(x, timezone_x));
if constexpr (is_diff) if constexpr (is_diff)
return static_cast<Int64>(transform_y.execute(y, timezone_y)) {
- static_cast<Int64>(transform_x.execute(x, timezone_x)); return res;
}
else else
{ {
auto res = static_cast<Int64>(transform_y.execute(y, timezone_y)) /// Adjust res:
- static_cast<Int64>(transform_x.execute(x, timezone_x));
DateTimeComponentsWithFractionalPart a_comp; DateTimeComponentsWithFractionalPart a_comp;
DateTimeComponentsWithFractionalPart b_comp; DateTimeComponentsWithFractionalPart b_comp;
Int64 adjust_value; Int64 adjust_value;
@ -332,95 +332,73 @@ public:
static constexpr auto name = is_relative ? "dateDiff" : "age"; static constexpr auto name = is_relative ? "dateDiff" : "age";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionDateDiff>(); } static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionDateDiff>(); }
String getName() const override String getName() const override { return name; }
{
return name;
}
bool isVariadic() const override { return true; } bool isVariadic() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 0; } size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{ {
if (arguments.size() != 3 && arguments.size() != 4) FunctionArgumentDescriptors mandatory_args{
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, {"unit", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
"Number of arguments for function {} doesn't match: passed {}, should be 3 or 4", {"startdate", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"},
getName(), arguments.size()); {"enddate", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"},
};
if (!isString(arguments[0])) FunctionArgumentDescriptors optional_args{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, {"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
"First argument for function {} (unit) must be String", };
getName());
if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument for function {} must be Date, Date32, DateTime or DateTime64",
getName());
if (!isDate(arguments[2]) && !isDate32(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Third argument for function {} must be Date, Date32, DateTime or DateTime64",
getName()
);
if (arguments.size() == 4 && !isString(arguments[3]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Fourth argument for function {} (timezone) must be String",
getName());
return std::make_shared<DataTypeInt64>(); return std::make_shared<DataTypeInt64>();
} }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{ {
const auto * unit_column = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()); const auto * col_unit = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
if (!unit_column) if (!col_unit)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument for function {} must be constant String", getName());
"First argument for function {} must be constant String",
getName());
String unit = Poco::toLower(unit_column->getValue<String>()); String unit = Poco::toLower(col_unit->getValue<String>());
const IColumn & x = *arguments[1].column; const IColumn & col_x = *arguments[1].column;
const IColumn & y = *arguments[2].column; const IColumn & col_y = *arguments[2].column;
size_t rows = input_rows_count; auto col_res = ColumnInt64::create(input_rows_count);
auto res = ColumnInt64::create(rows);
const auto & timezone_x = extractTimeZoneFromFunctionArguments(arguments, 3, 1); const auto & timezone_x = extractTimeZoneFromFunctionArguments(arguments, 3, 1);
const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2);
if (unit == "year" || unit == "years" || unit == "yy" || unit == "yyyy") if (unit == "year" || unit == "years" || unit == "yy" || unit == "yyyy")
impl.template dispatchForColumns<ToRelativeYearNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeYearNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "quarter" || unit == "quarters" || unit == "qq" || unit == "q") else if (unit == "quarter" || unit == "quarters" || unit == "qq" || unit == "q")
impl.template dispatchForColumns<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "month" || unit == "months" || unit == "mm" || unit == "m") else if (unit == "month" || unit == "months" || unit == "mm" || unit == "m")
impl.template dispatchForColumns<ToRelativeMonthNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeMonthNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "week" || unit == "weeks" || unit == "wk" || unit == "ww") else if (unit == "week" || unit == "weeks" || unit == "wk" || unit == "ww")
impl.template dispatchForColumns<ToRelativeWeekNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeWeekNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "day" || unit == "days" || unit == "dd" || unit == "d") else if (unit == "day" || unit == "days" || unit == "dd" || unit == "d")
impl.template dispatchForColumns<ToRelativeDayNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeDayNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "hour" || unit == "hours" || unit == "hh" || unit == "h") else if (unit == "hour" || unit == "hours" || unit == "hh" || unit == "h")
impl.template dispatchForColumns<ToRelativeHourNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeHourNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "minute" || unit == "minutes" || unit == "mi" || unit == "n") else if (unit == "minute" || unit == "minutes" || unit == "mi" || unit == "n")
impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "second" || unit == "seconds" || unit == "ss" || unit == "s") else if (unit == "second" || unit == "seconds" || unit == "ss" || unit == "s")
impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "millisecond" || unit == "milliseconds" || unit == "ms") else if (unit == "millisecond" || unit == "milliseconds" || unit == "ms")
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<millisecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<millisecond_multiplier>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "microsecond" || unit == "microseconds" || unit == "us" || unit == "u") else if (unit == "microsecond" || unit == "microseconds" || unit == "us" || unit == "u")
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else if (unit == "nanosecond" || unit == "nanoseconds" || unit == "ns") else if (unit == "nanosecond" || unit == "nanoseconds" || unit == "ns")
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<nanosecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<nanosecond_multiplier>>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData());
else else
throw Exception(ErrorCodes::BAD_ARGUMENTS, throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit);
"Function {} does not support '{}' unit", getName(), unit);
return res; return col_res;
} }
private: private:
DateDiffImpl<is_relative> impl{name}; DateDiffImpl<is_relative> impl{name};
@ -437,50 +415,35 @@ public:
static constexpr auto name = "timeDiff"; static constexpr auto name = "timeDiff";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionTimeDiff>(); } static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionTimeDiff>(); }
String getName() const override String getName() const override { return name; }
{ bool useDefaultImplementationForConstants() const override { return true; }
return name; ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }
}
bool isVariadic() const override { return false; } bool isVariadic() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 2; } size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{ {
if (arguments.size() != 2) FunctionArgumentDescriptors args{
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, {"first_datetime", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"},
"Number of arguments for function {} doesn't match: passed {}, should be 2", {"second_datetime", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"},
getName(), arguments.size()); };
if (!isDate(arguments[0]) && !isDate32(arguments[0]) && !isDateTime(arguments[0]) && !isDateTime64(arguments[0])) validateFunctionArguments(*this, arguments, args);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument for function {} must be Date, Date32, DateTime or DateTime64",
getName());
if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument for function {} must be Date, Date32, DateTime or DateTime64",
getName()
);
return std::make_shared<DataTypeInt64>(); return std::make_shared<DataTypeInt64>();
} }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{ {
const IColumn & x = *arguments[0].column; const IColumn & col_x = *arguments[0].column;
const IColumn & y = *arguments[1].column; const IColumn & col_y = *arguments[1].column;
size_t rows = input_rows_count; auto col_res = ColumnInt64::create(input_rows_count);
auto res = ColumnInt64::create(rows);
impl.dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, DateLUT::instance(), DateLUT::instance(), res->getData()); impl.dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(col_x, col_y, DateLUT::instance(), DateLUT::instance(), input_rows_count, col_res->getData());
return res; return col_res;
} }
private: private:
DateDiffImpl<true> impl{name}; DateDiffImpl<true> impl{name};

View File

@ -134,7 +134,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding)
ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1);
ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD);
#ifndef ABORT_ON_LOGICAL_ERROR #ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR);
ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_TRUE(memory.m_data); // state is intact after exception
ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1);
@ -158,7 +158,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding)
ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD);
ASSERT_EQ(memory.m_size, 1); ASSERT_EQ(memory.m_size, 1);
#ifndef ABORT_ON_LOGICAL_ERROR #ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR);
ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_TRUE(memory.m_data); // state is intact after exception
ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD);
@ -197,7 +197,7 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding)
, ErrorCodes::ARGUMENT_OUT_OF_BOUND); , ErrorCodes::ARGUMENT_OUT_OF_BOUND);
} }
#ifndef ABORT_ON_LOGICAL_ERROR #ifndef DEBUG_OR_SANITIZER_BUILD
{ {
EXPECT_THROW_ERROR_CODE( EXPECT_THROW_ERROR_CODE(
{ {

View File

@ -917,8 +917,8 @@ TEST_P(SyncAsync, ExceptionOnUploadPart) {
TEST_F(WBS3Test, PrefinalizeCalledMultipleTimes) { TEST_F(WBS3Test, PrefinalizeCalledMultipleTimes) {
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
GTEST_SKIP() << "this test trigger LOGICAL_ERROR, runs only if ABORT_ON_LOGICAL_ERROR is not defined"; GTEST_SKIP() << "this test trigger LOGICAL_ERROR, runs only if DEBUG_OR_SANITIZER_BUILD is not defined";
#else #else
EXPECT_THROW({ EXPECT_THROW({
try { try {

View File

@ -1007,7 +1007,7 @@ void FileCache::freeSpaceRatioKeepingThreadFunc()
limits_satisfied = main_priority->collectCandidatesForEviction( limits_satisfied = main_priority->collectCandidatesForEviction(
desired_size, desired_elements_num, keep_up_free_space_remove_batch, stat, eviction_candidates, lock); desired_size, desired_elements_num, keep_up_free_space_remove_batch, stat, eviction_candidates, lock);
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
/// Let's make sure that we correctly processed the limits. /// Let's make sure that we correctly processed the limits.
if (limits_satisfied && eviction_candidates.size() < keep_up_free_space_remove_batch) if (limits_satisfied && eviction_candidates.size() < keep_up_free_space_remove_batch)
{ {
@ -1110,7 +1110,7 @@ void FileCache::removeAllReleasable(const UserID & user_id)
{ {
assertInitialized(); assertInitialized();
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
assertCacheCorrectness(); assertCacheCorrectness();
#endif #endif
@ -1226,7 +1226,7 @@ void FileCache::loadMetadataImpl()
if (first_exception) if (first_exception)
std::rethrow_exception(first_exception); std::rethrow_exception(first_exception);
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
assertCacheCorrectness(); assertCacheCorrectness();
#endif #endif
} }
@ -1393,7 +1393,7 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir)
FileCache::~FileCache() FileCache::~FileCache()
{ {
deactivateBackgroundOperations(); deactivateBackgroundOperations();
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
assertCacheCorrectness(); assertCacheCorrectness();
#endif #endif
} }

View File

@ -67,7 +67,7 @@ FileSegment::FileSegment(
, key_metadata(key_metadata_) , key_metadata(key_metadata_)
, queue_iterator(queue_iterator_) , queue_iterator(queue_iterator_)
, cache(cache_) , cache(cache_)
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
, log(getLogger(fmt::format("FileSegment({}) : {}", key_.toString(), range().toString()))) , log(getLogger(fmt::format("FileSegment({}) : {}", key_.toString(), range().toString())))
#else #else
, log(getLogger("FileSegment")) , log(getLogger("FileSegment"))
@ -385,9 +385,9 @@ void FileSegment::write(char * from, size_t size, size_t offset_in_file)
try try
{ {
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
/// This mutex is only needed to have a valid assertion in assertCacheCorrectness(), /// This mutex is only needed to have a valid assertion in assertCacheCorrectness(),
/// which is only executed in debug/sanitizer builds (under ABORT_ON_LOGICAL_ERROR). /// which is only executed in debug/sanitizer builds (under DEBUG_OR_SANITIZER_BUILD).
std::lock_guard lock(write_mutex); std::lock_guard lock(write_mutex);
#endif #endif

View File

@ -963,7 +963,7 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl(
} }
else if (!can_be_broken) else if (!can_be_broken)
{ {
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path);
#else #else
LOG_WARNING(key_metadata->logger(), "Expected path {} to exist, while removing {}:{}", LOG_WARNING(key_metadata->logger(), "Expected path {} to exist, while removing {}:{}",

View File

@ -310,7 +310,7 @@ IColumn::Selector ConcurrentHashJoin::selectDispatchBlock(const Strings & key_co
{ {
const auto & key_col = from_block.getByName(key_name).column->convertToFullColumnIfConst(); const auto & key_col = from_block.getByName(key_name).column->convertToFullColumnIfConst();
const auto & key_col_no_lc = recursiveRemoveLowCardinality(recursiveRemoveSparse(key_col)); const auto & key_col_no_lc = recursiveRemoveLowCardinality(recursiveRemoveSparse(key_col));
key_col_no_lc->updateWeakHash32(hash); hash.update(key_col_no_lc->getWeakHash32());
} }
return hashToSelector(hash, num_shards); return hashToSelector(hash, num_shards);
} }

View File

@ -16,7 +16,6 @@
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <Core/Settings.h> #include <Core/Settings.h>
#include <Parsers/ASTSetQuery.h> #include <Parsers/ASTSetQuery.h>
#include <base/hex.h>
#include <Core/Defines.h> #include <Core/Defines.h>
#include <Core/SettingsEnums.h> #include <Core/SettingsEnums.h>
@ -82,13 +81,13 @@
#include <Interpreters/ApplyWithSubqueryVisitor.h> #include <Interpreters/ApplyWithSubqueryVisitor.h>
#include <TableFunctions/TableFunctionFactory.h> #include <TableFunctions/TableFunctionFactory.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h> #include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h>
#include <Functions/UserDefined/UserDefinedSQLFunctionVisitor.h> #include <Functions/UserDefined/UserDefinedSQLFunctionVisitor.h>
#include <Interpreters/ReplaceQueryParameterVisitor.h> #include <Interpreters/ReplaceQueryParameterVisitor.h>
#include <Parsers/QueryParameterVisitor.h> #include <Parsers/QueryParameterVisitor.h>
namespace CurrentMetrics namespace CurrentMetrics
{ {
extern const Metric AttachedTable; extern const Metric AttachedTable;
@ -147,27 +146,27 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
} }
auto db_num_limit = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw; auto db_num_limit = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw;
if (db_num_limit > 0) if (db_num_limit > 0 && !internal)
{ {
size_t db_count = DatabaseCatalog::instance().getDatabases().size(); size_t db_count = DatabaseCatalog::instance().getDatabases().size();
std::vector<String> system_databases = { std::initializer_list<std::string_view> system_databases =
{
DatabaseCatalog::TEMPORARY_DATABASE, DatabaseCatalog::TEMPORARY_DATABASE,
DatabaseCatalog::SYSTEM_DATABASE, DatabaseCatalog::SYSTEM_DATABASE,
DatabaseCatalog::INFORMATION_SCHEMA, DatabaseCatalog::INFORMATION_SCHEMA,
DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE,
DatabaseCatalog::DEFAULT_DATABASE
}; };
for (const auto & system_database : system_databases) for (const auto & system_database : system_databases)
{ {
if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(system_database)) if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(std::string(system_database)))
db_count--; --db_count;
} }
if (db_count >= db_num_limit) if (db_count >= db_num_limit)
throw Exception(ErrorCodes::TOO_MANY_DATABASES, throw Exception(ErrorCodes::TOO_MANY_DATABASES,
"Too many databases in the Clickhouse. " "Too many databases. "
"The limit (setting 'max_database_num_to_throw') is set to {}, current number of databases is {}", "The limit (server configuration parameter `max_database_num_to_throw`) is set to {}, the current number of databases is {}",
db_num_limit, db_count); db_num_limit, db_count);
} }
@ -1601,13 +1600,13 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
} }
UInt64 table_num_limit = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw; UInt64 table_num_limit = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw;
if (table_num_limit > 0 && create.getDatabase() != DatabaseCatalog::SYSTEM_DATABASE) if (table_num_limit > 0 && !internal)
{ {
UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable); UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable);
if (table_count >= table_num_limit) if (table_count >= table_num_limit)
throw Exception(ErrorCodes::TOO_MANY_TABLES, throw Exception(ErrorCodes::TOO_MANY_TABLES,
"Too many tables in the Clickhouse. " "Too many tables. "
"The limit (setting 'max_table_num_to_throw') is set to {}, current number of tables is {}", "The limit (server configuration parameter `max_table_num_to_throw`) is set to {}, the current number of tables is {}",
table_num_limit, table_count); table_num_limit, table_count);
} }

View File

@ -1726,7 +1726,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
const auto & join_clause = table_join.getOnlyClause(); const auto & join_clause = table_join.getOnlyClause();
auto join_kind = table_join.kind(); auto join_kind = table_join.kind();
bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind); auto join_strictness = table_join.strictness();
bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));
auto has_non_const = [](const Block & block, const auto & keys) auto has_non_const = [](const Block & block, const auto & keys)
{ {
@ -1745,7 +1748,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
bool has_non_const_keys = has_non_const(query_plan.getCurrentDataStream().header, join_clause.key_names_left) bool has_non_const_keys = has_non_const(query_plan.getCurrentDataStream().header, join_clause.key_names_left)
&& has_non_const(joined_plan->getCurrentDataStream().header, join_clause.key_names_right); && has_non_const(joined_plan->getCurrentDataStream().header, join_clause.key_names_right);
if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys) if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
{ {
auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left); auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left);
auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right); auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right);

View File

@ -121,9 +121,18 @@ String InterpreterShowTablesQuery::getRewrittenQuery()
if (query.merges) if (query.merges)
{ {
WriteBufferFromOwnString rewritten_query; WriteBufferFromOwnString rewritten_query;
rewritten_query << "SELECT table, database, round((elapsed * (1 / merges.progress)) - merges.elapsed, 2) AS estimate_complete, round(elapsed,2) elapsed, " rewritten_query << R"(
"round(progress*100, 2) AS progress, is_mutation, formatReadableSize(total_size_bytes_compressed) AS size_compressed, " SELECT
"formatReadableSize(memory_usage) AS memory_usage FROM system.merges"; table,
database,
merges.progress > 0 ? round(merges.elapsed * (1 - merges.progress) / merges.progress, 2) : NULL AS estimate_complete,
round(elapsed, 2) AS elapsed,
round(progress * 100, 2) AS progress,
is_mutation,
formatReadableSize(total_size_bytes_compressed) AS size_compressed,
formatReadableSize(memory_usage) AS memory_usage
FROM system.merges
)";
if (!query.like.empty()) if (!query.like.empty())
{ {

View File

@ -554,7 +554,7 @@ static Blocks scatterBlockByHashImpl(const Strings & key_columns_names, const Bl
for (const auto & key_name : key_columns_names) for (const auto & key_name : key_columns_names)
{ {
ColumnPtr key_col = materializeColumn(block, key_name); ColumnPtr key_col = materializeColumn(block, key_name);
key_col->updateWeakHash32(hash); hash.update(key_col->getWeakHash32());
} }
auto selector = hashToSelector(hash, sharder); auto selector = hashToSelector(hash, sharder);

View File

@ -538,7 +538,7 @@ Chunk DDLQueryStatusSource::generate()
ExecutionStatus status(-1, "Cannot obtain error message"); ExecutionStatus status(-1, "Cannot obtain error message");
/// Replicated database retries in case of error, it should not write error status. /// Replicated database retries in case of error, it should not write error status.
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
bool need_check_status = true; bool need_check_status = true;
#else #else
bool need_check_status = !is_replicated_database; bool need_check_status = !is_replicated_database;

View File

@ -7,7 +7,6 @@
#include <Common/FieldVisitorToString.h> #include <Common/FieldVisitorToString.h>
#include <Common/KnownObjectNames.h> #include <Common/KnownObjectNames.h>
#include <Common/SipHash.h> #include <Common/SipHash.h>
#include <Common/typeid_cast.h>
#include <IO/Operators.h> #include <IO/Operators.h>
#include <IO/WriteBufferFromString.h> #include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
@ -19,9 +18,6 @@
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
#include <Parsers/ASTSetQuery.h> #include <Parsers/ASTSetQuery.h>
#include <Parsers/FunctionSecretArgumentsFinderAST.h> #include <Parsers/FunctionSecretArgumentsFinderAST.h>
#include <Core/QualifiedTableName.h>
#include <boost/algorithm/string.hpp>
using namespace std::literals; using namespace std::literals;
@ -632,6 +628,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
settings.ostr << ", "; settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>()) if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS "; settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
} }
settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : "");
@ -642,12 +639,14 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
{ {
settings.ostr << (settings.hilite ? hilite_operator : "") << ((frame.need_parens && !alias.empty()) ? "tuple" : "") << '(' settings.ostr << (settings.hilite ? hilite_operator : "") << ((frame.need_parens && !alias.empty()) ? "tuple" : "") << '('
<< (settings.hilite ? hilite_none : ""); << (settings.hilite ? hilite_none : "");
for (size_t i = 0; i < arguments->children.size(); ++i) for (size_t i = 0; i < arguments->children.size(); ++i)
{ {
if (i != 0) if (i != 0)
settings.ostr << ", "; settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>()) if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS "; settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
} }
settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : "");
@ -663,6 +662,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
settings.ostr << ", "; settings.ostr << ", ";
if (arguments->children[i]->as<ASTSetQuery>()) if (arguments->children[i]->as<ASTSetQuery>())
settings.ostr << "SETTINGS "; settings.ostr << "SETTINGS ";
nested_dont_need_parens.list_element_index = i;
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
} }
settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : "");

View File

@ -2743,7 +2743,7 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po
/// 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator /// 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator
if (op.function_name == "and" && layers.back()->between_counter) if (op.function_name == "and" && layers.back()->between_counter)
{ {
layers.back()->between_counter--; --layers.back()->between_counter;
op = finish_between_operator; op = finish_between_operator;
} }

View File

@ -745,7 +745,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan,
{ {
auto & interpolate_node_typed = interpolate_node->as<InterpolateNode &>(); auto & interpolate_node_typed = interpolate_node->as<InterpolateNode &>();
PlannerActionsVisitor planner_actions_visitor(planner_context); PlannerActionsVisitor planner_actions_visitor(
planner_context,
/* use_column_identifier_as_action_node_name_, (default value)*/ true,
/// Prefer the INPUT to CONSTANT nodes (actions must be non constant)
/* always_use_const_column_for_constant_nodes */ false);
auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag,
interpolate_node_typed.getExpression()); interpolate_node_typed.getExpression());
if (expression_to_interpolate_expression_nodes.size() != 1) if (expression_to_interpolate_expression_nodes.size() != 1)

View File

@ -487,16 +487,33 @@ public:
return node; return node;
} }
const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column) [[nodiscard]] String addConstantIfNecessary(
const std::string & node_name, const ColumnWithTypeAndName & column, bool always_use_const_column_for_constant_nodes)
{ {
chassert(column.column != nullptr);
auto it = node_name_to_node.find(node_name); auto it = node_name_to_node.find(node_name);
if (it != node_name_to_node.end() && (!always_use_const_column_for_constant_nodes || it->second->column))
return {node_name};
if (it != node_name_to_node.end()) if (it != node_name_to_node.end())
return it->second; {
/// There is a node with this name, but it doesn't have a column
/// This likely happens because we executed the query until WithMergeableState with a const node in the
/// WHERE clause and, as the results of headers are materialized, the column was removed
/// Let's add a new column and keep this
String dupped_name{node_name + "_dupped"};
if (node_name_to_node.find(dupped_name) != node_name_to_node.end())
return dupped_name;
const auto * node = &actions_dag.addColumn(column);
node_name_to_node[dupped_name] = node;
return dupped_name;
}
const auto * node = &actions_dag.addColumn(column); const auto * node = &actions_dag.addColumn(column);
node_name_to_node[node->result_name] = node; node_name_to_node[node->result_name] = node;
return node; return {node_name};
} }
template <typename FunctionOrOverloadResolver> template <typename FunctionOrOverloadResolver>
@ -525,7 +542,7 @@ public:
} }
private: private:
std::unordered_map<std::string_view, const ActionsDAG::Node *> node_name_to_node; std::unordered_map<String, const ActionsDAG::Node *> node_name_to_node;
ActionsDAG & actions_dag; ActionsDAG & actions_dag;
QueryTreeNodePtr scope_node; QueryTreeNodePtr scope_node;
}; };
@ -533,9 +550,11 @@ private:
class PlannerActionsVisitorImpl class PlannerActionsVisitorImpl
{ {
public: public:
PlannerActionsVisitorImpl(ActionsDAG & actions_dag, PlannerActionsVisitorImpl(
ActionsDAG & actions_dag,
const PlannerContextPtr & planner_context_, const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_); bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_);
ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node); ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node);
@ -595,14 +614,18 @@ private:
const PlannerContextPtr planner_context; const PlannerContextPtr planner_context;
ActionNodeNameHelper action_node_name_helper; ActionNodeNameHelper action_node_name_helper;
bool use_column_identifier_as_action_node_name; bool use_column_identifier_as_action_node_name;
bool always_use_const_column_for_constant_nodes;
}; };
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAG & actions_dag, PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(
ActionsDAG & actions_dag,
const PlannerContextPtr & planner_context_, const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_) bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_)
: planner_context(planner_context_) : planner_context(planner_context_)
, action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_) , action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
, always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_)
{ {
actions_stack.emplace_back(actions_dag, nullptr); actions_stack.emplace_back(actions_dag, nullptr);
} }
@ -725,17 +748,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
column.type = constant_type; column.type = constant_type;
column.column = column.type->createColumnConst(1, constant_literal); column.column = column.type->createColumnConst(1, constant_literal);
actions_stack[0].addConstantIfNecessary(constant_node_name, column); String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column, always_use_const_column_for_constant_nodes);
size_t actions_stack_size = actions_stack.size(); size_t actions_stack_size = actions_stack.size();
for (size_t i = 1; i < actions_stack_size; ++i) for (size_t i = 1; i < actions_stack_size; ++i)
{ {
auto & actions_stack_node = actions_stack[i]; auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column); actions_stack_node.addInputConstantColumnIfNecessary(final_name, column);
} }
return {constant_node_name, Levels(0)}; return {final_name, Levels(0)};
} }
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node) PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node)
@ -864,16 +886,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma
else else
column.column = std::move(column_set); column.column = std::move(column_set);
actions_stack[0].addConstantIfNecessary(column.name, column); String final_name = actions_stack[0].addConstantIfNecessary(column.name, column, always_use_const_column_for_constant_nodes);
size_t actions_stack_size = actions_stack.size(); size_t actions_stack_size = actions_stack.size();
for (size_t i = 1; i < actions_stack_size; ++i) for (size_t i = 1; i < actions_stack_size; ++i)
{ {
auto & actions_stack_node = actions_stack[i]; auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputConstantColumnIfNecessary(column.name, column); actions_stack_node.addInputConstantColumnIfNecessary(final_name, column);
} }
return {column.name, Levels(0)}; return {final_name, Levels(0)};
} }
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node) PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node)
@ -1010,14 +1032,19 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
} }
PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_) PlannerActionsVisitor::PlannerActionsVisitor(
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_,
bool always_use_const_column_for_constant_nodes_)
: planner_context(planner_context_) : planner_context(planner_context_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
, always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_)
{} {}
ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node) ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node)
{ {
PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name); PlannerActionsVisitorImpl actions_visitor_impl(
actions_dag, planner_context, use_column_identifier_as_action_node_name, always_use_const_column_for_constant_nodes);
return actions_visitor_impl.visit(expression_node); return actions_visitor_impl.visit(expression_node);
} }

View File

@ -27,11 +27,17 @@ using PlannerContextPtr = std::shared_ptr<PlannerContext>;
* During actions build, there is special handling for following functions: * During actions build, there is special handling for following functions:
* 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added. * 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added.
* 2. For function `in` and its variants, already collected sets from planner context are used. * 2. For function `in` and its variants, already collected sets from planner context are used.
* 3. When building actions that use CONSTANT nodes, by default we ignore pre-existing INPUTs if those don't have
* a column (a const column always has a column). This is for compatibility with previous headers. We disable this
* behaviour when we explicitly want to override CONSTANT nodes with the input (resolving InterpolateNode for example)
*/ */
class PlannerActionsVisitor class PlannerActionsVisitor
{ {
public: public:
explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_ = true); explicit PlannerActionsVisitor(
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_ = true,
bool always_use_const_column_for_constant_nodes_ = true);
/** Add actions necessary to calculate expression node into expression dag. /** Add actions necessary to calculate expression node into expression dag.
* Necessary actions are not added in actions dag output. * Necessary actions are not added in actions dag output.
@ -42,6 +48,7 @@ public:
private: private:
const PlannerContextPtr planner_context; const PlannerContextPtr planner_context;
bool use_column_identifier_as_action_node_name = true; bool use_column_identifier_as_action_node_name = true;
bool always_use_const_column_for_constant_nodes = true;
}; };
/** Calculate query tree expression node action dag name and add them into node to name map. /** Calculate query tree expression node action dag name and add them into node to name map.

View File

@ -77,7 +77,6 @@ namespace ErrorCodes
extern const int INVALID_JOIN_ON_EXPRESSION; extern const int INVALID_JOIN_ON_EXPRESSION;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
extern const int SYNTAX_ERROR;
extern const int ACCESS_DENIED; extern const int ACCESS_DENIED;
extern const int PARAMETER_OUT_OF_BOUND; extern const int PARAMETER_OUT_OF_BOUND;
extern const int TOO_MANY_COLUMNS; extern const int TOO_MANY_COLUMNS;
@ -1397,12 +1396,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
{ {
if (!join_clause.hasASOF()) if (!join_clause.hasASOF())
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"JOIN {} no inequality in ASOF JOIN ON section.", "JOIN {} no inequality in ASOF JOIN ON section",
join_node.formatASTForErrorMessage());
if (table_join_clause.key_names_left.size() <= 1)
throw Exception(ErrorCodes::SYNTAX_ERROR,
"JOIN {} ASOF join needs at least one equi-join column",
join_node.formatASTForErrorMessage()); join_node.formatASTForErrorMessage());
} }
@ -1524,7 +1518,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
{ {
const auto & join_clause = table_join->getOnlyClause(); const auto & join_clause = table_join->getOnlyClause();
bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind); bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));
auto has_non_const = [](const Block & block, const auto & keys) auto has_non_const = [](const Block & block, const auto & keys)
{ {
@ -1544,7 +1540,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left) bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left)
&& has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right); && has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right);
if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys) if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
{ {
auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left); auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left);
auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right); auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right);

View File

@ -34,13 +34,20 @@ namespace ErrorCodes
namespace namespace
{ {
FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns) FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, JoinStrictness strictness)
{ {
SortDescription desc; SortDescription desc;
desc.reserve(columns.size()); desc.reserve(columns.size());
for (const auto & name : columns) for (const auto & name : columns)
desc.emplace_back(name); desc.emplace_back(name);
return std::make_unique<FullMergeJoinCursor>(block, desc); return std::make_unique<FullMergeJoinCursor>(block, desc, strictness == JoinStrictness::Asof);
}
bool ALWAYS_INLINE isNullAt(const IColumn & column, size_t row)
{
if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(&column))
return nullable_column->isNullAt(row);
return false;
} }
template <bool has_left_nulls, bool has_right_nulls> template <bool has_left_nulls, bool has_right_nulls>
@ -54,7 +61,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
if (left_nullable && right_nullable) if (left_nullable && right_nullable)
{ {
int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint); int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint);
if (res) if (res != 0)
return res; return res;
/// NULL != NULL case /// NULL != NULL case
@ -90,9 +97,10 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos, int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
const SortCursorImpl & rhs, size_t rpos, const SortCursorImpl & rhs, size_t rpos,
size_t key_length,
int null_direction_hint) int null_direction_hint)
{ {
for (size_t i = 0; i < lhs.sort_columns_size; ++i) for (size_t i = 0; i < key_length; ++i)
{ {
/// TODO(@vdimir): use nullableCompareAt only if there's nullable columns /// TODO(@vdimir): use nullableCompareAt only if there's nullable columns
int cmp = nullableCompareAt<true, true>(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint); int cmp = nullableCompareAt<true, true>(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint);
@ -104,13 +112,18 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint) int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint)
{ {
return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), null_direction_hint); return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
}
int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs, int null_direction_hint)
{
return nullableCompareAt<true, true>(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow(), null_direction_hint);
} }
bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint) bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint)
{ {
/// The last row of left cursor is less than the current row of the right cursor. /// The last row of left cursor is less than the current row of the right cursor.
int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), null_direction_hint); int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
return cmp < 0; return cmp < 0;
} }
@ -222,25 +235,136 @@ Chunk getRowFromChunk(const Chunk & chunk, size_t pos)
return result; return result;
} }
void inline addRange(PaddedPODArray<UInt64> & left_map, size_t start, size_t end) void inline addRange(PaddedPODArray<UInt64> & values, UInt64 start, UInt64 end)
{ {
assert(end > start); assert(end > start);
for (size_t i = start; i < end; ++i) for (UInt64 i = start; i < end; ++i)
left_map.push_back(i); values.push_back(i);
} }
void inline addMany(PaddedPODArray<UInt64> & left_or_right_map, size_t idx, size_t num) void inline addMany(PaddedPODArray<UInt64> & values, UInt64 value, size_t num)
{ {
for (size_t i = 0; i < num; ++i) values.resize_fill(values.size() + num, value);
left_or_right_map.push_back(idx);
} }
} }
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos)
: sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_)
{ {
row.reserve(cursor->sort_columns.size());
for (const auto & col : cursor->sort_columns)
{
auto new_col = col->cloneEmpty();
new_col->insertFrom(*col, pos);
row.push_back(std::move(new_col));
}
if (const IColumn * asof_column = cursor.getAsofColumn())
{
if (const auto * nullable_asof_column = checkAndGetColumn<ColumnNullable>(asof_column))
{
/// We save matched column, and since NULL do not match anything, we can't use it as a key
chassert(!nullable_asof_column->isNullAt(pos));
asof_column = nullable_asof_column->getNestedColumnPtr().get();
}
auto new_col = asof_column->cloneEmpty();
new_col->insertFrom(*asof_column, pos);
row.push_back(std::move(new_col));
}
} }
void JoinKeyRow::reset()
{
row.clear();
}
bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const
{
if (row.empty())
return false;
for (size_t i = 0; i < cursor->sort_columns_size; ++i)
{
// int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction);
int cmp = nullableCompareAt<true, true>(*this->row[i], *cursor->sort_columns[i], 0, cursor->getRow(), cursor->desc[i].nulls_direction);
if (cmp != 0)
return false;
}
return true;
}
bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
{
chassert(this->row.size() == cursor->sort_columns_size + 1);
if (!equals(cursor))
return false;
const auto & asof_row = row.back();
if (isNullAt(*asof_row, 0) || isNullAt(*cursor.getAsofColumn(), cursor->getRow()))
return false;
int cmp = 0;
if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(cursor.getAsofColumn()))
cmp = nullable_column->getNestedColumn().compareAt(cursor->getRow(), 0, *asof_row, 1);
else
cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1);
return (asof_inequality == ASOFJoinInequality::Less && cmp < 0)
|| (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0)
|| (asof_inequality == ASOFJoinInequality::Greater && cmp > 0)
|| (asof_inequality == ASOFJoinInequality::GreaterOrEquals && cmp >= 0);
}
void AnyJoinState::set(size_t source_num, const FullMergeJoinCursor & cursor)
{
assert(cursor->rows);
keys[source_num] = JoinKeyRow(cursor, cursor->rows - 1);
}
void AnyJoinState::reset(size_t source_num)
{
keys[source_num].reset();
value.clear();
}
void AnyJoinState::setValue(Chunk value_)
{
value = std::move(value_);
}
bool AnyJoinState::empty() const { return keys[0].row.empty() && keys[1].row.empty(); }
void AsofJoinState::set(const FullMergeJoinCursor & rcursor, size_t rpos)
{
key = JoinKeyRow(rcursor, rpos);
value = rcursor.getCurrent().clone();
value_row = rpos;
}
void AsofJoinState::reset()
{
key.reset();
value.clear();
}
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof)
: sample_block(materializeBlock(sample_block_).cloneEmpty())
, desc(description_)
{
if (desc.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor");
if (is_asof)
{
/// For ASOF join prefix of sort description is used for equality comparison
/// and the last column is used for inequality comparison and is handled separately
auto asof_column_description = desc.back();
desc.pop_back();
chassert(asof_column_description.direction == 1 && asof_column_description.nulls_direction == 1);
asof_column_position = sample_block.getPositionByName(asof_column_description.column_name);
}
}
const Chunk & FullMergeJoinCursor::getCurrent() const const Chunk & FullMergeJoinCursor::getCurrent() const
{ {
@ -278,48 +402,103 @@ bool FullMergeJoinCursor::fullyCompleted() const
return !cursor.isValid() && recieved_all_blocks; return !cursor.isValid() && recieved_all_blocks;
} }
String FullMergeJoinCursor::dump() const
{
Strings row_dump;
if (cursor.isValid())
{
Field val;
for (size_t i = 0; i < cursor.sort_columns_size; ++i)
{
cursor.sort_columns[i]->get(cursor.getRow(), val);
row_dump.push_back(val.dump());
}
if (const auto * asof_column = getAsofColumn())
{
asof_column->get(cursor.getRow(), val);
row_dump.push_back(val.dump());
}
}
return fmt::format("<{}/{}{}>[{}]",
cursor.getRow(), cursor.rows,
recieved_all_blocks ? "(finished)" : "",
fmt::join(row_dump, ", "));
}
MergeJoinAlgorithm::MergeJoinAlgorithm( MergeJoinAlgorithm::MergeJoinAlgorithm(
JoinPtr table_join_, JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers, const Blocks & input_headers,
size_t max_block_size_) size_t max_block_size_)
: table_join(table_join_) : kind(kind_)
, strictness(strictness_)
, max_block_size(max_block_size_) , max_block_size(max_block_size_)
, log(getLogger("MergeJoinAlgorithm")) , log(getLogger("MergeJoinAlgorithm"))
{ {
if (input_headers.size() != 2) if (input_headers.size() != 2)
throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs"); throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs");
auto strictness = table_join->getTableJoin().strictness(); if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All && strictness != JoinStrictness::Asof)
if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness);
auto kind = table_join->getTableJoin().kind(); if (strictness == JoinStrictness::Asof)
{
if (kind != JoinKind::Left && kind != JoinKind::Inner)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not implement ASOF {} join", kind);
}
if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind)) if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind);
const auto & join_on = table_join->getTableJoin().getOnlyClause(); if (on_clause_.on_filter_condition_left || on_clause_.on_filter_condition_right)
if (join_on.on_filter_condition_left || join_on.on_filter_condition_right)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions");
cursors = { cursors = {
createCursor(input_headers[0], join_on.key_names_left), createCursor(input_headers[0], on_clause_.key_names_left, strictness),
createCursor(input_headers[1], join_on.key_names_right) createCursor(input_headers[1], on_clause_.key_names_right, strictness),
}; };
}
for (const auto & [left_key, right_key] : table_join->getTableJoin().leftToRightKeyRemap()) MergeJoinAlgorithm::MergeJoinAlgorithm(
JoinPtr join_ptr,
const Blocks & input_headers,
size_t max_block_size_)
: MergeJoinAlgorithm(
join_ptr->getTableJoin().kind(),
join_ptr->getTableJoin().strictness(),
join_ptr->getTableJoin().getOnlyClause(),
input_headers,
max_block_size_)
{
for (const auto & [left_key, right_key] : join_ptr->getTableJoin().leftToRightKeyRemap())
{ {
size_t left_idx = input_headers[0].getPositionByName(left_key); size_t left_idx = input_headers[0].getPositionByName(left_key);
size_t right_idx = input_headers[1].getPositionByName(right_key); size_t right_idx = input_headers[1].getPositionByName(right_key);
left_to_right_key_remap[left_idx] = right_idx; left_to_right_key_remap[left_idx] = right_idx;
} }
const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(table_join.get()); const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(join_ptr.get());
if (smjPtr) if (smjPtr)
{ {
null_direction_hint = smjPtr->getNullDirection(); null_direction_hint = smjPtr->getNullDirection();
} }
if (strictness == JoinStrictness::Asof)
setAsofInequality(join_ptr->getTableJoin().getAsofInequality());
}
void MergeJoinAlgorithm::setAsofInequality(ASOFJoinInequality asof_inequality_)
{
if (strictness != JoinStrictness::Asof)
throw Exception(ErrorCodes::LOGICAL_ERROR, "setAsofInequality is only supported for ASOF joins");
if (asof_inequality_ == ASOFJoinInequality::None)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ASOF inequality cannot be None");
asof_inequality = asof_inequality_;
} }
void MergeJoinAlgorithm::logElapsed(double seconds) void MergeJoinAlgorithm::logElapsed(double seconds)
@ -407,7 +586,7 @@ struct AllJoinImpl
size_t lnum = nextDistinct(left_cursor.cursor); size_t lnum = nextDistinct(left_cursor.cursor);
size_t rnum = nextDistinct(right_cursor.cursor); size_t rnum = nextDistinct(right_cursor.cursor);
bool all_fit_in_block = std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size; bool all_fit_in_block = !max_block_size || std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size;
bool have_all_ranges = left_cursor.cursor.isValid() && right_cursor.cursor.isValid(); bool have_all_ranges = left_cursor.cursor.isValid() && right_cursor.cursor.isValid();
if (all_fit_in_block && have_all_ranges) if (all_fit_in_block && have_all_ranges)
{ {
@ -421,7 +600,7 @@ struct AllJoinImpl
else else
{ {
assert(state == nullptr); assert(state == nullptr);
state = std::make_unique<AllJoinState>(left_cursor.cursor, lpos, right_cursor.cursor, rpos); state = std::make_unique<AllJoinState>(left_cursor, lpos, right_cursor, rpos);
state->addRange(0, left_cursor.getCurrent().clone(), lpos, lnum); state->addRange(0, left_cursor.getCurrent().clone(), lpos, lnum);
state->addRange(1, right_cursor.getCurrent().clone(), rpos, rnum); state->addRange(1, right_cursor.getCurrent().clone(), rpos, rnum);
return; return;
@ -466,6 +645,17 @@ void dispatchKind(JoinKind kind, Args && ... args)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind);
} }
MutableColumns MergeJoinAlgorithm::getEmptyResultColumns() const
{
MutableColumns result_cols;
for (size_t i = 0; i < 2; ++i)
{
for (const auto & col : cursors[i]->sampleColumns())
result_cols.push_back(col->cloneEmpty());
}
return result_cols;
}
std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState() std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState()
{ {
if (all_join_state && all_join_state->finished()) if (all_join_state && all_join_state->finished())
@ -479,7 +669,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
/// Accumulate blocks with same key in all_join_state /// Accumulate blocks with same key in all_join_state
for (size_t i = 0; i < 2; ++i) for (size_t i = 0; i < 2; ++i)
{ {
if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(cursors[i]->cursor)) if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(*cursors[i]))
{ {
size_t pos = cursors[i]->cursor.getRow(); size_t pos = cursors[i]->cursor.getRow();
size_t num = nextDistinct(cursors[i]->cursor); size_t num = nextDistinct(cursors[i]->cursor);
@ -499,15 +689,10 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
stat.max_blocks_loaded = std::max(stat.max_blocks_loaded, all_join_state->blocksStored()); stat.max_blocks_loaded = std::max(stat.max_blocks_loaded, all_join_state->blocksStored());
/// join all rows with current key /// join all rows with current key
MutableColumns result_cols; MutableColumns result_cols = getEmptyResultColumns();
for (size_t i = 0; i < 2; ++i)
{
for (const auto & col : cursors[i]->sampleColumns())
result_cols.push_back(col->cloneEmpty());
}
size_t total_rows = 0; size_t total_rows = 0;
while (total_rows < max_block_size) while (!max_block_size || total_rows < max_block_size)
{ {
const auto & left_range = all_join_state->getLeft(); const auto & left_range = all_join_state->getLeft();
const auto & right_range = all_join_state->getRight(); const auto & right_range = all_join_state->getRight();
@ -532,7 +717,52 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
return {}; return {};
} }
MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin(JoinKind kind) std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAsofJoinState()
{
if (strictness != JoinStrictness::Asof)
return {};
if (!cursors[1]->fullyCompleted())
return {};
auto & left_cursor = *cursors[0];
const auto & left_columns = left_cursor.getCurrent().getColumns();
MutableColumns result_cols = getEmptyResultColumns();
while (left_cursor->isValid() && asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
left_cursor->next();
}
while (isLeft(kind) && left_cursor->isValid())
{
/// return row with default values at right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
for (; i < result_cols.size(); ++i)
result_cols[i]->insertDefault();
chassert(i == result_cols.size());
left_cursor->next();
}
size_t result_rows = result_cols.empty() ? 0 : result_cols.front()->size();
if (result_rows)
return Status(Chunk(std::move(result_cols), result_rows));
return {};
}
MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin()
{ {
PaddedPODArray<UInt64> idx_map[2]; PaddedPODArray<UInt64> idx_map[2];
@ -595,7 +825,7 @@ struct AnyJoinImpl
FullMergeJoinCursor & right_cursor, FullMergeJoinCursor & right_cursor,
PaddedPODArray<UInt64> & left_map, PaddedPODArray<UInt64> & left_map,
PaddedPODArray<UInt64> & right_map, PaddedPODArray<UInt64> & right_map,
AnyJoinState & state, AnyJoinState & any_join_state,
int null_direction_hint) int null_direction_hint)
{ {
assert(enabled); assert(enabled);
@ -656,21 +886,21 @@ struct AnyJoinImpl
} }
} }
/// Remember index of last joined row to propagate it to next block /// Remember last joined row to propagate it to next block
state.setValue({}); any_join_state.setValue({});
if (!left_cursor->isValid()) if (!left_cursor->isValid())
{ {
state.set(0, left_cursor.cursor); any_join_state.set(0, left_cursor);
if (cmp == 0 && isLeft(kind)) if (cmp == 0 && isLeft(kind))
state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos)); any_join_state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos));
} }
if (!right_cursor->isValid()) if (!right_cursor->isValid())
{ {
state.set(1, right_cursor.cursor); any_join_state.set(1, right_cursor);
if (cmp == 0 && isRight(kind)) if (cmp == 0 && isRight(kind))
state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos)); any_join_state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos));
} }
} }
}; };
@ -680,40 +910,34 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
if (any_join_state.empty()) if (any_join_state.empty())
return {}; return {};
auto kind = table_join->getTableJoin().kind();
Chunk result; Chunk result;
for (size_t source_num = 0; source_num < 2; ++source_num) for (size_t source_num = 0; source_num < 2; ++source_num)
{ {
auto & current = *cursors[source_num]; auto & current = *cursors[source_num];
auto & state = any_join_state; if (any_join_state.keys[source_num].equals(current))
if (any_join_state.keys[source_num].equals(current.cursor))
{ {
size_t start_pos = current->getRow(); size_t start_pos = current->getRow();
size_t length = nextDistinct(current.cursor); size_t length = nextDistinct(current.cursor);
if (length && isLeft(kind) && source_num == 0) if (length && isLeft(kind) && source_num == 0)
{ {
if (state.value) if (any_join_state.value)
result = copyChunkResized(current.getCurrent(), state.value, start_pos, length); result = copyChunkResized(current.getCurrent(), any_join_state.value, start_pos, length);
else else
result = createBlockWithDefaults(source_num, start_pos, length); result = createBlockWithDefaults(source_num, start_pos, length);
} }
if (length && isRight(kind) && source_num == 1) if (length && isRight(kind) && source_num == 1)
{ {
if (state.value) if (any_join_state.value)
result = copyChunkResized(state.value, current.getCurrent(), start_pos, length); result = copyChunkResized(any_join_state.value, current.getCurrent(), start_pos, length);
else else
result = createBlockWithDefaults(source_num, start_pos, length); result = createBlockWithDefaults(source_num, start_pos, length);
} }
/// We've found row with other key, no need to skip more rows with current key
if (current->isValid()) if (current->isValid())
{ any_join_state.keys[source_num].reset();
state.keys[source_num].reset();
}
} }
else else
{ {
@ -726,7 +950,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
return {}; return {};
} }
MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind) MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin()
{ {
if (auto result = handleAnyJoinState()) if (auto result = handleAnyJoinState())
return std::move(*result); return std::move(*result);
@ -771,10 +995,151 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind)
return Status(std::move(result)); return Status(std::move(result));
} }
MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin()
{
auto & left_cursor = *cursors[0];
if (!left_cursor->isValid())
return Status(0);
auto & right_cursor = *cursors[1];
if (!right_cursor->isValid())
return Status(1);
const auto & left_columns = left_cursor.getCurrent().getColumns();
const auto & right_columns = right_cursor.getCurrent().getColumns();
MutableColumns result_cols = getEmptyResultColumns();
while (left_cursor->isValid() && right_cursor->isValid())
{
auto lpos = left_cursor->getRow();
auto rpos = right_cursor->getRow();
auto cmp = compareCursors(*left_cursor, *right_cursor, null_direction_hint);
if (cmp == 0)
{
if (isNullAt(*left_cursor.getAsofColumn(), lpos))
cmp = -1;
if (isNullAt(*right_cursor.getAsofColumn(), rpos))
cmp = 1;
}
if (cmp == 0)
{
auto asof_cmp = compareAsofCursors(left_cursor, right_cursor, null_direction_hint);
if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1)
|| (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0))
{
/// First row in right table that is greater (or equal) than current row in left table
/// matches asof join condition the best
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : right_columns)
result_cols[i++]->insertFrom(*col, rpos);
chassert(i == result_cols.size());
left_cursor->next();
continue;
}
if (asof_inequality == ASOFJoinInequality::Less || asof_inequality == ASOFJoinInequality::LessOrEquals)
{
/// Asof condition is not (yet) satisfied, skip row in right table
right_cursor->next();
continue;
}
if ((asof_inequality == ASOFJoinInequality::Greater && asof_cmp >= 1)
|| (asof_inequality == ASOFJoinInequality::GreaterOrEquals && asof_cmp >= 0))
{
/// condition is satisfied, remember this row and move next to try to find better match
asof_join_state.set(right_cursor, rpos);
right_cursor->next();
continue;
}
if (asof_inequality == ASOFJoinInequality::Greater || asof_inequality == ASOFJoinInequality::GreaterOrEquals)
{
/// Asof condition is not satisfied anymore, use last matched row from right table
if (asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
}
else
{
asof_join_state.reset();
if (isLeft(kind))
{
/// return row with default values at right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (; i < result_cols.size(); ++i)
result_cols[i]->insertDefault();
chassert(i == result_cols.size());
}
}
left_cursor->next();
continue;
}
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO: implement ASOF equality join");
}
else if (cmp < 0)
{
if (asof_join_state.hasMatch(left_cursor, asof_inequality))
{
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertFrom(*col, lpos);
for (const auto & col : asof_join_state.value.getColumns())
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
chassert(i == result_cols.size());
left_cursor->next();
continue;
}
else
{
asof_join_state.reset();
}
/// no matches for rows in left table, just pass them through
size_t num = nextDistinct(*left_cursor);
if (isLeft(kind) && num)
{
/// return them with default values at right side
size_t i = 0;
for (const auto & col : left_columns)
result_cols[i++]->insertRangeFrom(*col, lpos, num);
for (; i < result_cols.size(); ++i)
result_cols[i]->insertManyDefaults(num);
chassert(i == result_cols.size());
}
}
else
{
/// skip rows in right table until we find match for current row in left table
nextDistinct(*right_cursor);
}
}
size_t num_rows = result_cols.empty() ? 0 : result_cols.front()->size();
return Status(Chunk(std::move(result_cols), num_rows));
}
/// if `source_num == 0` get data from left cursor and fill defaults at right /// if `source_num == 0` get data from left cursor and fill defaults at right
/// otherwise - vice versa /// otherwise - vice versa
Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const
{ {
ColumnRawPtrs cols; ColumnRawPtrs cols;
{ {
const auto & columns_left = source_num == 0 ? cursors[0]->getCurrent().getColumns() : cursors[0]->sampleColumns(); const auto & columns_left = source_num == 0 ? cursors[0]->getCurrent().getColumns() : cursors[0]->sampleColumns();
@ -797,7 +1162,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t star
cols.push_back(col.get()); cols.push_back(col.get());
} }
} }
Chunk result_chunk; Chunk result_chunk;
copyColumnsResized(cols, start, num_rows, result_chunk); copyColumnsResized(cols, start, num_rows, result_chunk);
return result_chunk; return result_chunk;
@ -813,7 +1177,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num)
IMergingAlgorithm::Status MergeJoinAlgorithm::merge() IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
{ {
auto kind = table_join->getTableJoin().kind();
if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted()) if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted())
return Status(0); return Status(0);
@ -821,11 +1184,11 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted()) if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted())
return Status(1); return Status(1);
if (auto result = handleAllJoinState()) if (auto result = handleAllJoinState())
{
return std::move(*result); return std::move(*result);
}
if (auto result = handleAsofJoinState())
return std::move(*result);
if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted()) if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted())
{ {
@ -839,7 +1202,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
} }
/// check if blocks are not intersecting at all /// check if blocks are not intersecting at all
if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0) if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0 && strictness != JoinStrictness::Asof)
{ {
if (cmp < 0) if (cmp < 0)
{ {
@ -858,13 +1221,14 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
} }
} }
auto strictness = table_join->getTableJoin().strictness();
if (strictness == JoinStrictness::Any) if (strictness == JoinStrictness::Any)
return anyJoin(kind); return anyJoin();
if (strictness == JoinStrictness::All) if (strictness == JoinStrictness::All)
return allJoin(kind); return allJoin();
if (strictness == JoinStrictness::Asof)
return asofJoin();
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness);
} }
@ -883,9 +1247,26 @@ MergeJoinTransform::MergeJoinTransform(
/* always_read_till_end_= */ false, /* always_read_till_end_= */ false,
/* empty_chunk_on_finish_= */ true, /* empty_chunk_on_finish_= */ true,
table_join, input_headers, max_block_size) table_join, input_headers, max_block_size)
, log(getLogger("MergeJoinTransform"))
{ {
LOG_TRACE(log, "Use MergeJoinTransform"); }
MergeJoinTransform::MergeJoinTransform(
JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
const Block & output_header,
size_t max_block_size,
UInt64 limit_hint_)
: IMergingTransform<MergeJoinAlgorithm>(
input_headers,
output_header,
/* have_all_inputs_= */ true,
limit_hint_,
/* always_read_till_end_= */ false,
/* empty_chunk_on_finish_= */ true,
kind_, strictness_, on_clause_, input_headers, max_block_size)
{
} }
void MergeJoinTransform::onFinish() void MergeJoinTransform::onFinish()

View File

@ -8,6 +8,7 @@
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <array>
#include <boost/core/noncopyable.hpp> #include <boost/core/noncopyable.hpp>
@ -19,6 +20,7 @@
#include <Processors/Chunk.h> #include <Processors/Chunk.h>
#include <Processors/Merges/Algorithms/IMergingAlgorithm.h> #include <Processors/Merges/Algorithms/IMergingAlgorithm.h>
#include <Processors/Merges/IMergingTransform.h> #include <Processors/Merges/IMergingTransform.h>
#include <Interpreters/TableJoin.h>
namespace Poco { class Logger; } namespace Poco { class Logger; }
@ -35,57 +37,28 @@ using FullMergeJoinCursorPtr = std::unique_ptr<FullMergeJoinCursor>;
/// Used instead of storing previous block /// Used instead of storing previous block
struct JoinKeyRow struct JoinKeyRow
{ {
std::vector<ColumnPtr> row;
JoinKeyRow() = default; JoinKeyRow() = default;
explicit JoinKeyRow(const SortCursorImpl & impl_, size_t pos) JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos);
{
row.reserve(impl_.sort_columns.size());
for (const auto & col : impl_.sort_columns)
{
auto new_col = col->cloneEmpty();
new_col->insertFrom(*col, pos);
row.push_back(std::move(new_col));
}
}
void reset() bool equals(const FullMergeJoinCursor & cursor) const;
{ bool asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const;
row.clear();
}
bool equals(const SortCursorImpl & impl) const void reset();
{
if (row.empty())
return false;
assert(this->row.size() == impl.sort_columns_size); std::vector<ColumnPtr> row;
for (size_t i = 0; i < impl.sort_columns_size; ++i)
{
int cmp = this->row[i]->compareAt(0, impl.getRow(), *impl.sort_columns[i], impl.desc[i].nulls_direction);
if (cmp != 0)
return false;
}
return true;
}
}; };
/// Remembers previous key if it was joined in previous block /// Remembers previous key if it was joined in previous block
class AnyJoinState : boost::noncopyable class AnyJoinState : boost::noncopyable
{ {
public: public:
AnyJoinState() = default; void set(size_t source_num, const FullMergeJoinCursor & cursor);
void setValue(Chunk value_);
void set(size_t source_num, const SortCursorImpl & cursor) void reset(size_t source_num);
{
assert(cursor.rows);
keys[source_num] = JoinKeyRow(cursor, cursor.rows - 1);
}
void setValue(Chunk value_) { value = std::move(value_); } bool empty() const;
bool empty() const { return keys[0].row.empty() && keys[1].row.empty(); }
/// current keys /// current keys
JoinKeyRow keys[2]; JoinKeyRow keys[2];
@ -118,8 +91,8 @@ public:
Chunk chunk; Chunk chunk;
}; };
AllJoinState(const SortCursorImpl & lcursor, size_t lpos, AllJoinState(const FullMergeJoinCursor & lcursor, size_t lpos,
const SortCursorImpl & rcursor, size_t rpos) const FullMergeJoinCursor & rcursor, size_t rpos)
: keys{JoinKeyRow(lcursor, lpos), JoinKeyRow(rcursor, rpos)} : keys{JoinKeyRow(lcursor, lpos), JoinKeyRow(rcursor, rpos)}
{ {
} }
@ -187,13 +160,32 @@ private:
size_t ridx = 0; size_t ridx = 0;
}; };
class AsofJoinState : boost::noncopyable
{
public:
void set(const FullMergeJoinCursor & rcursor, size_t rpos);
void reset();
bool hasMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
{
if (value.empty())
return false;
return key.asofMatch(cursor, asof_inequality);
}
JoinKeyRow key;
Chunk value;
size_t value_row = 0;
};
/* /*
* Wrapper for SortCursorImpl * Wrapper for SortCursorImpl
*/ */
class FullMergeJoinCursor : boost::noncopyable class FullMergeJoinCursor : boost::noncopyable
{ {
public: public:
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_); explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof = false);
bool fullyCompleted() const; bool fullyCompleted() const;
void setChunk(Chunk && chunk); void setChunk(Chunk && chunk);
@ -203,17 +195,31 @@ public:
SortCursorImpl * operator-> () { return &cursor; } SortCursorImpl * operator-> () { return &cursor; }
const SortCursorImpl * operator-> () const { return &cursor; } const SortCursorImpl * operator-> () const { return &cursor; }
SortCursorImpl & operator* () { return cursor; }
const SortCursorImpl & operator* () const { return cursor; }
SortCursorImpl cursor; SortCursorImpl cursor;
const Block & sampleBlock() const { return sample_block; } const Block & sampleBlock() const { return sample_block; }
Columns sampleColumns() const { return sample_block.getColumns(); } Columns sampleColumns() const { return sample_block.getColumns(); }
const IColumn * getAsofColumn() const
{
if (!asof_column_position)
return nullptr;
return cursor.all_columns[*asof_column_position];
}
String dump() const;
private: private:
Block sample_block; Block sample_block;
SortDescription desc; SortDescription desc;
Chunk current_chunk; Chunk current_chunk;
bool recieved_all_blocks = false; bool recieved_all_blocks = false;
std::optional<size_t> asof_column_position;
}; };
/* /*
@ -223,22 +229,33 @@ private:
class MergeJoinAlgorithm final : public IMergingAlgorithm class MergeJoinAlgorithm final : public IMergingAlgorithm
{ {
public: public:
explicit MergeJoinAlgorithm(JoinPtr table_join, const Blocks & input_headers, size_t max_block_size_); MergeJoinAlgorithm(JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
size_t max_block_size_);
MergeJoinAlgorithm(JoinPtr join_ptr, const Blocks & input_headers, size_t max_block_size_);
const char * getName() const override { return "MergeJoinAlgorithm"; } const char * getName() const override { return "MergeJoinAlgorithm"; }
void initialize(Inputs inputs) override; void initialize(Inputs inputs) override;
void consume(Input & input, size_t source_num) override; void consume(Input & input, size_t source_num) override;
Status merge() override; Status merge() override;
void logElapsed(double seconds); void setAsofInequality(ASOFJoinInequality asof_inequality_);
void logElapsed(double seconds);
private: private:
std::optional<Status> handleAnyJoinState(); std::optional<Status> handleAnyJoinState();
Status anyJoin(JoinKind kind); Status anyJoin();
std::optional<Status> handleAllJoinState(); std::optional<Status> handleAllJoinState();
Status allJoin(JoinKind kind); Status allJoin();
std::optional<Status> handleAsofJoinState();
Status asofJoin();
MutableColumns getEmptyResultColumns() const;
Chunk createBlockWithDefaults(size_t source_num); Chunk createBlockWithDefaults(size_t source_num);
Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const; Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const;
@ -246,12 +263,15 @@ private:
std::unordered_map<size_t, size_t> left_to_right_key_remap; std::unordered_map<size_t, size_t> left_to_right_key_remap;
std::array<FullMergeJoinCursorPtr, 2> cursors; std::array<FullMergeJoinCursorPtr, 2> cursors;
ASOFJoinInequality asof_inequality = ASOFJoinInequality::None;
/// Keep some state to make connection between data in different blocks /// Keep some state to make handle data from different blocks
AnyJoinState any_join_state; AnyJoinState any_join_state;
std::unique_ptr<AllJoinState> all_join_state; std::unique_ptr<AllJoinState> all_join_state;
AsofJoinState asof_join_state;
JoinPtr table_join; JoinKind kind;
JoinStrictness strictness;
size_t max_block_size; size_t max_block_size;
int null_direction_hint = 1; int null_direction_hint = 1;
@ -281,12 +301,21 @@ public:
size_t max_block_size, size_t max_block_size,
UInt64 limit_hint = 0); UInt64 limit_hint = 0);
MergeJoinTransform(
JoinKind kind_,
JoinStrictness strictness_,
const TableJoin::JoinOnClause & on_clause_,
const Blocks & input_headers,
const Block & output_header,
size_t max_block_size,
UInt64 limit_hint_ = 0);
String getName() const override { return "MergeJoinTransform"; } String getName() const override { return "MergeJoinTransform"; }
void setAsofInequality(ASOFJoinInequality asof_inequality_) { algorithm.setAsofInequality(asof_inequality_); }
protected: protected:
void onFinish() override; void onFinish() override;
LoggerPtr log;
}; };
} }

View File

@ -109,7 +109,7 @@ void ScatterByPartitionTransform::generateOutputChunks()
hash.reset(num_rows); hash.reset(num_rows);
for (const auto & column_number : key_columns) for (const auto & column_number : key_columns)
columns[column_number]->updateWeakHash32(hash); hash.update(columns[column_number]->getWeakHash32());
const auto & hash_data = hash.getData(); const auto & hash_data = hash.getData();
IColumn::Selector selector(num_rows); IColumn::Selector selector(num_rows);

View File

@ -50,7 +50,7 @@ TEST(Processors, PortsNotConnected)
processors->emplace_back(std::move(source)); processors->emplace_back(std::move(source));
processors->emplace_back(std::move(sink)); processors->emplace_back(std::move(sink));
#ifndef ABORT_ON_LOGICAL_ERROR #ifndef DEBUG_OR_SANITIZER_BUILD
try try
{ {
QueryStatusPtr element; QueryStatusPtr element;

View File

@ -0,0 +1,768 @@
#include <gtest/gtest.h>
#include <pcg_random.hpp>
#include <random>
#include <Poco/ConsoleChannel.h>
#include <Poco/Logger.h>
#include <Poco/AutoPtr.h>
#include <Columns/ColumnsNumber.h>
#include <Common/getRandomASCIIString.h>
#include <Common/randomSeed.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/TableJoin.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Sinks/NullSink.h>
#include <Processors/Sources/SourceFromChunks.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Transforms/MergeJoinTransform.h>
#include <Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <QueryPipeline/QueryPipeline.h>
using namespace DB;
namespace
{
/// Builds a complete query pipeline that joins two sources with MergeJoinTransform.
/// The first `key_length` columns of each input (matched by position) form the join key.
/// Output columns are the union of both inputs, prefixed with "t1." / "t2." to
/// disambiguate the two sides.
QueryPipeline buildJoinPipeline(
    std::shared_ptr<ISource> left_source,
    std::shared_ptr<ISource> right_source,
    size_t key_length = 1,
    JoinKind kind = JoinKind::Inner,
    JoinStrictness strictness = JoinStrictness::All,
    ASOFJoinInequality asof_inequality = ASOFJoinInequality::None)
{
    Blocks inputs;
    inputs.emplace_back(left_source->getPort().getHeader());
    inputs.emplace_back(right_source->getPort().getHeader());

    /// Output header: every column of both inputs, renamed with a table prefix.
    Block out_header;
    for (const auto & input : inputs)
    {
        for (ColumnWithTypeAndName column : input)
        {
            if (&input == &inputs.front())
                column.name = "t1." + column.name;
            else
                column.name = "t2." + column.name;
            out_header.insert(column);
        }
    }

    /// Join condition: positional equality of the first key_length columns.
    TableJoin::JoinOnClause on_clause;
    for (size_t i = 0; i < key_length; ++i)
    {
        on_clause.key_names_left.emplace_back(inputs[0].getByPosition(i).name);
        on_clause.key_names_right.emplace_back(inputs[1].getByPosition(i).name);
    }

    auto joining = std::make_shared<MergeJoinTransform>(
        kind,
        strictness,
        on_clause,
        inputs, out_header, /* max_block_size = */ 0);

    if (asof_inequality != ASOFJoinInequality::None)
        joining->setAsofInequality(asof_inequality);

    chassert(joining->getInputs().size() == 2);
    connect(left_source->getPort(), joining->getInputs().front());
    connect(right_source->getPort(), joining->getInputs().back());

    auto * output_port = &joining->getOutputPort();

    /// The pipeline takes ownership of all three processors.
    auto processors = std::make_shared<Processors>();
    processors->emplace_back(std::move(left_source));
    processors->emplace_back(std::move(right_source));
    processors->emplace_back(std::move(joining));

    QueryPipeline pipeline(QueryPlanResourceHolder{}, processors, output_port);
    return pipeline;
}
std::shared_ptr<ISource> oneColumnSource(const std::vector<std::vector<UInt64>> & values)
{
Block header = {
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "key"),
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "idx"),
};
UInt64 idx = 0;
Chunks chunks;
for (const auto & chunk_values : values)
{
auto key_column = ColumnUInt64::create();
auto idx_column = ColumnUInt64::create();
for (auto n : chunk_values)
{
key_column->insertValue(n);
idx_column->insertValue(idx);
++idx;
}
chunks.emplace_back(Chunk(Columns{std::move(key_column), std::move(idx_column)}, chunk_values.size()));
}
return std::make_shared<SourceFromChunks>(header, std::move(chunks));
}
/// Helper to build a test source row by row. Rows are split into chunks either
/// explicitly (addChunk) or at random row boundaries (setBreakProbability), so
/// tests exercise different chunk layouts for the same logical data.
class SourceChunksBuilder
{
public:
    explicit SourceChunksBuilder(const Block & header_)
        : header(header_)
    {
        current_chunk = header.cloneEmptyColumns();
        chassert(!current_chunk.empty());
    }

    /// Enables random chunk breaks after rows. The probability itself is drawn
    /// from rng_ as k/5 for k in [0, 5], so the exact values 0.0 (never break)
    /// and 1.0 (break after every row) are both reachable.
    void setBreakProbability(pcg64 & rng_)
    {
        /// random probability with possibility to have exact 0.0 and 1.0 values
        break_prob = std::uniform_int_distribution<size_t>(0, 5)(rng_) / static_cast<double>(5);
        rng = &rng_;
    }

    /// Appends one row (one Field per column); may randomly finish the current
    /// chunk afterwards if setBreakProbability was called.
    void addRow(const std::vector<Field> & row)
    {
        chassert(row.size() == current_chunk.size());
        for (size_t i = 0; i < current_chunk.size(); ++i)
            current_chunk[i]->insert(row[i]);

        if (rng && std::uniform_real_distribution<>(0.0, 1.0)(*rng) < break_prob)
            addChunk();
    }

    /// Finishes the chunk being filled; no-op when it is empty.
    void addChunk()
    {
        if (current_chunk.front()->empty())
            return;

        size_t rows = current_chunk.front()->size();
        chunks.emplace_back(std::move(current_chunk), rows);
        current_chunk = header.cloneEmptyColumns();
    }

    /// Returns a source over a deep copy of the accumulated chunks,
    /// so the same builder can produce several identical sources.
    std::shared_ptr<ISource> getSource()
    {
        addChunk();

        /// copy chunk to allow reusing same builder
        Chunks chunks_copy;
        chunks_copy.reserve(chunks.size());
        for (const auto & chunk : chunks)
            chunks_copy.emplace_back(chunk.clone());
        return std::make_shared<SourceFromChunks>(header, std::move(chunks_copy));
    }

private:
    Block header;                  /// column names/types of the produced chunks
    Chunks chunks;                 /// finished chunks
    MutableColumns current_chunk;  /// columns of the chunk currently being filled
    pcg64 * rng = nullptr;         /// set by setBreakProbability; nullptr -> never break randomly
    double break_prob = 0.0;       /// probability to cut a chunk after each row
};
std::vector<std::vector<Field>> getValuesFromBlock(const Block & block, const Names & names)
{
std::vector<std::vector<Field>> result;
for (size_t i = 0; i < block.rows(); ++i)
{
auto & row = result.emplace_back();
for (const auto & name : names)
block.getByName(name).column->get(i, row.emplace_back());
}
return result;
}
/// Runs the pipeline to completion and returns all produced data as one Block.
Block executePipeline(QueryPipeline && pipeline)
{
    PullingPipelineExecutor executor(pipeline);

    Blocks pulled;
    for (;;)
    {
        Block next;
        if (!executor.pull(next))
            break;
        pulled.emplace_back(std::move(next));
    }

    return concatenateBlocks(pulled);
}
/// Asserts that column `name` of `block` is a ColumnVector<T> whose data equals
/// `expected`; on mismatch the failure message includes the first differing row.
template <typename T>
void assertColumnVectorEq(const typename ColumnVector<T>::Container & expected, const Block & block, const std::string & name)
{
    const auto * actual = typeid_cast<const ColumnVector<T> *>(block.getByName(name).column.get());
    ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector<T>).name();

    auto describe_first_mismatch = [&]() -> String
    {
        const auto & actual_data = actual->getData();
        size_t rows_to_check = std::min(expected.size(), actual_data.size());
        for (size_t row = 0; row != rows_to_check; ++row)
        {
            if (expected[row] != actual_data[row])
                return fmt::format(", expected: {}, actual: {} at row {}", expected[row], actual_data[row], row);
        }
        return "";
    };

    EXPECT_EQ(actual->getData().size(), expected.size());
    ASSERT_EQ(actual->getData(), expected) << "column name: " << name << describe_first_mismatch();
}
/// Asserts that column `name` of `block` has concrete column type T and is
/// element-wise equal to `expected` (compared via IColumn::compareAt).
template <typename T>
void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name)
{
    const ColumnPtr & actual = block.getByName(name).column;
    ASSERT_TRUE(checkColumn<T>(*actual));
    ASSERT_TRUE(checkColumn<T>(expected));
    EXPECT_EQ(actual->size(), expected.size());

    auto render = [](const IColumn & col, size_t row) -> String
    {
        Field value;
        col.get(row, value);
        return value.dump();
    };

    size_t rows_to_compare = std::min(actual->size(), expected.size());
    for (size_t row = 0; row != rows_to_compare; ++row)
        ASSERT_EQ(actual->compareAt(row, row, expected, 1), 0)
            << render(*actual, row) << " != " << render(expected, row) << " at row " << row;
}
/// Returns a uniformly random element of `opts`.
/// Consumes exactly one value from `rng`; callers depend on that consumption order.
template <typename T>
T getRandomFrom(pcg64 & rng, const std::initializer_list<T> & opts)
{
    size_t idx = std::uniform_int_distribution<size_t>(0, opts.size() - 1)(rng);
    /// initializer_list is contiguous, so index it directly instead of
    /// copying it into a std::vector on every call (the original did).
    return *(opts.begin() + idx);
}
/// Advances the composite key (k1, k2) to a strictly greater value:
/// draws a fresh random string for k2 and bumps k1 whenever the new string
/// does not compare strictly greater than the previous one.
void generateNextKey(pcg64 & rng, UInt64 & k1, String & k2)
{
    size_t new_length = std::uniform_int_distribution<>(1, 10)(rng);
    String next_k2 = getRandomASCIIString(new_length, rng);

    if (!(k2 < next_k2))
        ++k1;
    k2 = std::move(next_k2);
}
/// True for ASOF inequalities that exclude equality (< and >).
bool isStrict(ASOFJoinInequality inequality)
{
    switch (inequality)
    {
        case ASOFJoinInequality::Less:
        case ASOFJoinInequality::Greater:
            return true;
        default:
            return false;
    }
}
}
/// Fixture for full sorting merge join tests. Configures Poco logging
/// (level taken from the TEST_LOG_LEVEL env var, default "none") and seeds
/// the RNG (TEST_RANDOM_SEED env var overrides a random seed); the seed is
/// printed so a failing randomized run can be reproduced.
class FullSortingJoinTest : public ::testing::Test
{
public:
    FullSortingJoinTest() = default;

    void SetUp() override
    {
        Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
        Poco::Logger::root().setChannel(channel);
        if (const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe)
            Poco::Logger::root().setLevel(test_log_level);
        else
            Poco::Logger::root().setLevel("none");

        UInt64 seed = randomSeed();
        if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe)
            seed = std::stoull(random_seed);
        /// Printed (not logged) so it is visible even with logging set to "none".
        std::cout << "TEST_RANDOM_SEED=" << seed << std::endl;
        rng = pcg64(seed);
    }

    void TearDown() override
    {
    }

    /// Shared RNG for randomized tests; reseed-able via TEST_RANDOM_SEED.
    pcg64 rng;
};
/// Joins on a single UInt64 key. The sources' "idx" column records the original
/// row number in each source (see oneColumnSource), so the expectations assert
/// exactly which rows were matched and in what order.
TEST_F(FullSortingJoinTest, AllAnyOneKey)
try
{
    {
        SCOPED_TRACE("Inner All");
        /// Keys 1..5 appear once on each side -> rows match pairwise.
        Block result = executePipeline(buildJoinPipeline(
            oneColumnSource({ {1, 2, 3, 4, 5} }),
            oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
            1, JoinKind::Inner, JoinStrictness::All));

        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
    }
    {
        SCOPED_TRACE("Inner Any");
        Block result = executePipeline(buildJoinPipeline(
            oneColumnSource({ {1, 2, 3, 4, 5} }),
            oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
            1, JoinKind::Inner, JoinStrictness::Any));
        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
    }
    {
        SCOPED_TRACE("Inner All");
        /// Duplicated keys across chunk boundaries: key 2 (4 left rows x 2 right
        /// rows) and key 3 (2 left rows x 1 right row) produce cross products.
        Block result = executePipeline(buildJoinPipeline(
            oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
            oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
            1, JoinKind::Inner, JoinStrictness::All));

        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 0, 1, 2, 3, 3, 4, 5}), result, "t1.idx");
        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 4, 4, 4, 3, 4, 5, 5}), result, "t2.idx");
    }
    {
        SCOPED_TRACE("Inner Any");
        /// ANY keeps only the first matching pair per key: (0,3) for key 2, (4,5) for key 3.
        Block result = executePipeline(buildJoinPipeline(
            oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
            oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
            1, JoinKind::Inner, JoinStrictness::Any));

        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
    }
    {
        SCOPED_TRACE("Inner Any");
        Block result = executePipeline(buildJoinPipeline(
            oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
            oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
            1, JoinKind::Inner, JoinStrictness::Any));
        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
    }
    {
        SCOPED_TRACE("Left Any");
        /// All 7 left rows are kept; unmatched left key 5 (left idx 6) gets
        /// default value 0 in the right idx column.
        Block result = executePipeline(buildJoinPipeline(
            oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
            oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
            1, JoinKind::Left, JoinStrictness::Any));

        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
    }
    {
        SCOPED_TRACE("Left Any");
        Block result = executePipeline(buildJoinPipeline(
            oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
            oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
            1, JoinKind::Left, JoinStrictness::Any));
        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
        assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
    }
}
catch (Exception & e)
{
    /// Print the stack trace before gtest reports the failure, then rethrow.
    std::cout << e.getStackTraceString() << std::endl;
    throw;
}
/// Randomized ANY join on a composite key (k1, k2): generates sorted data for
/// both sides and builds the expected attr columns on the fly. The statement
/// order below is significant — every RNG draw advances the shared generator.
TEST_F(FullSortingJoinTest, AnySimple)
try
{
    JoinKind kind = getRandomFrom(rng, {JoinKind::Inner, JoinKind::Left, JoinKind::Right});

    SourceChunksBuilder left_source({
        {std::make_shared<DataTypeUInt64>(), "k1"},
        {std::make_shared<DataTypeString>(), "k2"},
        {std::make_shared<DataTypeString>(), "attr"},
    });

    SourceChunksBuilder right_source({
        {std::make_shared<DataTypeUInt64>(), "k1"},
        {std::make_shared<DataTypeString>(), "k2"},
        {std::make_shared<DataTypeString>(), "attr"},
    });

    left_source.setBreakProbability(rng);
    right_source.setBreakProbability(rng);

    size_t num_keys = std::uniform_int_distribution<>(100, 1000)(rng);

    /// Expected "attr" values of the join result, left and right side.
    auto expected_left = ColumnString::create();
    auto expected_right = ColumnString::create();

    UInt64 k1 = 1;
    String k2;

    /// Unique per-row attribute encoding key, side and row index within the key.
    auto get_attr = [&](const String & side, size_t idx) -> String
    {
        return toString(k1) + "_" + k2 + "_" + side + "_" + toString(idx);
    };

    for (size_t i = 0; i < num_keys; ++i)
    {
        generateNextKey(rng, k1, k2);

        /// Key is present in left, right or both tables. Both tables is more probable.
        size_t key_presence = std::uniform_int_distribution<>(0, 10)(rng);

        size_t num_rows_left = key_presence == 0 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
        for (size_t j = 0; j < num_rows_left; ++j)
            left_source.addRow({k1, k2, get_attr("left", j)});

        size_t num_rows_right = key_presence == 1 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
        for (size_t j = 0; j < num_rows_right; ++j)
            right_source.addRow({k1, k2, get_attr("right", j)});

        /// ANY semantics: the first row of the other side matches, or the
        /// default (empty string) when the key is absent there.
        String left_attr = num_rows_left ? get_attr("left", 0) : "";
        String right_attr = num_rows_right ? get_attr("right", 0) : "";

        if (kind == JoinKind::Inner && num_rows_left && num_rows_right)
        {
            expected_left->insert(left_attr);
            expected_right->insert(right_attr);
        }
        else if (kind == JoinKind::Left)
        {
            for (size_t j = 0; j < num_rows_left; ++j)
            {
                expected_left->insert(get_attr("left", j));
                expected_right->insert(right_attr);
            }
        }
        else if (kind == JoinKind::Right)
        {
            for (size_t j = 0; j < num_rows_right; ++j)
            {
                expected_left->insert(left_attr);
                expected_right->insert(get_attr("right", j));
            }
        }
    }

    Block result_block = executePipeline(buildJoinPipeline(
        left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
        kind, JoinStrictness::Any));

    assertColumnEq<ColumnString>(*expected_left, result_block, "t1.attr");
    assertColumnEq<ColumnString>(*expected_right, result_block, "t2.attr");
}
catch (Exception & e)
{
    /// Print the stack trace before gtest reports the failure, then rethrow.
    std::cout << e.getStackTraceString() << std::endl;
    throw;
}
/// ASOF join on (key, t) with hand-written data. Chunk breaks are placed
/// explicitly — including a duplicated right row straddling a break — to
/// exercise join state carried across chunk boundaries.
TEST_F(FullSortingJoinTest, AsofSimple)
try
{
    SourceChunksBuilder left_source({
        {std::make_shared<DataTypeString>(), "key"},
        {std::make_shared<DataTypeUInt64>(), "t"},
    });
    left_source.addRow({"AMZN", 3});
    left_source.addRow({"AMZN", 4});
    left_source.addRow({"AMZN", 6});
    left_source.addRow({"SBUX", 10});

    SourceChunksBuilder right_source({
        {std::make_shared<DataTypeString>(), "key"},
        {std::make_shared<DataTypeUInt64>(), "t"},
        {std::make_shared<DataTypeUInt64>(), "value"},
    });
    right_source.addRow({"AAPL", 1, 97});
    right_source.addChunk();
    right_source.addRow({"AAPL", 2, 98});
    right_source.addRow({"AAPL", 3, 99});
    right_source.addRow({"AMZN", 1, 100});
    right_source.addRow({"AMZN", 2, 110});
    right_source.addChunk();
    /// Same row repeated right after a chunk break.
    right_source.addRow({"AMZN", 2, 110});
    right_source.addChunk();
    right_source.addRow({"AMZN", 4, 130});
    right_source.addRow({"AMZN", 5, 140});
    right_source.addRow({"SBUX", 8, 180});
    right_source.addChunk();
    right_source.addRow({"SBUX", 9, 190});

    {
        /// t1.t <= t2.t: e.g. left (AMZN, 3) matches the closest right t >= 3,
        /// which is 4; left rows with no such right row are dropped (Inner).
        Block result_block = executePipeline(buildJoinPipeline(
            left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
            JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals));

        auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});

        ASSERT_EQ(values, (std::vector<std::vector<Field>>{
            {"AMZN", 3u, 4u, 130u},
            {"AMZN", 4u, 4u, 130u},
        }));
    }

    {
        /// t1.t >= t2.t: each left row matches the closest right t <= its own t.
        Block result_block = executePipeline(buildJoinPipeline(
            left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
            JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals));

        auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});

        ASSERT_EQ(values, (std::vector<std::vector<Field>>{
            {"AMZN", 3u, 2u, 110u},
            {"AMZN", 4u, 4u, 130u},
            {"AMZN", 6u, 5u, 140u},
            {"SBUX", 10u, 9u, 190u},
        }));
    }
}
catch (Exception & e)
{
    /// Print the stack trace before gtest reports the failure, then rethrow.
    std::cout << e.getStackTraceString() << std::endl;
    throw;
}
/// ASOF join with key_length = 1.
/// NOTE(review): with a single key pair ("key" vs "t") the join appears to use
/// that one column as the ASOF inequality column with no separate equality
/// part — confirm against MergeJoinTransform's ASOF key handling.
TEST_F(FullSortingJoinTest, AsofOnlyColumn)
try
{
    auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} });

    SourceChunksBuilder right_source_builder({
        {std::make_shared<DataTypeUInt64>(), "t"},
        {std::make_shared<DataTypeUInt64>(), "value"},
    });

    right_source_builder.setBreakProbability(rng);

    for (const auto & row : std::vector<std::vector<Field>>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} })
        right_source_builder.addRow(row);

    auto right_source = right_source_builder.getSource();

    auto pipeline = buildJoinPipeline(
        left_source, right_source, /* key_length = */ 1,
        JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals);

    Block result_block = executePipeline(std::move(pipeline));

    /// Each left key matches the smallest right t >= key (e.g. 3 -> 4, 6 -> 11);
    /// left key 20 has no such right row and is dropped (Inner).
    ASSERT_EQ(
        assert_cast<const ColumnUInt64 *>(result_block.getByName("t1.key").column.get())->getData(),
        (ColumnUInt64::Container{3, 3, 3, 3, 3, 5, 5, 6, 9, 9, 10})
    );

    ASSERT_EQ(
        assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.t").column.get())->getData(),
        (ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 11})
    );

    ASSERT_EQ(
        assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.value").column.get())->getData(),
        (ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 111})
    );
}
catch (Exception & e)
{
    /// Print the stack trace before gtest reports the failure, then rethrow.
    std::cout << e.getStackTraceString() << std::endl;
    throw;
}
/// Randomized ASOF test for the Less / LessOrEquals inequalities.
/// Left/right data and the expected result are generated together; the
/// statement order is significant — every RNG draw advances the shared
/// generator, and left/right `t` sequences are kept consistent manually.
TEST_F(FullSortingJoinTest, AsofLessGeneratedTestData)
try
{
    /// Generate data random and build expected result at the same time.
    /// Test specific combinations of join kind and inequality per each run
    auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
    auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals });

    SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));

    /// Key is complex, `k1, k2` for equality and `t` for asof
    SourceChunksBuilder left_source_builder({
        {std::make_shared<DataTypeUInt64>(), "k1"},
        {std::make_shared<DataTypeString>(), "k2"},
        {std::make_shared<DataTypeUInt64>(), "t"},
        {std::make_shared<DataTypeInt64>(), "attr"},
    });

    SourceChunksBuilder right_source_builder({
        {std::make_shared<DataTypeUInt64>(), "k1"},
        {std::make_shared<DataTypeString>(), "k2"},
        {std::make_shared<DataTypeUInt64>(), "t"},
        {std::make_shared<DataTypeInt64>(), "attr"},
    });

    /// How small generated block should be
    left_source_builder.setBreakProbability(rng);
    right_source_builder.setBreakProbability(rng);

    /// We are going to generate sorted data and remember expected result
    ColumnInt64::Container expected;

    UInt64 k1 = 1;
    String k2;
    auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
    for (size_t key_num = 0; key_num < key_num_total; ++key_num)
    {
        /// Generate new key greater than previous
        generateNextKey(rng, k1, k2);

        Int64 left_t = 0;
        /// Generate several rows for the key
        size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng);
        for (size_t i = 0; i < num_left_rows; ++i)
        {
            /// t is strictly greater than previous
            left_t += std::uniform_int_distribution<>(1, 10)(rng);

            auto left_arrtibute_value = 10 * left_t;
            left_source_builder.addRow({k1, k2, left_t, left_arrtibute_value});
            expected.push_back(left_arrtibute_value);

            auto num_matches = 1 + std::poisson_distribution<>(4)(rng);
            /// Generate several matches in the right table
            auto right_t = left_t;
            for (size_t j = 0; j < num_matches; ++j)
            {
                /// For strict inequalities the matching right row must have t
                /// strictly greater, so the first step cannot be zero.
                int min_step = isStrict(asof_inequality) ? 1 : 0;
                right_t += std::uniform_int_distribution<>(min_step, 3)(rng);

                /// First row should match
                bool is_match = j == 0;
                right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * left_arrtibute_value : -1});
            }
            /// Next left_t should be greater than right_t not to match with previous rows
            left_t = right_t;
        }

        /// generate some rows with greater left_t to check that they are not matched
        num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
        for (size_t i = 0; i < num_left_rows; ++i)
        {
            left_t += std::uniform_int_distribution<>(1, 10)(rng);
            /// Negative attr marks a left row expected to stay unmatched.
            left_source_builder.addRow({k1, k2, left_t, -10 * left_t});

            if (join_kind == JoinKind::Left)
                expected.push_back(-10 * left_t);
        }
    }

    Block result_block = executePipeline(buildJoinPipeline(
        left_source_builder.getSource(), right_source_builder.getSource(),
        /* key_length = */ 3,
        join_kind, JoinStrictness::Asof, asof_inequality));

    assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");

    for (auto & e : expected)
        /// Non matched rows from left table have negative attr
        /// Value of attribute in right table is 10 times greater than in left table
        e = e < 0 ? 0 : 10 * e;

    assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
}
catch (Exception & e)
{
    /// Print the stack trace before gtest reports the failure, then rethrow.
    std::cout << e.getStackTraceString() << std::endl;
    throw;
}
/// Randomized ASOF test for the Greater / GreaterOrEquals inequalities,
/// mirror of AsofLessGeneratedTestData: here matching left rows come AFTER
/// the right rows. Statement order is significant — every RNG draw advances
/// the shared generator.
TEST_F(FullSortingJoinTest, AsofGreaterGeneratedTestData)
try
{
    /// Generate data random and build expected result at the same time.
    /// Test specific combinations of join kind and inequality per each run
    auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
    auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals });

    SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));

    SourceChunksBuilder left_source_builder({
        {std::make_shared<DataTypeUInt64>(), "k1"},
        {std::make_shared<DataTypeString>(), "k2"},
        {std::make_shared<DataTypeUInt64>(), "t"},
        {std::make_shared<DataTypeInt64>(), "attr"},
    });

    SourceChunksBuilder right_source_builder({
        {std::make_shared<DataTypeUInt64>(), "k1"},
        {std::make_shared<DataTypeString>(), "k2"},
        {std::make_shared<DataTypeUInt64>(), "t"},
        {std::make_shared<DataTypeInt64>(), "attr"},
    });

    left_source_builder.setBreakProbability(rng);
    right_source_builder.setBreakProbability(rng);

    ColumnInt64::Container expected;

    UInt64 k1 = 1;
    String k2;
    UInt64 left_t = 0;

    auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
    for (size_t key_num = 0; key_num < key_num_total; ++key_num)
    {
        /// Generate new key greater than previous
        generateNextKey(rng, k1, k2);

        /// Generate some rows with smaller left_t to check that they are not matched
        size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
        for (size_t i = 0; i < num_left_rows; ++i)
        {
            left_t += std::uniform_int_distribution<>(1, 10)(rng);
            /// Negative attr marks a left row expected to stay unmatched.
            left_source_builder.addRow({k1, k2, left_t, -10 * left_t});

            if (join_kind == JoinKind::Left)
                expected.push_back(-10 * left_t);
        }

        /// Occasionally leave the key without any matching right/left rows.
        if (std::bernoulli_distribution(0.1)(rng))
            continue;

        size_t num_right_matches = std::uniform_int_distribution<>(1, 10)(rng);
        auto right_t = left_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 0 : 1, 10)(rng);
        auto attribute_value = 10 * right_t;
        for (size_t j = 0; j < num_right_matches; ++j)
        {
            right_t += std::uniform_int_distribution<>(0, 3)(rng);
            /// Only the last (largest-t) right row is the expected ASOF match.
            bool is_match = j == num_right_matches - 1;
            right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * attribute_value : -1});
        }

        /// Next left_t should be greater than (or equals) right_t to match with previous rows
        left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 100)(rng);
        size_t num_left_matches = std::uniform_int_distribution<>(1, 100)(rng);
        for (size_t j = 0; j < num_left_matches; ++j)
        {
            left_t += std::uniform_int_distribution<>(0, 3)(rng);
            left_source_builder.addRow({k1, k2, left_t, attribute_value});
            expected.push_back(attribute_value);
        }
    }

    Block result_block = executePipeline(buildJoinPipeline(
        left_source_builder.getSource(), right_source_builder.getSource(),
        /* key_length = */ 3,
        join_kind, JoinStrictness::Asof, asof_inequality));

    assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");

    for (auto & e : expected)
        /// Non matched rows from left table have negative attr
        /// Value of attribute in right table is 10 times greater than in left table
        e = e < 0 ? 0 : 10 * e;

    assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
}
catch (Exception & e)
{
    /// Print the stack trace before gtest reports the failure, then rethrow.
    std::cout << e.getStackTraceString() << std::endl;
    throw;
}

View File

@ -133,7 +133,7 @@ TEST(CheckSortedTransform, CheckBadLastRow)
EXPECT_NO_THROW(executor.pull(chunk)); EXPECT_NO_THROW(executor.pull(chunk));
EXPECT_NO_THROW(executor.pull(chunk)); EXPECT_NO_THROW(executor.pull(chunk));
#ifndef ABORT_ON_LOGICAL_ERROR #ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW(executor.pull(chunk), DB::Exception); EXPECT_THROW(executor.pull(chunk), DB::Exception);
#endif #endif
} }
@ -158,7 +158,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock1)
Chunk chunk; Chunk chunk;
#ifndef ABORT_ON_LOGICAL_ERROR #ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW(executor.pull(chunk), DB::Exception); EXPECT_THROW(executor.pull(chunk), DB::Exception);
#endif #endif
} }
@ -181,7 +181,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock2)
PullingPipelineExecutor executor(pipeline); PullingPipelineExecutor executor(pipeline);
Chunk chunk; Chunk chunk;
#ifndef ABORT_ON_LOGICAL_ERROR #ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW(executor.pull(chunk), DB::Exception); EXPECT_THROW(executor.pull(chunk), DB::Exception);
#endif #endif
} }
@ -204,7 +204,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock3)
PullingPipelineExecutor executor(pipeline); PullingPipelineExecutor executor(pipeline);
Chunk chunk; Chunk chunk;
#ifndef ABORT_ON_LOGICAL_ERROR #ifndef DEBUG_OR_SANITIZER_BUILD
EXPECT_THROW(executor.pull(chunk), DB::Exception); EXPECT_THROW(executor.pull(chunk), DB::Exception);
#endif #endif
} }

View File

@ -666,7 +666,7 @@ void TCPHandler::runImpl()
// Server should die on std logic errors in debug, like with assert() // Server should die on std logic errors in debug, like with assert()
// or ErrorCodes::LOGICAL_ERROR. This helps catch these errors in // or ErrorCodes::LOGICAL_ERROR. This helps catch these errors in
// tests. // tests.
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
catch (const std::logic_error & e) catch (const std::logic_error & e)
{ {
state.io.onException(); state.io.onException();

View File

@ -357,7 +357,7 @@ void RefreshTask::refreshTask()
stop_requested = true; stop_requested = true;
tryLogCurrentException(log, tryLogCurrentException(log,
"Unexpected exception in refresh scheduling, please investigate. The view will be stopped."); "Unexpected exception in refresh scheduling, please investigate. The view will be stopped.");
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
abortOnFailedAssertion("Unexpected exception in refresh scheduling"); abortOnFailedAssertion("Unexpected exception in refresh scheduling");
#endif #endif
} }

View File

@ -155,6 +155,10 @@ void printExceptionWithRespectToAbort(LoggerPtr log, const String & query_id)
{ {
std::rethrow_exception(ex); std::rethrow_exception(ex);
} }
catch (const TestException &) // NOLINT
{
/// Exception from a unit test, ignore it.
}
catch (const Exception & e) catch (const Exception & e)
{ {
NOEXCEPT_SCOPE({ NOEXCEPT_SCOPE({

View File

@ -34,7 +34,7 @@ public:
auto choice = distribution(generator); auto choice = distribution(generator);
if (choice == 0) if (choice == 0)
throw std::runtime_error("Unlucky..."); throw TestException();
return false; return false;
} }
@ -48,7 +48,7 @@ public:
{ {
auto choice = distribution(generator); auto choice = distribution(generator);
if (choice == 0) if (choice == 0)
throw std::runtime_error("Unlucky..."); throw TestException();
} }
Priority getPriority() const override { return {}; } Priority getPriority() const override { return {}; }

View File

@ -1516,7 +1516,7 @@ static time_t tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin
void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const
{ {
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
constexpr bool paranoid_check_for_covered_parts_default = true; constexpr bool paranoid_check_for_covered_parts_default = true;
#else #else
constexpr bool paranoid_check_for_covered_parts_default = false; constexpr bool paranoid_check_for_covered_parts_default = false;
@ -2383,7 +2383,7 @@ static void paranoidCheckForCoveredPartsInZooKeeper(
const String & covering_part_name, const String & covering_part_name,
const StorageReplicatedMergeTree & storage) const StorageReplicatedMergeTree & storage)
{ {
#ifdef ABORT_ON_LOGICAL_ERROR #ifdef DEBUG_OR_SANITIZER_BUILD
constexpr bool paranoid_check_for_covered_parts_default = true; constexpr bool paranoid_check_for_covered_parts_default = true;
#else #else
constexpr bool paranoid_check_for_covered_parts_default = false; constexpr bool paranoid_check_for_covered_parts_default = false;

Some files were not shown because too many files have changed in this diff Show More